# DEMO: Imputing Missing Values

### 1. Import Packages and Connect to the CAS Server

See the documentation for the [SWAT (SAS Scripting Wrapper for Analytics Transfer)](https://sassoftware.github.io/python-swat/index.html) package.

In [1]:
## Packages
import swat
import pandas as pd
import numpy as np

## Optional helper: a personal module that wraps my own CAS connection details.
## It is not shipped with this notebook, so only a failed import is tolerated here;
## any other problem is allowed to surface instead of being silently swallowed.
try:
    from casConnect import connect_to_cas
except ImportError:
    print('CasConnect package not available')

    

######################
## Connect to CAS   ##
######################

## My personal connection to CAS. You will need to modify your connection object
try:
    conn = connect_to_cas()
except Exception as err:
    ## A NameError lands here when the import above failed; connection errors do too.
    ## `Exception` (rather than a bare except) lets KeyboardInterrupt/SystemExit through.
    print('My personal connection to CAS. You will need to modify yours using your connection information.')
    ## Show the underlying cause so the reader can tell a missing helper from a failed login
    print('Reason: {}: {}'.format(type(err).__name__, err))


## General connection syntax
# conn = swat.CAS(host, port, username, password)


## Viya for Learners 3.5 connection
# import os
# hostValue = os.environ.get('CASHOST')
# portValue = os.environ.get('CASPORT')
# passwordToken = os.environ.get('SAS_VIYA_TOKEN')
# conn = swat.CAS(hostname=hostValue, port=portValue, password=passwordToken)

## Load the demonstration data into memory

In [2]:
## Create a simple dataframe: four numeric columns (col1-col4) and one
## character column (col5), each with some missing values.
## Every row lists all five values. The fourth and eighth rows used to stop after
## col4 and relied on pandas padding the short rows; the missing col5 entry is
## now written out as None, which yields the same frame.
df = pd.DataFrame([
            [np.nan, 2, 45, 0, 'A'],
            [3, 4, np.nan, 1, 'A'],
            [np.nan, np.nan, 50, np.nan, 'B'],
            [np.nan, 3, np.nan, 4, None],
            [2, 2, np.nan, 0, 'A'],
            [3, 4, np.nan, 1, 'A'],
            [np.nan, np.nan, 75, np.nan, 'B'],
            [np.nan, 3, 60, 4, None]
            ],
            columns=['col1','col2','col3','col4','col5'])

## Send the local dataframe to the CAS server. The output table is named
## 'missing_data', placed in the 'casuser' caslib, and replaces any existing copy.
upload_target = dict(name='missing_data', caslib='casuser', replace=True)
castbl = conn.upload_frame(df, casout=upload_target)

NOTE: Cloud Analytic Services made the uploaded file available as table MISSING_DATA in caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: The table MISSING_DATA has been created in caslib CASUSER(Peter.Styliadis@sas.com) from binary data uploaded to Cloud Analytic Services.


In [3]:
## Preview the uploaded CAS table (asks for up to 10 rows; the demo data has 8)
castbl.head(10)

Unnamed: 0,col1,col2,col3,col4,col5
0,,2.0,45.0,0.0,A
1,3.0,4.0,,1.0,A
2,,,50.0,,B
3,,3.0,,4.0,
4,2.0,2.0,,0.0,A
5,3.0,4.0,,1.0,A
6,,,75.0,,B
7,,3.0,60.0,4.0,


## Using the SWAT nmiss method

In [4]:
## Count the missing values in every column of the CAS table
castbl.nmiss()

col1    5
col2    2
col3    4
col4    2
col5    2
dtype: int64

In [6]:
## Columns of interest; this list is reused by a later cell
colNames = ['col1','col5']

## Subset the CAS table to those columns, then count missing values per column
castbl[colNames].nmiss()

col1    5
col5    2
dtype: int64

## Distinct CAS action

In [8]:
## Run the distinct action on all columns: the result lists NDistinct and NMiss per column.
## NOTE(review): in the output below NDistinct appears to count the missing value as its
## own level (col1 holds only 2 and 3 yet reports 3) - confirm against the action docs.
castbl.distinct()

Unnamed: 0,Column,NDistinct,NMiss,Trunc
0,col1,3.0,5.0,0.0
1,col2,4.0,2.0,0.0
2,col3,5.0,4.0,0.0
3,col4,4.0,2.0,0.0
4,col5,3.0,2.0,0.0


In [7]:
## Same distinct action, limited to the columns listed in colNames
castbl.distinct(inputs = colNames)

Unnamed: 0,Column,NDistinct,NMiss,Trunc
0,col1,3.0,5.0,0.0
1,col5,3.0,2.0,0.0


## Summary CAS action

In [10]:
## Run the summary action with default settings: descriptive statistics, including NMiss.
## The output below covers col1-col4 only; the character column col5 does not appear.
castbl.summary()

Unnamed: 0,Column,Min,Max,N,NMiss,Mean,Sum,Std,StdErr,Var,USS,CSS,CV,TValue,ProbT,Skewness,Kurtosis
0,col1,2.0,3.0,3.0,5.0,2.666667,8.0,0.57735,0.333333,0.333333,22.0,0.666667,21.650635,8.0,0.015268,-1.732051,
1,col2,2.0,4.0,6.0,2.0,3.0,18.0,0.894427,0.365148,0.8,58.0,4.0,29.81424,8.215838,0.000435,0.0,-1.875
2,col3,45.0,75.0,4.0,4.0,57.5,230.0,13.228757,6.614378,175.0,13750.0,525.0,23.006533,8.693183,0.003203,0.863919,-0.285714
3,col4,0.0,4.0,6.0,2.0,1.666667,10.0,1.861899,0.760117,3.466667,34.0,17.333333,111.71392,2.192645,0.07983,0.723001,-1.875


In [9]:
## Limit the summary action to two columns and to three statistics
summary_columns = ['col1', 'col2']
summary_stats = ['min', 'max', 'nmiss']
castbl.summary(inputs=summary_columns, subSet=summary_stats)

Unnamed: 0,Column,Min,Max,NMiss
0,col1,2.0,3.0,5.0
1,col2,2.0,4.0,2.0


## Terminate the CAS Session

In [None]:
## End the CAS session opened at the top of the notebook
conn.terminate()