# DEMO: Imputing Missing Values

### 1. Import Packages and Connect to the CAS Server

See the documentation for the [SWAT (SAS Scripting Wrapper for Analytics Transfer)](https://sassoftware.github.io/python-swat/index.html) package.

In [None]:
## Packages
import swat
import pandas as pd
import numpy as np

## Custom personal module to connect to my CAS environment.
## Only a missing module is tolerated here; any other failure should surface.
try:
    from casConnect import connect_to_cas
except ImportError:
    print('CasConnect package not available')


######################
## Connect to CAS   ##
######################

## My personal connection to CAS. You will need to modify your connection object
try:
    conn = connect_to_cas()
except Exception as err:
    ## Best-effort: keep the notebook running, but report why the helper failed.
    ## A NameError here means the casConnect import above did not succeed.
    ## Catching Exception (not a bare except) lets Ctrl-C still interrupt the cell.
    print('My personal connection to CAS. You will need to modify yours using your connection information.')
    print('Connection attempt failed:', repr(err))


## General connection syntax
# conn = swat.CAS(host, port, username, password)


## Viya for Learners 3.5 connection
# import os
# hostValue = os.environ.get('CASHOST')
# portValue = os.environ.get('CASPORT')
# passwordToken=os.environ.get('SAS_VIYA_TOKEN')
# conn = swat.CAS(hostname=hostValue, port=portValue, password=passwordToken)

## Load the demonstration data into memory

In [None]:
## Create a simple dataframe with missing values scattered across every column.
## The 4th and 8th rows have no 'col5' entry. It is written as an explicit None
## so every row lines up with the five column names, instead of relying on
## pandas padding short rows.
df = pd.DataFrame(
    [
        [np.nan, 2, 45, 0, 'A'],
        [3, 4, np.nan, 1, 'A'],
        [np.nan, np.nan, 50, np.nan, 'B'],
        [np.nan, 3, np.nan, 4, None],
        [2, 2, np.nan, 0, 'A'],
        [3, 4, np.nan, 1, 'A'],
        [np.nan, np.nan, 75, np.nan, 'B'],
        [np.nan, 3, 60, 4, None],
    ],
    columns=['col1', 'col2', 'col3', 'col4', 'col5'],
)

## Send the dataframe to the CAS server; the result is a CAS table named
## 'missing_data' in the 'casuser' caslib, replacing any existing table
castbl = conn.upload_frame(
    df,
    casout=dict(name='missing_data', caslib='casuser', replace=True),
)

In [None]:
## Preview the uploaded table (up to the first 10 rows)
castbl.head(n=10)

## Using the SWAT nmiss method

In [None]:
## Count the missing values in every column of the CAS table
castbl.nmiss()

In [None]:
## Columns to inspect; colNames is reused by the distinct call further down
colNames = ['col1','col5']

## Missing-value counts for just the selected columns
castbl[colNames].nmiss()

## Distinct CAS action

In [None]:
## Run the distinct action on every column of the table
## (presumably reports distinct-value and missing-value counts per column -- confirm against the action output)
castbl.distinct()

In [None]:
## Same distinct action, limited to the columns listed in colNames
castbl.distinct(inputs = colNames)

## Summary CAS action

In [None]:
## Run the summary action on the table with its default settings
castbl.summary()

In [None]:
## Summary action restricted to col1 and col2, requesting only the
## min, max and nmiss statistics
castbl.summary(
    inputs=['col1', 'col2'],
    subSet=['min', 'max', 'nmiss'],
)

## Terminate the CAS Session

In [None]:
## End the CAS session now that the demo is finished
conn.terminate()