In [None]:
## Packages
import swat
import os
import pandas as pd
pd.set_option('display.max_columns', 50)
import numpy as np


## Custom personal module to connect to my CAS environment.
## Only a missing module is expected here, so catch ImportError instead of
## using a bare except that would also hide unrelated failures.
try:
    from casConnect import connect_to_cas
except ImportError:
    connect_to_cas = None
    print('CasConnect package not available')


######################
## Connect to CAS   ##
######################

## My personal connection to CAS. You will need to modify your connection object
conn = None
if connect_to_cas is not None:
    try:
        conn = connect_to_cas()
    except Exception as connectError:
        ## Surface the reason the helper failed rather than swallowing it
        print(f'connect_to_cas() failed: {connectError!r}')


## General connection syntax
# conn = swat.CAS(host, port, username, password)

## Viya for Learners 3.5 connection settings (read from the environment)
hostValue = os.environ.get('CASHOST')
portValue = os.environ.get('CASPORT')
passwordToken = os.environ.get('SAS_VIYA_TOKEN')

## Fall back to the environment-based connection only when the personal helper
## did not produce one. Connecting unconditionally would overwrite `conn` and
## leave the first session open with nothing referencing it.
if conn is None:
    print('My personal connection to CAS. You will need to modify yours using your connection information.')
    conn = swat.CAS(hostname=hostValue, port=portValue, password=passwordToken)

## Create demo CAS table

In [47]:
## Bring RAND_RETAILDEMO.sashdat from the samples caslib into memory on the CAS server
conn.loadTable(
    path = 'RAND_RETAILDEMO.sashdat',
    caslib = 'samples',
    casout = dict(name = 'rand_retaildemo', caslib = 'casuser', replace = True),
)

## Handle to the in-memory CAS table
retailTbl = conn.CASTable('rand_retaildemo', caslib = 'casuser')

## Add a duplicate of the age column, then write the result back over the same table
retailWithDup = retailTbl.eval("age_dup = age", inplace = False)
retailWithDup.partition(
    casout = dict(name = 'rand_retaildemo', caslib = 'casuser', replace = True)
)


## Rename specification: every current column mapped to its lowercase name
currentCols = retailTbl.columns.to_list()
newColNames = [dict(name = col, rename = col.lower()) for col in currentCols]

## Columns that survive the alterTable call
keepColumns = [
    'custid', 'bucket', 'age', 'age_dup',
    'loyalty_card', 'brand_name', 'channeltype', 'class',
]

## Apply the renames and drop everything not listed in keepColumns
retailTbl.alterTable(columns = newColNames, keep = keepColumns)

## Show the shape, table details, table info and first rows of the new CAS table
display(
    retailTbl.shape,
    retailTbl.tableDetails(),
    retailTbl.tableInfo(caslib = 'casuser'),
    retailTbl.head(),
)

NOTE: Cloud Analytic Services made the file RAND_RETAILDEMO.sashdat available as table RAND_RETAILDEMO in caslib CASUSER(Peter.Styliadis@sas.com).


(930046, 8)

Unnamed: 0,Node,Blocks,Active,Rows,IndexSize,DataSize,VardataSize,CompressedSize,CompressionRatio,Mapped,MappedMemory,Unmapped,UnmappedMemory,Allocated,AllocatedMemory,DeletedRows
0,ALL,383,383,930046,0,372018400,0,0,0,351,364683456,0,0,32,7375600,0


Unnamed: 0,Name,Rows,Columns,IndexedColumns,Encoding,CreateTimeFormatted,ModTimeFormatted,AccessTimeFormatted,JavaCharSet,CreateTime,ModTime,AccessTime,Global,Repeated,View,MultiPart,SourceName,SourceCaslib,Compressed,Creator,Modifier,SourceModTimeFormatted,SourceModTime
0,RAND_RETAILDEMO,930046,8,0,utf-8,2023-03-31T10:05:33-04:00,2023-03-31T10:05:33-04:00,2023-03-31T10:05:33-04:00,UTF8,1995891000.0,1995891000.0,1995891000.0,0,0,0,0,,,0,Peter.Styliadis@sas.com,,,


Unnamed: 0,custid,bucket,age,age_dup,loyalty_card,brand_name,channeltype,class
0,32368571.0,2.0,68.0,68.0,1.0,Oak,Internet,women_Juniors
1,32368571.0,2.0,68.0,68.0,1.0,Oak,Internet,women_Pants
2,32368625.0,1.0,,,0.0,Oak,Internet,women_socks
3,32368682.0,2.0,,,0.0,Oak,Internet,women_handbags
4,32368682.0,2.0,,,0.0,Oak,Internet,women_jewelry


## Simple column updates in place

In [48]:
## Map each character column to the function that normalises its case
caseFunctions = {
    'brand_name': 'upcase',
    'channeltype': 'lowcase',
    'class': 'propcase',
}

## One update assignment per column, e.g. brand_name = upcase(brand_name)
retailTbl.update(set = [
    {'var': column, 'value': f'{function}({column})'}
    for column, function in caseFunctions.items()
])

In [50]:
## Preview the first rows to check the case changes in brand_name, channeltype and class
retailTbl.head()

Unnamed: 0,custid,bucket,age,age_dup,loyalty_card,brand_name,channeltype,class
0,32368571.0,2.0,68.0,68.0,1.0,OAK,internet,Women_juniors
1,32368571.0,2.0,68.0,68.0,1.0,OAK,internet,Women_pants
2,32368625.0,1.0,,,0.0,OAK,internet,Women_socks
3,32368682.0,2.0,,,0.0,OAK,internet,Women_handbags
4,32368682.0,2.0,,,0.0,OAK,internet,Women_jewelry


## Update a column based on a condition

In [51]:
## Distinct and missing value counts for age and its duplicate, before any imputation
retailTbl.distinct(inputs = ['age', 'age_dup'])

Unnamed: 0,Column,NDistinct,NMiss,Trunc
0,age,124.0,673447.0,0.0
1,age_dup,124.0,673447.0,0.0


Get the mean of the age column

In [52]:
## Mean of the age column, rounded to 3 decimals; used below to fill the missing ages
meanAge = retailTbl.age.mean().round(3)
meanAge

43.577

In [53]:
## Restrict the table reference to rows where age is missing ...
missingAgeRows = retailTbl.query("age is null")

## ... and set age to the mean on just those rows
missingAgeRows.update(set = [dict(var = 'age', value = f'{meanAge}')])

### Confirm no missing values exist in age

In [54]:
## Re-check age: NMiss should now be 0
retailTbl.distinct(inputs = 'age')

Unnamed: 0,Column,NDistinct,NMiss,Trunc
0,age,124.0,0.0,0.0


Notice that all the missing values (673,447) are now the mean age (43.577)

In [55]:
## Frequency of each age value
retailTbl.age.value_counts()

43.577     673447
19.000       6996
23.000       6944
24.000       6941
21.000       6882
            ...  
97.000         26
98.000         25
94.000         21
105.000        20
140.000        18
Length: 124, dtype: int64

## Update rows using conditional logic

In [56]:
## Display the table reference itself (the CASTable object, not its rows)
retailTbl

CASTable('rand_retaildemo', caslib='casuser')

In [66]:
## Conditional expression: use the mean where age_dup is missing, otherwise keep the value
fillMissingExpr = f'ifn(age_dup = . , {meanAge}, age_dup)'

## Apply the expression to the age_dup column
retailTbl.update(set = [dict(var = 'age_dup', value = fillMissingExpr)])

### Confirm no missing values exist in age_dup

In [67]:
## Re-check age_dup: NMiss should now be 0
retailTbl.distinct(inputs = 'age_dup')

Unnamed: 0,Column,NDistinct,NMiss,Trunc
0,age_dup,124.0,0.0,0.0


In [68]:
## Frequency of each age_dup value
retailTbl.age_dup.value_counts()

43.577     673447
19.000       6996
23.000       6944
24.000       6941
21.000       6882
            ...  
97.000         26
98.000         25
94.000         21
105.000        20
140.000        18
Length: 124, dtype: int64

## Save the CAS table as a data source file

In [70]:
## Save the in-memory table as retail_clean.sashdat in the casuser caslib
## (replace = True presumably overwrites an existing file of that name - confirm)
retailTbl.save(name = 'retail_clean.sashdat', caslib = 'casuser', replace = True)

NOTE: Cloud Analytic Services saved the file retail_clean.sashdat in caslib CASUSER(Peter.Styliadis@sas.com).


In [71]:
## List the data source files in the casuser caslib
conn.fileInfo(caslib = 'casuser')

Unnamed: 0,Permission,Owner,Group,Name,Size,Encryption,Time,ModTime
0,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,warranty_final.csv,39528995,,2023-03-27T10:42:55-04:00,1995547000.0
1,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,sales.sas7bdat,73728,,2023-02-06T14:19:30-04:00,1991327000.0
2,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,sales.csv,10506,,2022-12-09T12:14:52-04:00,1986222000.0
3,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,heart_raw.sashdat,1051328,NONE,2022-10-12T13:06:07-04:00,1981214000.0
4,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,products.xlsx,225072,,2022-12-09T12:15:02-04:00,1986222000.0
5,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,orders_hd.sashdat,1728621720,NONE,2022-12-09T12:15:01-04:00,1986222000.0
6,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,tsa_claims_raw.csv,34936237,,2023-01-04T13:50:33-04:00,1988474000.0
7,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,warranty_claims_2015.csv,144481,,2023-03-27T09:07:18-04:00,1995542000.0
8,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,warranty_demo.csv,53297896,,2023-02-27T20:01:49-04:00,1993162000.0
9,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,myfinaltable.sashdat,74432416,NONE,2023-03-13T13:01:42-04:00,1994346000.0


## Delete the source file

In [72]:
## Remove the retail_clean.sashdat source file from the casuser caslib
conn.deleteSource(source = 'retail_clean.sashdat', caslib = 'casuser')

NOTE: Cloud Analytic Services removed the source data retail_clean.sashdat from caslib CASUSER(Peter.Styliadis@sas.com).


## Terminate the CAS connection

In [73]:
## Terminate the CAS connection now that the demo is finished
conn.terminate()