In [152]:
## Packages
import swat
import os
import pandas as pd
pd.set_option('display.max_columns', 50)
import numpy as np


## Custom personal module to connect to my CAS environment.
## It is optional: when it cannot be imported, the environment-variable
## based connection further down is used instead.
try:
    from casConnect import connect_to_cas
except ImportError:
    # Catch only the "module is not installed" case; a bare except would
    # also swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
    connect_to_cas = None
    print('CasConnect package not available')



######################
## Connect to CAS   ##
######################

## My personal connection to CAS. You will need to modify your connection object
conn = None
if connect_to_cas is not None:
    try:
        conn = connect_to_cas()
    except Exception as connectError:
        # Show why the personal helper failed instead of silently discarding it,
        # then fall through to the generic connection below.
        print(f'Personal CAS connection failed: {connectError!r}')

## Only open the generic connection when the personal one is unavailable.
## Creating it unconditionally would overwrite `conn` and orphan a session
## that was already opened (it would never be terminated).
if conn is None:
    print('My personal connection to CAS. You will need to modify yours using your connection information.')

    ## General connection syntax
    # conn = swat.CAS(host, port, username, password)

    ## Viya for Learners 3.5 connection
    ## Host, port and token are read from the environment so that no
    ## credentials are stored in the notebook.
    hostValue = os.environ.get('CASHOST')
    portValue = os.environ.get('CASPORT')
    passwordToken = os.environ.get('SAS_VIYA_TOKEN')
    conn = swat.CAS(hostname=hostValue, port=portValue, password=passwordToken)

CasConnect package not available
My personal connection to CAS. You will need to modify yours using your connection information.


## Create the demo CAS table

In [153]:
## Load the sample file from the 'samples' caslib into casuser.rand_retaildemo,
## replacing any table already loaded under that name.
demoTableRef = {'name' : 'rand_retaildemo', 'caslib' : 'casuser'}

conn.loadTable(path = 'RAND_RETAILDEMO.sashdat',
               caslib = 'samples',
               casout = dict(demoTableRef, replace = True))

## Run the partition action with the same table as input and (replaced) output.
conn.partition(table = dict(demoTableRef),
               casout = dict(demoTableRef, replace = True))

## Reference the in-memory table from Python.
retailTbl = conn.CASTable('rand_retaildemo', caslib = 'casuser')

## Rename every column to its lower-case form and keep only the listed columns.
newColNames = [dict(name = col, rename = col.lower())
               for col in retailTbl.columns.to_list()]
keepColumns = ['custid','bucket','age','loyalty_card','brand_name','channeltype','class']
retailTbl.alterTable(columns = newColNames, keep = keepColumns)

## Show the table dimensions, its storage details and the first rows.
display(retailTbl.shape, retailTbl.tableDetails(), retailTbl.head())

NOTE: Cloud Analytic Services made the file RAND_RETAILDEMO.sashdat available as table RAND_RETAILDEMO in caslib CASUSER(Peter.Styliadis@sas.com).


(930046, 7)

Unnamed: 0,Node,Blocks,Active,Rows,IndexSize,DataSize,VardataSize,CompressedSize,CompressionRatio,Mapped,MappedMemory,Unmapped,UnmappedMemory,Allocated,AllocatedMemory,DeletedRows
0,ALL,348,348,930046,0,364578032,0,0,0,343,359575104,0,0,5,5042688,0


Unnamed: 0,custid,bucket,age,loyalty_card,brand_name,channeltype,class
0,40506910.0,2.0,75.0,1.0,Oak,Internet,women_handbags
1,40506917.0,2.0,72.0,1.0,Oak,Internet,women_Active
2,40506919.0,2.0,38.0,1.0,Oak,Internet,dairy
3,40506934.0,2.0,,0.0,Oak,Internet,women_tops
4,40506942.0,2.0,50.0,1.0,Oak,Internet,women_tops


## Simple column updates in place

In [154]:
## Map each column to the expression that replaces its value in place:
## upper-case brand_name, lower-case channeltype, proper-case class.
caseExpressions = {
    'brand_name': 'upcase(brand_name)',
    'channeltype': 'lowcase(channeltype)',
    'class': 'propcase(class)',
}
retailTbl.update(set = [{'var':column, 'value':expression}
                        for column, expression in caseExpressions.items()])

In [155]:
## Preview the first rows to check the case changes in brand_name, channeltype and class
retailTbl.head()

Unnamed: 0,custid,bucket,age,loyalty_card,brand_name,channeltype,class
0,40506910.0,2.0,75.0,1.0,OAK,internet,Women_handbags
1,40506917.0,2.0,72.0,1.0,OAK,internet,Women_active
2,40506919.0,2.0,38.0,1.0,OAK,internet,Dairy
3,40506934.0,2.0,,0.0,OAK,internet,Women_tops
4,40506942.0,2.0,50.0,1.0,OAK,internet,Women_tops


## Update a column based on a condition

In [156]:
## Count the distinct and missing values of age before any replacement
retailTbl.distinct(inputs = 'age')

Unnamed: 0,Column,NDistinct,NMiss,Trunc
0,age,124.0,673447.0,0.0


Get the mean of the age column

In [157]:
## Mean of the age column rounded to 3 decimals; reused as the replacement value for missing ages
meanAge = retailTbl.age.mean().round(3)
meanAge

43.577

In [158]:
## Restrict the table to the rows where age is missing ...
missingAgeRows = retailTbl.query("age is null")

## ... and set age to the mean computed earlier for those rows only.
missingAgeRows.update(set = [{'var':'age', 'value':f'{meanAge}'}])

### Confirm no missing values exist in age

In [159]:
## Re-run the distinct/missing count on age after the update
retailTbl.distinct(inputs = 'age')

Unnamed: 0,Column,NDistinct,NMiss,Trunc
0,age,124.0,0.0,0.0


Notice that all the previously missing values (673,447 rows) now hold the mean age (43.577).

In [160]:
## Frequency of each age value
retailTbl.age.value_counts()

43.577     673447
19.000       6996
23.000       6944
24.000       6941
21.000       6882
            ...  
97.000         26
98.000         25
94.000         21
105.000        20
140.000        18
Length: 124, dtype: int64

## Save the CAS table as a data source file

In [149]:
## Save the table as retail_clean.sashdat in the casuser caslib
retailTbl.save(name = 'retail_clean.sashdat', caslib = 'casuser')

NOTE: Cloud Analytic Services saved the file retail_clean.sashdat in caslib CASUSER(Peter.Styliadis@sas.com).


In [151]:
## List the source files available in the casuser caslib
conn.fileInfo(caslib = 'casuser')

Unnamed: 0,Permission,Owner,Group,Name,Size,Encryption,Time,ModTime
0,-rw-r--r--,Peter.Styliadis@sas.com,v4e_users,test.csv,65,,2022-06-29T12:56:17-04:00,1972141000.0
1,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,sales.sas7bdat,73728,,2023-02-06T14:19:30-04:00,1991327000.0
2,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,sales.csv,10506,,2022-12-09T12:14:52-04:00,1986222000.0
3,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,heart_raw.sashdat,1051328,NONE,2022-10-12T13:06:07-04:00,1981214000.0
4,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,products.xlsx,225072,,2022-12-09T12:15:02-04:00,1986222000.0
5,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,orders_hd.sashdat,1728621720,NONE,2022-12-09T12:15:01-04:00,1986222000.0
6,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,tsa_claims_raw.csv,34936237,,2023-01-04T13:50:33-04:00,1988474000.0
7,drwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,warranty_demo.parquet,4096,,2023-02-21T15:47:19-04:00,1992628000.0
8,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,warranty_demo.csv,53297896,,2023-02-27T20:01:49-04:00,1993162000.0
9,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,myfinaltable.sashdat,74432416,NONE,2023-03-13T13:01:42-04:00,1994346000.0


## Delete the source file

In [148]:
## Remove the saved retail_clean.sashdat source file from the casuser caslib
conn.deleteSource(source = 'retail_clean.sashdat', caslib = 'casuser')

NOTE: Cloud Analytic Services removed the source data retail_clean.sashdat from caslib CASUSER(Peter.Styliadis@sas.com).


## Terminate the CAS connection

In [None]:
## Terminate the CAS connection held by conn
conn.terminate()