In [1]:
## Packages
import swat
import pandas as pd
pd.set_option('display.max_columns', 100)   ## let pandas display up to 100 columns before truncating wide tables
import numpy as np   ## NOTE(review): np is not referenced in the cells visible here - confirm it is needed
from casConnect import connect_to_cas ## custom personal module

##
## Connect to CAS
##
## Three alternatives are listed below; only the last one (the personal
## connection) is active. The first two are commented-out templates.

## General connection syntax
# conn = swat.CAS(host, port, username, password)

## Viya for Learners 3.5 connection
## NOTE: this template reads the host, port and token from environment variables
## through os.environ, so it also needs `import os`, which this cell does not import.
# hostValue = os.environ.get('CASHOST')
# portValue = os.environ.get('CASPORT')
# passwordToken=os.environ.get('SAS_VIYA_TOKEN')
# conn = swat.CAS(hostname=hostValue, port=portValue, password=passwordToken)

## Personal connection
## connect_to_cas comes from the custom casConnect module imported above; the object
## it returns is stored in conn and used for every CAS call in the rest of the notebook.
conn = connect_to_cas()

In [2]:
## Read WATER_CLUSTER.sashdat from the samples caslib and load it into CAS as the
## water_cluster table in the casuser caslib, replacing any table already there under that name.
conn.loadTable(
    path='WATER_CLUSTER.sashdat',
    caslib='samples',
    casout=dict(name='water_cluster', caslib='casuser', replace=True),
)

NOTE: Cloud Analytic Services made the file WATER_CLUSTER.sashdat available as table WATER_CLUSTER in caslib CASUSER(Peter.Styliadis@sas.com).


In [3]:
## Reference your new CAS table
## castbl points at the water_cluster table in the casuser caslib (the table the
## loadTable call created) and is the handle the following cells work with.
castbl = conn.CASTable('water_cluster', caslib = 'casuser')

In [4]:
## Preview the CAS table
## head() is called with no arguments; the output below shows the first five rows.
castbl.head()

Unnamed: 0,Year,Month,Day,Date,Serial,Property,Address,City,Zip,Lat,Long,Property_type,Meter_Location,Clli,DMA,Weekday,Weekend,Daily_W_C_M3,Week,US Holiday,CLUSTER
0,2014.0,1.0,31.0,2014-01-31,955.0,773.0,1800 POST OAK BLVD,HOUSTON,77056.0,-95.461478,29.7482,0.0,internal,HSTNTXNA,1.0,6.0,0.0,4.376,4.0,,4.0
1,2015.0,12.0,26.0,2015-12-26,1076.0,879.0,1811 E CROSSTIMBERS ST,HOUSTON,77093.0,-95.352264,29.828844,0.0,external,HSTNTXOX,2.0,5.0,0.0,1.515,51.0,,4.0
2,2014.0,1.0,19.0,2014-01-19,955.0,773.0,1800 POST OAK BLVD,HOUSTON,77056.0,-95.461478,29.7482,0.0,internal,HSTNTXNA,1.0,1.0,1.0,1.694,3.0,,4.0
3,2014.0,5.0,9.0,2014-05-09,871.0,706.0,17575 ALDINE WESTFIELD RD,HOUSTON,77073.0,-95.364653,29.976798,0.0,external,HSTNTXWE,1.0,6.0,0.0,0.728,18.0,,4.0
4,2014.0,1.0,30.0,2014-01-30,955.0,773.0,1800 POST OAK BLVD,HOUSTON,77056.0,-95.461478,29.7482,0.0,internal,HSTNTXNA,1.0,5.0,0.0,3.973,4.0,,4.0


In [5]:
## View column information
## The result lists one row per column: name, label, ID, type, raw/formatted length and format.
castbl.columnInfo()

Unnamed: 0,Column,Label,ID,Type,RawLength,FormattedLength,Format,NFL,NFD
0,Year,,1,double,8,12,,0,0
1,Month,Month,2,double,8,12,,0,0
2,Day,,3,double,8,12,,0,0
3,Date,,4,double,8,10,MMDDYY,10,0
4,Serial,,5,double,8,4,BEST,4,0
5,Property,,6,double,8,4,BEST,4,0
6,Address,,7,char,28,28,$CHAR,28,0
7,City,,8,char,7,7,$CHAR,7,0
8,Zip,,9,double,8,5,BEST,5,0
9,Lat,,10,double,8,10,BEST,10,0


In [6]:
## Alternative kept for reference only: the whole cell is commented out, so nothing runs here.
## It sketches a groupby on Year followed by min() on the Zip column, writing to a CAS table via casout.
## NOTE(review): unexecuted code - confirm it still works, or remove it from the notebook.
# (castbl                            ## CAS table
#  .groupby('Year')['Zip']           ## group by year, only select the Zip column for the aggregation
#  .min(casout={'name':'minzip',     ## Find the minimum zip, create a new CAS table
#               'caslib':'casuser',
#               'replace':True})
# )

## CAS Results Object

Every CAS action returns a `CASResults` object, which behaves like a Python dictionary. Below I store the action's results in a variable so we can inspect its type and contents.

If I'm using `casout` in an action, I usually just run the action and don't store the result.

Then I reference the CAS table I created separately.

In [12]:
## Summarize the Zip column within each Year (minimum, mean and maximum) and
## write the statistics to the zipyeargroup table in the casuser caslib.
## The object returned by the action is kept in cr so it can be inspected.
cr = castbl.groupby('Year').summary(
    inputs=['Zip'],                    ## column(s) to analyze - only Zip here
    subSet=['MIN', 'MEAN', 'MAX'],     ## statistics to compute (a single one is also fine)
    casout=dict(name='zipyeargroup',   ## output CAS table, replaced if it already exists
                caslib='casuser',
                replace=True),
)

## Show the type of the returned object first, then its contents
display(type(cr), cr)



swat.cas.results.CASResults

Unnamed: 0,casLib,Name,Rows,Columns,casTable
0,CASUSER(Peter.Styliadis@sas.com),zipyeargroup,2,6,"CASTable('zipyeargroup', caslib='CASUSER(Peter..."


## Reference the table the action created above

In [13]:
## Build a reference, by name, to the zipyeargroup table that the summary action wrote through casout.
newtableIcreated = conn.CASTable('zipyeargroup', caslib = 'casuser')
## Displaying the reference shows its CASTable(...) repr, not the rows of the table.
display(newtableIcreated)

CASTable('zipyeargroup', caslib='casuser')

In [111]:
## Check available CAS tables
## Lists the in-memory tables of the casuser caslib; the output should now include ZIPYEARGROUP.
conn.tableInfo(caslib = 'casuser')

Unnamed: 0,Name,Rows,Columns,IndexedColumns,Encoding,CreateTimeFormatted,ModTimeFormatted,AccessTimeFormatted,JavaCharSet,CreateTime,ModTime,AccessTime,Global,Repeated,View,MultiPart,SourceName,SourceCaslib,Compressed,Creator,Modifier,SourceModTimeFormatted,SourceModTime
0,WATER_CLUSTER,46720,21,0,utf-8,2023-03-07T14:55:25+00:00,2023-03-07T14:55:25+00:00,2023-03-07T14:56:12+00:00,UTF8,1993820000.0,1993820000.0,1993820000.0,0,0,0,0,WATER_CLUSTER.sashdat,Samples,1,Peter.Styliadis@sas.com,,2021-12-18T18:35:04+00:00,1955472000.0
1,ZIPYEARGROUP,2,6,0,utf-8,2023-03-07T14:56:12+00:00,2023-03-07T14:56:12+00:00,2023-03-07T14:56:12+00:00,UTF8,1993820000.0,1993820000.0,1993820000.0,0,0,0,0,,,0,Peter.Styliadis@sas.com,,,
2,WARRANTY_FINAL,153217,33,0,utf-8,2023-02-28T23:52:08+00:00,2023-02-28T23:52:09+00:00,2023-02-28T23:55:33+00:00,UTF8,1993248000.0,1993248000.0,1993248000.0,1,0,0,0,warranty_final.sashdat,CASUSER(Peter.Styliadis@sas.com),0,Peter.Styliadis@sas.com,,2023-02-28T23:50:36+00:00,1993247000.0
3,NEWJOINEDDATA,46720,23,0,utf-8,2023-03-06T15:47:09+00:00,2023-03-06T15:57:16+00:00,2023-03-06T16:07:55+00:00,UTF8,1993737000.0,1993737000.0,1993738000.0,1,0,0,0,,,0,Peter.Styliadis@sas.com,,,


In [112]:
## Preview new CAS table
## Reference the zipyeargroup table created by the summary action, then show its first rows
## (the output has one row per Year with the min, max and mean of Zip).
zipyeargroup = conn.CASTable('zipyeargroup', caslib = 'casuser')
zipyeargroup.head()

Unnamed: 0,Year,Year_f,_Column_,_Min_,_Max_,_Mean_
0,2014.0,2014,Zip,77003.0,77094.0,77043.671875
1,2015.0,2015,Zip,77003.0,77094.0,77043.671875


View column information of the CAS table. Notice there are two year columns.
- The **Year** column is the original raw values
- The **Year_f** column is a character column. Since CAS tables can carry formats, it looks like this is the formatted value of Year stored as text.

In [113]:
## Column metadata of the summary output table (compare the types of Year and Year_f)
zipyeargroup.columnInfo()

Unnamed: 0,Column,Label,ID,Type,RawLength,FormattedLength,Format,NFL,NFD
0,Year,,1,double,8,12,,0,0
1,Year_f,,2,char,12,12,$,12,0
2,_Column_,,3,char,6,6,$,6,0
3,_Min_,,4,double,8,12,BEST,12,0
4,_Max_,,5,double,8,12,BEST,12,0
5,_Mean_,,6,double,8,12,BEST,12,0


## Now you can do it all in one step.
1. Execute the action and, in the same expression, pull the reference to the new CAS table out of the action's results.

In [18]:
## Run the same grouped summary, then index straight into the action results:
## the 'OutputCasTables' entry describes the table that was created, and the
## casTable value in its row 0 is a ready-made reference to that table.
tblreference = (
    castbl
    .groupby('Year')                         ## group by year
    .summary(
        inputs=['Zip'],                      ## column(s) to analyze - only Zip here
        subSet=['MIN', 'MEAN', 'MAX'],       ## statistics to compute (a single one is also fine)
        casout=dict(name='zipyeargroup',     ## output CAS table, replaced if it already exists
                    caslib='casuser',
                    replace=True),
    )['OutputCasTables']
    .loc[0, 'casTable']
)

print(tblreference)

CASTable('zipyeargroup', caslib='CASUSER(Peter.Styliadis@sas.com)')


In [19]:
## Preview the table through the reference taken from the action results
tblreference.head()

Unnamed: 0,Year,Year_f,_Column_,_Min_,_Max_,_Mean_
0,2014.0,2014,Zip,77003.0,77094.0,77043.671875
1,2015.0,2015,Zip,77003.0,77094.0,77043.671875


In [83]:
## End the CAS session behind conn now that the notebook is done with it
conn.terminate()