# Read Multiple CSV Files

In [1]:
## Packages
import swat
import os
import pandas as pd
import numpy as np

## Options
# Widen the pandas display: show up to 50 columns and do not truncate column text.
pd.options.display.max_columns = 50
pd.options.display.max_colwidth = None

## custom personal module to connect to my CAS environment
## The helper is optional: if the module cannot be imported, report it and
## carry on so a connection can be made manually further down.
try:
    from casConnect import connect_to_cas
except ImportError:
    ## Only a failed import is expected here; any other error should surface
    ## instead of being reported as "not available".
    print('casConnect package not available')

## Make a Connection to CAS (REQUIRED: MODIFY CONNECTION INFORMATION)

##### To connect to the CAS server you will need:
1. the host name,
2. the port number,
3. your user name and your password.

Visit the documentation [Getting Started with SAS® Viya® for Python](https://go.documentation.sas.com/doc/en/pgmsascdc/default/caspg3/titlepage.htm) for more information about connecting to CAS.

**Be aware that connecting to the CAS server can be implemented in various ways, so you might need to see your system administrator about how to make a connection. Please follow company policy regarding authentication.**

In [2]:
##
## Connect to CAS
##

## General connection syntax
# conn = swat.CAS(host, port, username, password)

## SAS Viya for Learners 3.5 connection
# hostValue = os.environ.get('CASHOST')
# portValue = os.environ.get('CASPORT')
# passwordToken=os.environ.get('SAS_VIYA_TOKEN')
# conn = swat.CAS(hostname=hostValue, port=portValue, password=passwordToken)

## Personal connection
## Best-effort: if the helper is missing or the connection fails, report it
## (with the reason) instead of stopping the notebook, so a connection can be
## created manually in the next cell.
try:
    conn = connect_to_cas()
    print('CAS connection succesful')
    print(conn)
except Exception as err:
    ## Catch Exception rather than everything so Ctrl-C / kernel interrupts
    ## still work, and show why the connection was not made.
    print('No connection')
    print(f'Reason: {type(err).__name__}: {err}')

CAS connection succesful
CAS('ssemonthly.demo.sas.com', 443, protocol='https', name='py-session-1', session='d8a0f080-43af-af43-bf80-df8a42bb4871')


## Enter your connection information to CAS below

In [51]:
## conn = swat.CAS()

## Create the data for the demonstration

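Load a subset of columns from the `RAND_RETAILDEMO.sashdat` file in the `samples` caslib into an in-memory CAS table named `retail_sales` in the `casuser` caslib, then preview the first rows.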

In [4]:
# Columns to read from the source file; every other column is left out.
keepColumns = [
    'CustID',
    'bucket',
    'Class',
    'trx_dow_new',
    'Region',
    'Region_2',
]

# Load the server-side file into memory as casuser.retail_sales,
# replacing any table of that name that is already loaded.
conn.loadTable(
    path = 'RAND_RETAILDEMO.sashdat',
    caslib = 'samples',
    vars = keepColumns,
    casOut = dict(name = 'retail_sales', caslib = 'casuser', replace = True),
)

# Client-side reference to the in-memory table, followed by a preview.
castbl = conn.CASTable(name = 'retail_sales', caslib = 'casuser')
castbl.head()

NOTE: Cloud Analytic Services made the file RAND_RETAILDEMO.sashdat available as table RETAIL_SALES in caslib CASUSER(Peter.Styliadis@sas.com).


Unnamed: 0,CustID,bucket,Class,trx_dow_new,Region,Region_2
0,1083863.0,1.0,oral care,TUE,US_AT,US Atlantic Coast
1,1083863.0,1.0,kids_swimwear,WED,US_AT,US Atlantic Coast
2,1083863.0,1.0,men_slippers,THU,US_MW,US Midwest
3,1083863.0,2.0,men_underwear,SAT,US_AT,US Atlantic Coast
4,1083883.0,2.0,DVD & Blu-ray,FRI,US_MW,US Midwest


## Load the fcmpact action set

In [5]:
# Make the fcmpact action set (FCMP routines) available in this CAS session.
conn.loadActionSet('fcmpact')

NOTE: Added action set 'fcmpact'.


In [15]:
# IPython help lookup: shows the docstring that lists the actions in the fcmpact action set.
conn.fcmpact?

[1;31mSignature:[0m   [0mconn[0m[1;33m.[0m[0mfcmpact[0m[1;33m([0m[1;33m*[0m[0margs[0m[1;33m,[0m [1;33m**[0m[0mkwargs[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mType:[0m        Fcmpact
[1;31mString form:[0m <swat.cas.actions.Fcmpact object at 0x000001B9212FCA90>
[1;31mFile:[0m        c:\users\pestyl\anaconda3\lib\site-packages\swat\cas\actions.py
[1;31mDocstring:[0m  
FCMP

Actions
-------
fcmpact.addprototypes : Adds the PROTO definitions and stores them in a table
fcmpact.addroutines   : Adds the FCMP routines and stores them in a table
fcmpact.loadfcmplibs  : Loads FCMP tables in the session CMP library
fcmpact.loadfcmptable : Loads a single FCMP table into memory
fcmpact.runprogram    : Defines the routine to be run

In [6]:
# List the session options with their current and default values.
conn.listsessopts()

Unnamed: 0,Name,UsageType,Type,Value,Default Value,Group,Min,Max,Description
0,appTag,Session,String,,,Action,0,0,specifies the string to prefix to log messages.
1,caslib,Session,String,CASUSER(Peter.Styliadis@sas.com),,Caslib,0,0,specifies the caslib name to set as the active caslib.
2,collate,Session,String,UCA,UCA,Sort,0,0,specifies the collating sequence for sorting.
3,locale,Session,String,en_US,en_US,Localization,0,0,specifies the locale to use for sorting and formatting.
4,logFlushTime,Session,Integer,100,100,Log,-1,86400,"specifies the log flush time, in milliseconds. A value of -1 indicates to flush logs after each action completes. A value of 0 indicates to flush logs as they are produced."
5,yearcutoff,Session,Integer,1940,1940,Session,1582,19900,Specifies the first year of a 100-year span that is used by date informats and functions to read a two-digit year.
6,maxTableMem,Session,Integer,16777216,16777216,Caslib,0,0,"specifies the maximum amount of memory, in bytes, that each thread should allocate for in-memory blocks before converting to a memory-mapped file. Files are written in the directories that are specified in the CAS_DISK_CACHE environment variable. When you create a large output table, such as 5G or more for each worker, you can set this value to 256M or more to improve performance."
7,metrics,Session,Boolean,0,0,Log,0,1,"when set to True, action metrics are displayed."
8,nWorkers,Session,Integer,4,0,Session,0,5000,specifies the number of worker nodes associated with this session.
9,subsetSessionCopies,Session,Integer,0,0,Session,0,5000,specifies number of backup copies of blocks for in-memory tables to create in subset sessions.


In [7]:
# FCMP source code for the user-defined function times4, which returns its argument multiplied by 4.
myUDF = '''
    function times4( a );
        return(a*4);
    endsub;
'''

# Register the routine in package "OtherStuff" and store it in the
# casuser.multiplication function table, also saving that table to disk.
# NOTE(review): appendTable=True means re-running this cell may add the routine
# to an existing table again -- confirm against the fcmpact.addRoutines docs.
conn.fcmpact.addroutines(
    routineCode = myUDF,
    package = "OtherStuff",
    saveTable = True,
    appendTable = True,
    funcTable = dict(name = "multiplication", caslib = 'casuser', replace = True),
)

NOTE: Cloud Analytic Services saved the file MULTIPLICATION.sashdat in caslib CASUSER(Peter.Styliadis@sas.com).


In [8]:
# Reference the function table created by addroutines and show up to 40 of its rows.
x = conn.CASTable(name = 'multiplication', caslib = 'casuser')
x.head(n = 40)

Unnamed: 0,_Key_,Owner,Sequence,Type,Subtype,Name,Continue,NValue,Encoded,Value
0,OTHERSTUFF,CMP,0.0,Header,Package,,0.0,,,"<L n=""Header""><S n=""Version""><![CDATA[1.1]]></S><N n=""Datetime"">2001680465.9</N><S n=""DatetimeStr""><![CDATA[2001680465.9]]></S><S n=""SubType""><![CDATA[Package]]></S><N n=""Obfuscate"">0</N></L>"
1,F.OTHERSTUFF.TIMES4,CMP,0.0,Prototype,FCmp,OtherStuff,0.0,,,"<L n=""Prototype""><S n=""Name""><![CDATA[times4]]></S><S n=""Group""><![CDATA[]]></S><N n=""MaxLag"">0</N><N n=""Flag0"">0</N><N n=""Flag1"">128</N><S n=""ReturnType""><![CDATA[n]]></S><N n=""RetSubType"">1</N><N n=""ReturnSize"">8</N><L n=""ArgList""><L n=""Arg""><S n=""Name""><![CDATA[a]]></S><S n=""Kind""><![CDATA[v]]></S><S n=""Type""><![CDATA[n]]></S><N n=""Subtype"">1</N><S n=""Class""><![CDATA[n]]></S><N n=""Status"">0</N><N n=""Status2"">0</N><N n=""NInit"">0</N><N n=""MaxLag"">0</N><N n=""Size"">8</N><N n=""MaxChars"">0</N><N n=""Flag1"">192</N><N n=""Flag2"">0</N><N n=""Flag3"">0</N><N n=""Flag4"">0</N><N n=""Flag5"">64</N><N n=""Flag6"">32</N><N n=""Flag7"">0</N><N n=""Flag8"">0</N></L></L></L>"
2,F.OTHERSTUFF.TIMES4,CMP,1.0,Header,Function,,0.0,,,"<L n=""Header""><S n=""Version""><![CDATA[1.1]]></S><N n=""Datetime"">2001680465.9</N><S n=""DatetimeStr""><![CDATA[2001680465.9]]></S><S n=""SubType""><![CDATA[Function]]></S><N n=""Obfuscate"">0</N><S n=""Package""><![CDATA[OtherStuff]]></S></L>"
3,F.OTHERSTUFF.TIMES4,CMP,2.0,Statement Source,Executable,FUNCTION,0.0,65.0,,function times4 ( a ) ;
4,F.OTHERSTUFF.TIMES4,CMP,3.0,Statement Source,Executable,return,0.0,1.0,,return ( a * 4 ) ;
5,F.OTHERSTUFF.TIMES4,CMP,4.0,Statement Source,Executable,endsub,0.0,14.0,,endsub ;
6,F.OTHERSTUFF.TIMES4,CMP,5.0,Symbol,,_HOSTNAME_,0.0,,,"<L n=""Symbol""><S n=""Name""><![CDATA[_HOSTNAME_]]></S><S n=""Kind""><![CDATA[v]]></S><S n=""Type""><![CDATA[c]]></S><N n=""Subtype"">64</N><S n=""Class""><![CDATA[n]]></S><N n=""Status"">0</N><N n=""Status2"">0</N><N n=""NInit"">0</N><N n=""MaxLag"">0</N><N n=""Size"">32</N><N n=""MaxChars"">0</N><N n=""Flag1"">0</N><N n=""Flag2"">0</N><N n=""Flag3"">32</N><N n=""Flag4"">0</N><N n=""Flag5"">0</N><N n=""Flag6"">0</N><N n=""Flag7"">0</N><N n=""Flag8"">0</N></L>"


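Set the session's `cmplib` option to the `casuser.multiplication` function table, then call `times4` in a computed column.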

In [9]:
# Point the session's cmplib option at the casuser.multiplication function table.
# NOTE(review): presumably this is what lets later expressions resolve times4 -- confirm.
conn.setSessOpt(cmplib='casuser.multiplication')

In [10]:
# Add a computed column x3 to castbl by calling the user-defined function times4.
castbl.eval('x3 = times4(3)')

In [11]:
# Preview the table again; it now includes the computed x3 column.
castbl.head()

Unnamed: 0,CustID,bucket,Class,trx_dow_new,Region,Region_2,x3
0,1083863.0,1.0,oral care,TUE,US_AT,US Atlantic Coast,12.0
1,1083863.0,1.0,kids_swimwear,WED,US_AT,US Atlantic Coast,12.0
2,1083863.0,1.0,men_slippers,THU,US_MW,US Midwest,12.0
3,1083863.0,2.0,men_underwear,SAT,US_AT,US Atlantic Coast,12.0
4,1083883.0,2.0,DVD & Blu-ray,FRI,US_MW,US Midwest,12.0


Hook - PROC FCMP

PROC FCMP base SAS - 

FCMP - CAS (distributed)


CASL - DS2 Course Lesson predefined packages (FCMP package) 

In [12]:
# List the in-memory tables in the casuser caslib.
conn.tableInfo(caslib = 'casuser')

Unnamed: 0,Name,Rows,Columns,IndexedColumns,Encoding,CreateTimeFormatted,ModTimeFormatted,AccessTimeFormatted,JavaCharSet,CreateTime,ModTime,AccessTime,Global,Repeated,View,MultiPart,SourceName,SourceCaslib,Compressed,Creator,Modifier,SourceModTimeFormatted,SourceModTime
0,RETAIL_SALES,930046,6,0,utf-8,2023-06-06T14:18:47+00:00,2023-06-06T14:18:47+00:00,2023-06-06T14:28:25+00:00,UTF8,2001680000.0,2001680000.0,2001681000.0,0,0,0,0,RAND_RETAILDEMO.sashdat,Samples,0,Peter.Styliadis@sas.com,,2021-12-18T18:35:04+00:00,1955472000.0
1,MULTIPLICATION,7,10,0,utf-8,2023-06-06T14:21:06+00:00,2023-06-06T14:21:06+00:00,2023-06-06T14:28:25+00:00,UTF8,2001680000.0,2001680000.0,2001681000.0,0,1,0,0,,,0,Peter.Styliadis@sas.com,,,


Try the `loadfcmptable` action and see what happens.

In [56]:
# Load the saved MULTIPLICATION.sashdat FCMP table from the casuser caslib into memory.
conn.fcmpact.loadfcmptable(table = 'MULTIPLICATION.sashdat', caslib = 'casuser')

NOTE: Cloud Analytic Services made the file MULTIPLICATION.sashdat available as table MULTIPLICATION in caslib CASUSER(Peter.Styliadis@sas.com).


In [57]:
# List the in-memory casuser tables again to see the result of loadfcmptable.
conn.tableInfo(caslib = 'casuser')

Unnamed: 0,Name,Rows,Columns,IndexedColumns,Encoding,CreateTimeFormatted,ModTimeFormatted,AccessTimeFormatted,JavaCharSet,CreateTime,ModTime,AccessTime,Global,Repeated,View,MultiPart,SourceName,SourceCaslib,Compressed,Creator,Modifier,SourceModTimeFormatted,SourceModTime
0,RETAIL_SALES,930046,6,0,utf-8,2023-05-31T23:20:08+00:00,2023-05-31T23:20:08+00:00,2023-05-31T23:20:09+00:00,UTF8,2001194000.0,2001194000.0,2001194000.0,0,0,0,0,RAND_RETAILDEMO.sashdat,Samples,0,Peter.Styliadis@sas.com,,2021-12-18T18:35:04+00:00,1955472000.0
1,MULTIPLICATION,19,10,0,utf-8,2023-05-31T23:24:34+00:00,2023-05-31T23:24:34+00:00,2023-05-31T23:24:34+00:00,UTF8,2001195000.0,2001195000.0,2001195000.0,0,0,0,0,MULTIPLICATION.sashdat,CASUSER(Peter.Styliadis@sas.com),0,Peter.Styliadis@sas.com,,2023-05-31T23:12:26+00:00,2001194000.0
2,2023-05-28_WORDLELIST,14855,11,0,utf-8,2023-05-28T15:13:44+00:00,2023-05-28T15:13:44+00:00,2023-05-30T15:56:48+00:00,UTF8,2000906000.0,2000906000.0,2001081000.0,1,0,0,0,2023-05-28_wordleList.csv,CASUSER(Peter.Styliadis@sas.com),0,Peter.Styliadis@sas.com,,2023-05-28T15:12:28+00:00,2000906000.0
3,2023-05-28_ALLLETTERS,74275,3,0,utf-8,2023-05-28T15:15:50+00:00,2023-05-28T15:15:50+00:00,2023-05-30T15:58:19+00:00,UTF8,2000906000.0,2000906000.0,2001081000.0,1,0,0,0,2023-05-28_allletters.csv,CASUSER(Peter.Styliadis@sas.com),0,Peter.Styliadis@sas.com,,2023-05-28T15:12:34+00:00,2000906000.0


In [53]:
# List the files stored in the casuser caslib's data source.
conn.fileInfo(caslib = 'casuser')

Unnamed: 0,Permission,Owner,Group,Name,Size,Encryption,Time,ModTime
0,-rwxr-xr-x,sas,sas,cars.sas7bdat,139264,,2023-02-23T14:21:31+00:00,1992781000.0
1,-rwxr-xr-x,sas,sas,previoussales.sas7bdat,73728,,2023-04-26T20:22:48+00:00,1998160000.0
2,-rwxr-xr-x,sas,sas,VTI.sashdat,413080,NONE,2022-10-11T13:40:38+00:00,1981115000.0
3,-rwxr-xr-x,sas,sas,hmeq.sashdat,630384,NONE,2022-10-13T17:56:59+00:00,1981303000.0
4,-rwxr-xr-x,sas,sas,tsa_claims_raw.csv,34936205,,2023-01-16T13:13:53+00:00,1989494000.0
5,-rwxr-xr-x,sas,sas,warranty_demo.csv,53297896,,2023-05-30T13:10:29+00:00,2001071000.0
6,-rwxr-xr-x,sas,sas,warranty_final.sashdat,115538808,NONE,2023-05-30T13:52:52+00:00,2001074000.0
7,-rwxr-xr-x,sas,sas,cars.parquet,4096,NONE,2022-11-17T14:19:19+00:00,1984314000.0
8,-rwxr-xr-x,sas,sas,RAND_RETAILDEMO.csv,240072190,,2023-05-30T13:10:38+00:00,2001071000.0
9,-rwxr-xr-x,sas,sas,warranty_final.csv,43615117,,2023-05-30T13:52:57+00:00,2001074000.0


## Terminate the CAS session

In [46]:
# End the CAS session opened at the top of the notebook.
conn.terminate()