# Read Multiple CSV Files

In [113]:
## Packages
import swat
import os
import pandas as pd
import numpy as np

## Options
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', None)

## Custom personal module to connect to my CAS environment.
## The module is optional: without it, the environment-variable connection below is used.
try:
    from casConnect import connect_to_cas
except ImportError:
    connect_to_cas = None
    print('CasConnect package not available')


######################
## Connect to CAS   ##
######################

## General CAS connection syntax
# conn = swat.CAS(host, port, username, password)

conn = None

## My personal connection to CAS. You will need to modify your connection object
if connect_to_cas is not None:
    try:
        conn = connect_to_cas()
    except Exception as err:
        ## Report the reason instead of silently swallowing it, then fall back below
        print(f'connect_to_cas() failed: {err!r}')

## Viya for Learners 3.5 connection settings, read from environment variables
hostValue = os.environ.get('CASHOST')
portValue = os.environ.get('CASPORT')
passwordToken = os.environ.get('SAS_VIYA_TOKEN')

## Only open the environment-based connection when the personal one is not available;
## otherwise a working connection would be replaced and its session left open.
if conn is None:
    print('My personal connection to CAS. You will need to modify yours using your connection information.')
    conn = swat.CAS(hostname=hostValue, port=portValue, password=passwordToken)

CasConnect package not available
My personal connection to CAS. You will need to modify yours using your connection information.


## Prepare data

In [117]:
def prep_data():
    """Load the warranty claims sample file into Casuser and tidy its columns.

    Uses the notebook-level ``conn`` connection object. The file
    WARRANTY_CLAIMS_0117.sashdat is loaded from the Samples caslib as the
    table ``warranty_claims`` in Casuser (replacing any existing table of that
    name). Every column is then renamed to its label with spaces turned into
    underscores, and only the columns listed in ``keepColumns`` are kept.

    Returns:
        The CASTable object referencing ``warranty_claims`` in Casuser.
    """
    ## Bring the sample file into memory in the Casuser caslib
    conn.loadTable(
        path='WARRANTY_CLAIMS_0117.sashdat',
        caslib='samples',
        casout={'name': 'warranty_claims', 'caslib': 'casuser', 'replace': True},
    )

    ##
    ## DATA PREP
    ##

    ## Handle to the table that was just loaded
    warranty_tbl = conn.CASTable('warranty_claims', caslib='casuser')

    ## Column names paired with their labels
    col_info = warranty_tbl.columnInfo()['ColumnInfo'].loc[:, ['Column', 'Label']]

    ## One rename instruction per column: label text with spaces replaced by underscores
    renameColumns = [
        {'name': column, 'rename': label.replace(' ', '_')}
        for column, label in zip(col_info['Column'], col_info['Label'])
    ]

    ## Columns (by their new names) that remain in the CAS table
    keepColumns = {
        'Campaign_Type', 'Platform', 'Trim_Level', 'Make', 'Model_Year', 'Engine_Model',
        'Vehicle_Assembly_Plant', 'Claim_Repair_Start_Date', 'Claim_Repair_End_Date',
    }

    ## Apply the renames and drop everything that is not in keepColumns
    warranty_tbl.alterTable(columns=renameColumns, keep=keepColumns)

    return warranty_tbl



def save_cas_table_as_csv_files(cas_table_reference):
    """Save one CSV file per distinct Model_Year of the given CAS table.

    Uses the notebook-level ``conn`` connection object. A subdirectory named
    csv_file_blogs is created in the Casuser caslib, and for every distinct
    Model_Year value the matching rows are saved there as
    warranty_claims_<year>.csv (replacing an existing file of the same name).
    Finally the file listings of Casuser and of the subdirectory are displayed.

    Args:
        cas_table_reference: CASTable object with a Model_Year column.
    """
    ## Create a subdirectory in the Casuser caslib named csv_file_blogs
    conn.addCaslibSubdir(name = 'casuser', path = 'csv_file_blogs')

    ## Create a CSV file for each year.
    ## The years come from the table passed in, not from a notebook-level variable,
    ## so the function works for whichever table reference the caller supplies.
    for year in cas_table_reference.Model_Year.unique():
        (cas_table_reference
         .query(f"Model_Year ='{year}'")
         .save(name = f'csv_file_blogs/warranty_claims_{year}.csv',
               caslib = 'casuser',
               replace = True)
        )

    ## View files in the Casuser caslib and in the csv_file_blogs subdirectory
    fi = conn.fileInfo(allFiles = True, caslib = 'casuser')
    fi_subdir = conn.fileInfo(path = 'csv_file_blogs', caslib = 'casuser')
    display(fi, fi_subdir)

    
## Create the CAS table (prep_data returns the CASTable reference to the prepared table)
castbl = prep_data()

## Save the CAS table as a CSV file for each year
## (one file per distinct Model_Year, written to the csv_file_blogs subdirectory of Casuser)
save_cas_table_as_csv_files(castbl)

NOTE: Cloud Analytic Services made the file WARRANTY_CLAIMS_0117.sashdat available as table WARRANTY_CLAIMS in caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: Cloud Analytic Services saved the file csv_file_blogs/warranty_claims_2015.csv in caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: Cloud Analytic Services saved the file csv_file_blogs/warranty_claims_2016.csv in caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: Cloud Analytic Services saved the file csv_file_blogs/warranty_claims_2017.csv in caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: Cloud Analytic Services saved the file csv_file_blogs/warranty_claims_2018.csv in caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: Cloud Analytic Services saved the file csv_file_blogs/warranty_claims_2019.csv in caslib CASUSER(Peter.Styliadis@sas.com).


Unnamed: 0,Permission,Owner,Group,Name,Size,Encryption,Time,ModTime
0,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,sales.sas7bdat,73728,,2023-02-06T14:19:30-04:00,1991327000.0
1,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,sales.csv,10506,,2022-12-09T12:14:52-04:00,1986222000.0
2,drwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,csv_file_blogs,4096,,2023-03-27T10:26:33-04:00,1995546000.0
3,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,heart_raw.sashdat,1051328,NONE,2022-10-12T13:06:07-04:00,1981214000.0
4,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,products.xlsx,225072,,2022-12-09T12:15:02-04:00,1986222000.0
5,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,orders_hd.sashdat,1728621720,NONE,2022-12-09T12:15:01-04:00,1986222000.0
6,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,tsa_claims_raw.csv,34936237,,2023-01-04T13:50:33-04:00,1988474000.0
7,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,warranty_claims_2015.csv,144481,,2023-03-27T09:07:18-04:00,1995542000.0
8,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,warranty_demo.csv,53297896,,2023-02-27T20:01:49-04:00,1993162000.0
9,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,myfinaltable.sashdat,74432416,NONE,2023-03-13T13:01:42-04:00,1994346000.0


Unnamed: 0,Permission,Owner,Group,Name,Size,Encryption,Time,ModTime
0,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,warranty_claims_2015.csv,144481,,2023-03-27T10:26:32-04:00,1995546000.0
1,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,warranty_claims_2016.csv,1853574,,2023-03-27T10:26:32-04:00,1995546000.0
2,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,warranty_claims_2017.csv,3994542,,2023-03-27T10:26:33-04:00,1995546000.0
3,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,warranty_claims_2018.csv,2485684,,2023-03-27T10:26:33-04:00,1995546000.0
4,-rwxr-xr-x,Peter.Styliadis@sas.com,v4e_users,warranty_claims_2019.csv,197103,,2023-03-27T10:26:33-04:00,1995546000.0


## Load all of the CSV files

In [118]:
## View the tables currently available in the Casuser caslib.
## NOTE(review): the saved output already lists ALLCSVFILES before the cell that loads it —
## looks like a leftover from an earlier run; confirm with Restart & Run All.
conn.tableInfo(caslib = 'casuser')

Unnamed: 0,Name,Rows,Columns,IndexedColumns,Encoding,CreateTimeFormatted,ModTimeFormatted,AccessTimeFormatted,JavaCharSet,CreateTime,ModTime,AccessTime,Global,Repeated,View,MultiPart,SourceName,SourceCaslib,Compressed,Creator,Modifier,SourceModTimeFormatted,SourceModTime
0,ALLCSVFILES,153217,9,0,utf-8,2023-03-27T10:26:22-04:00,2023-03-27T10:26:22-04:00,2023-03-27T10:26:22-04:00,UTF8,1995546000.0,1995546000.0,1995546000.0,0,0,0,0,csv_file_blogs,CASUSER(Peter.Styliadis@sas.com),0,Peter.Styliadis@sas.com,,,
1,WARRANTY_CLAIMS,153217,9,0,utf-8,2023-03-27T10:26:32-04:00,2023-03-27T10:26:32-04:00,2023-03-27T10:26:33-04:00,UTF8,1995546000.0,1995546000.0,1995546000.0,0,0,0,0,WARRANTY_CLAIMS_0117.sashdat,Samples,1,Peter.Styliadis@sas.com,,2020-02-20T00:20:58-04:00,1897792000.0


In [119]:
## Load the CSV files in the csv_file_blogs subdirectory of Casuser into a single CAS table
conn.loadTable(
    path = "csv_file_blogs",                                    ## Subdirectory that holds the CSV files
    caslib = 'casuser',                                         ## Input caslib
    importOptions = dict(fileType = 'CSV', multiFile = True),   ## Import options
    casOut = dict(name = 'allCSVFiles',                         ## Output CAS table information
                  caslib = 'casuser',
                  replace = True),
)

NOTE: The file, '/shared/home/Peter.Styliadis@sas.com/casuser/csv_file_blogs/warranty_claims_2015.csv' was used to create the CAS Table column names.
NOTE: The CSV file table load for table, 'allCSVFiles' produced 153217 rows from 5 files.
NOTE: Cloud Analytic Services made the file csv_file_blogs available as table ALLCSVFILES in caslib CASUSER(Peter.Styliadis@sas.com).


In [120]:
## View the tables in the Casuser caslib. The caslib is named explicitly so the result
## does not depend on which caslib happens to be active for the session.
conn.tableInfo(caslib = 'casuser')

Unnamed: 0,Name,Rows,Columns,IndexedColumns,Encoding,CreateTimeFormatted,ModTimeFormatted,AccessTimeFormatted,JavaCharSet,CreateTime,ModTime,AccessTime,Global,Repeated,View,MultiPart,SourceName,SourceCaslib,Compressed,Creator,Modifier,SourceModTimeFormatted,SourceModTime
0,WARRANTY_CLAIMS,153217,9,0,utf-8,2023-03-27T10:26:32-04:00,2023-03-27T10:26:32-04:00,2023-03-27T10:26:33-04:00,UTF8,1995546000.0,1995546000.0,1995546000.0,0,0,0,0,WARRANTY_CLAIMS_0117.sashdat,Samples,1,Peter.Styliadis@sas.com,,2020-02-20T00:20:58-04:00,1897792000.0
1,ALLCSVFILES,153217,9,0,utf-8,2023-03-27T10:26:56-04:00,2023-03-27T10:26:56-04:00,2023-03-27T10:26:56-04:00,UTF8,1995546000.0,1995546000.0,1995546000.0,0,0,0,0,csv_file_blogs,CASUSER(Peter.Styliadis@sas.com),0,Peter.Styliadis@sas.com,,,


In [121]:
## Reference the allcsvfiles table in Casuser and preview its first rows
allcsvfilesTbl = conn.CASTable('allcsvfiles', caslib = 'casuser')
allcsvfilesTbl.head()

Unnamed: 0,Engine_Model,Vehicle_Assembly_Plant,Claim_Repair_End_Date,Campaign_Type,Platform,Claim_Repair_Start_Date,Trim_Level,Make,Model_Year
0,8 cylinder,Pittsburgh,20554.0,Type 6,XE,20551.0,110.0,Zeus,2016.0
1,8 cylinder,Charlotte,21087.0,Type 6,XE,21084.0,95.0,Titan,2016.0
2,8 cylinder,Charlotte,21099.0,Type 6,XE,21096.0,122.0,Zeus,2016.0
3,4 cylinder,Pittsburgh,20527.0,Type 6,XE,20524.0,110.0,Zeus,2016.0
4,4 cylinder,Detroit,21071.0,Type 6,XE,21064.0,110.0,Zeus,2016.0


## Add input file name and path columns

In [122]:
## Load the CSV files again, this time asking for the source file name and full path of each row
conn.loadTable(
    path = "csv_file_blogs",                        ## Subdirectory that holds the CSV files
    caslib = 'casuser',                             ## Input caslib
    importOptions = dict(fileType = 'CSV',          ## Import options
                         multiFile = True,
                         showFile = True,
                         showFullPath = True),
    casOut = dict(name = 'allCSVFiles_path_info',   ## Output CAS table information
                  caslib = 'casuser',
                  replace = True),
)

NOTE: The file, '/shared/home/Peter.Styliadis@sas.com/casuser/csv_file_blogs/warranty_claims_2015.csv' was used to create the CAS Table column names.
NOTE: The CSV file table load for table, 'allCSVFiles_path_info' produced 153217 rows from 5 files.
NOTE: Cloud Analytic Services made the file csv_file_blogs available as table ALLCSVFILES_PATH_INFO in caslib CASUSER(Peter.Styliadis@sas.com).


In [123]:
## View the tables in the Casuser caslib. The caslib is named explicitly so the result
## does not depend on which caslib happens to be active for the session.
conn.tableInfo(caslib = 'casuser')

Unnamed: 0,Name,Rows,Columns,IndexedColumns,Encoding,CreateTimeFormatted,ModTimeFormatted,AccessTimeFormatted,JavaCharSet,CreateTime,ModTime,AccessTime,Global,Repeated,View,MultiPart,SourceName,SourceCaslib,Compressed,Creator,Modifier,SourceModTimeFormatted,SourceModTime
0,WARRANTY_CLAIMS,153217,9,0,utf-8,2023-03-27T10:26:32-04:00,2023-03-27T10:26:32-04:00,2023-03-27T10:26:33-04:00,UTF8,1995546000.0,1995546000.0,1995546000.0,0,0,0,0,WARRANTY_CLAIMS_0117.sashdat,Samples,1,Peter.Styliadis@sas.com,,2020-02-20T00:20:58-04:00,1897792000.0
1,ALLCSVFILES,153217,9,0,utf-8,2023-03-27T10:26:56-04:00,2023-03-27T10:26:56-04:00,2023-03-27T10:27:02-04:00,UTF8,1995546000.0,1995546000.0,1995546000.0,0,0,0,0,csv_file_blogs,CASUSER(Peter.Styliadis@sas.com),0,Peter.Styliadis@sas.com,,,
2,ALLCSVFILES_PATH_INFO,153217,11,0,utf-8,2023-03-27T10:27:41-04:00,2023-03-27T10:27:41-04:00,2023-03-27T10:27:41-04:00,UTF8,1995546000.0,1995546000.0,1995546000.0,0,0,0,0,csv_file_blogs,CASUSER(Peter.Styliadis@sas.com),0,Peter.Styliadis@sas.com,,,


In [124]:
## Reference the allcsvfiles_path_info table in Casuser and preview its first rows
## (the saved output shows the extra fullPath and fileName columns)
allcsvfiles_path_infoTbl = conn.CASTable('allcsvfiles_path_info', caslib = 'casuser')
allcsvfiles_path_infoTbl.head()

Unnamed: 0,fullPath,fileName,Engine_Model,Vehicle_Assembly_Plant,Claim_Repair_End_Date,Campaign_Type,Platform,Claim_Repair_Start_Date,Trim_Level,Make,Model_Year
0,/shared/home/Peter.Styliadis@sas.com/casuser/csv_file_blogs/warranty_claims_2018.csv,warranty_claims_2018.csv,8 cylinder,Charlotte,21195.0,Type 6,XE,21192.0,110.0,Zeus,2018.0
1,/shared/home/Peter.Styliadis@sas.com/casuser/csv_file_blogs/warranty_claims_2018.csv,warranty_claims_2018.csv,4 cylinder,Detroit,21252.0,Type 6,XE,21249.0,110.0,Zeus,2018.0
2,/shared/home/Peter.Styliadis@sas.com/casuser/csv_file_blogs/warranty_claims_2018.csv,warranty_claims_2018.csv,Turbo,Charlotte,21332.0,Type 6,GX,21329.0,30.0,Titan,2018.0
3,/shared/home/Peter.Styliadis@sas.com/casuser/csv_file_blogs/warranty_claims_2018.csv,warranty_claims_2018.csv,4 cylinder,Charlotte,21332.0,Type 6,XE,21329.0,110.0,Zeus,2018.0
4,/shared/home/Peter.Styliadis@sas.com/casuser/csv_file_blogs/warranty_claims_2018.csv,warranty_claims_2018.csv,4 cylinder,Charlotte,21191.0,Type 6,XE,21188.0,110.0,Zeus,2018.0


## Delete all demo files

Get a list of all the files in the csv_file_blogs subdirectory.

In [126]:
## Names of the files listed for the csv_file_blogs subdirectory of Casuser
allFiles = conn.fileInfo(path = 'csv_file_blogs', caslib = 'casuser')['FileInfo']['Name'].tolist()

allFiles

['warranty_claims_2015.csv',
 'warranty_claims_2016.csv',
 'warranty_claims_2017.csv',
 'warranty_claims_2018.csv',
 'warranty_claims_2019.csv']

Delete each CSV file.

In [127]:
## Remove the CSV files one at a time; each source path is given relative to the Casuser caslib
for csv_name in allFiles:
    conn.deleteSource(caslib = 'casuser', source = f'csv_file_blogs/{csv_name}')

NOTE: Cloud Analytic Services removed the source data csv_file_blogs/warranty_claims_2015.csv from caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: Cloud Analytic Services removed the source data csv_file_blogs/warranty_claims_2016.csv from caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: Cloud Analytic Services removed the source data csv_file_blogs/warranty_claims_2017.csv from caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: Cloud Analytic Services removed the source data csv_file_blogs/warranty_claims_2018.csv from caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: Cloud Analytic Services removed the source data csv_file_blogs/warranty_claims_2019.csv from caslib CASUSER(Peter.Styliadis@sas.com).


Delete the subdirectory csv_file_blogs.

In [128]:
## Remove the csv_file_blogs subdirectory itself from the Casuser caslib
## (run after the files inside it have been deleted)
conn.deleteSource(source = 'csv_file_blogs', caslib = 'casuser')

NOTE: Cloud Analytic Services removed the source data csv_file_blogs from caslib CASUSER(Peter.Styliadis@sas.com).


## Terminate the CAS session

In [129]:
## End the CAS session held by the conn connection object
conn.terminate()