# Getting Started with Python Integration to SAS Viya

In [None]:
import swat
import pandas as pd
import numpy as np
import os
import sys
# Do not truncate the column list when pandas renders a wide DataFrame.
pd.options.display.max_columns = None

# Collect one line per component so the versions are reported together.
# For Python, only the text before the first "|" of sys.version is kept.
version_lines = [
    f'Python version:{sys.version.split("|")[0]}',
    f'swat version:{swat.__version__}',
    f'pandas version:{pd.__version__}',
    f'numpy version:{np.__version__}',
]
print('\n'.join(version_lines))

## 1. Connect to the CAS Server
**To connect to the CAS server, you need:**
- the host name
- the port number
- authentication credentials

**Be aware that connecting to the CAS server can be implemented in various ways, so you might need to see your system administrator about how to make a connection. Please follow company policy regarding authentication.**

In [None]:
######## SAS VIYA ON AZURE MARKETPLACE CONNECTION #################################################
# Active connection: connect to host `cashost` on port `casport`, passing the value of the
# ACCESS_TOKEN environment variable as the password. os.environ.get returns None when the
# variable is not set.
casport = 5570
cashost = 'sas-cas-server-default-client'
conn = swat.CAS(cashost, casport, password=os.environ.get('ACCESS_TOKEN'))
###################################################################################################

######## EXTERNAL JUPYTERHUB ACCESS USING BINARY PROTOCOL ###################
# Alternative: uncomment both lines and replace the <...> placeholders to connect
# with an explicit user name and password. casport from above is reused.
#cashost ='<CAS_IP>'
#conn = swat.CAS(cashost, casport, '<##user##>', '<##password##>')
############################################################################

####################### EXTERNAL JUPYTERHUB ACCESS USING HTTP PROTOCOL #############################################################################
# Alternative: uncomment and replace the <...> placeholders to connect through the HTTPS URL.
#conn = swat.CAS('https://<##prefix##>.<##region##>.cloudapp.azure.com/cas-shared-default-http', username='<##user##>', password ='<##password##>')
####################################################################################################################################################

In [None]:
type(conn)

Test the CAS connection and view the SAS Viya version.

In [None]:
conn.about()['About']['Viya Version']

## 2. Explore the Available Data on the CAS Server 

### a. View Available Caslibs

In [None]:
conn.caslibInfo()

### b. View Available Data Source Files

In [None]:
conn.fileInfo(caslib = 'samples')

In [None]:
conn.fileInfo(caslib = 'casuser')

### c. View Available CAS Tables

In [None]:
conn.tableInfo(caslib = 'samples')

In [None]:
conn.tableInfo(caslib = 'casuser')

## 3. Load Data into Memory on the CAS Server (Client Side)

In [None]:
## Client-side load: read the CSV at the URL and place it in memory on the CAS server
## as the table home_equity in the casuser caslib, replacing an existing table of that name.
conn.read_csv(
    r'https://support.sas.com/documentation/onlinedoc/viya/exampledatasets/home_equity.csv',
    casout=dict(name='home_equity', caslib='casuser', replace=True),
)

In [None]:
conn.tableInfo(caslib = 'casuser')

## 4. Explore the CAS Table

### a. Reference the CAS table

In [None]:
## Create a client-side reference to the home_equity table in the casuser caslib
castbl = conn.CASTable('home_equity', caslib = 'casuser')

## Show the type of the reference object and the object itself
display(type(castbl), castbl)

In [None]:
castbl.tableDetails()

### b. Preview the CAS Table

You can execute the SWAT head method on a CASTable object to return five rows to your Python client. The head method executes in the CAS server, and the CAS server returns five rows to the Python client as a SASDataFrame object.

In [None]:
## Run head on the CAS table; the returned rows are stored on the Python client in df
df = castbl.head()

## Show the type of the returned object and its contents
display(type(df), df)

### c. Basic CAS Table Exploration

View the number of rows and columns in a CAS table.

In [None]:
castbl.shape

Show CAS table column information.

In [None]:
castbl.columnInfo()

Find the count of unique values in a CAS table column using the SWAT package value_counts method. The CAS server summarizes the data and returns a Series to the Python client.

In [None]:
## Count how often each value of the JOB column occurs (SWAT value_counts method)
df = castbl['JOB'].value_counts()

## Show what kind of object came back, followed by its contents
display(type(df), df)

## Draw the counts as a bar chart on the Python client using pandas plotting
df.plot.bar(figsize=(8, 6));

### d. Execute SQL in the CAS Server

In [None]:
## Make the fedSQL action set available so SQL can be executed in CAS.
conn.loadActionSet(actionSet='fedSQL')

## Query text: number of rows per Reason, largest count first
myQuery = '''
    select Reason, count(*) as TotalCount
        from casuser.home_equity
        group by Reason
        order by TotalCount desc
'''

## Submit the query to the CAS server through the fedSQL action set
cr = conn.fedSQL.execDirect(query=myQuery)

## Show the type of the returned results object and its contents
display(type(cr), cr)

In [None]:
cr['Result Set']

## 5. Prepare the CAS Table

In [None]:
castbl.head()

### a. Create CAS Table Columns

Create two new columns in the CAS table.

In [None]:
## DIFF: the VALUE column minus the MORTDUE column
castbl.eval('DIFF = VALUE - MORTDUE')
## LOAN_STATUS: 'Paid' when BAD equals 0, otherwise 'Default'
castbl.eval("LOAN_STATUS = ifc(BAD=0,'Paid','Default')")

View the CASTable object.

In [None]:
display(castbl)

In [None]:
castbl.params

Execute the head method on the CASTable object. Notice that the new columns were created.

In [None]:
castbl.head()

### b. Create a New CAS Table

Create a new CAS table that contains the two new columns from above.

In [None]:
## Copy the table referenced by castbl (with its two calculated columns) to a new
## in-memory table named home_equity_final in the casuser caslib.
## 'replace':True allows this cell to be re-run in the same session without failing
## because the output table already exists.
castbl.copyTable(casout={'name':'home_equity_final', 
                         'caslib':'casuser', 
                         'label':'home_equity with two new calculated columns',
                         'replace':True})

Confirm that the table is available.

In [None]:
conn.tableInfo(caslib = 'casuser')

Reference and preview the new **HOME_EQUITY_FINAL** CAS table.

In [None]:
## Create a client-side reference to the HOME_EQUITY_FINAL table in the casuser caslib
final_castbl = conn.CASTable('HOME_EQUITY_FINAL', caslib = 'casuser')
## Preview the first rows of the new table
final_castbl.head()

### c. Save the CAS Table to the Data Source
Save a CAS table to a caslib's data source. This is similar to saving a DataFrame back to disk using a to_ method in pandas.

In [None]:
## File formats to write; the extension in the file name selects the format
save_file_types = ['parquet', 'sashdat', 'csv']

## Save the table once per format to the casuser caslib's data source.
## replace = True overwrites a file left behind by an earlier run, so the cell can be
## re-run even when the cleanup cell at the end of the notebook was never executed.
for ftype in save_file_types:
    final_castbl.save(name = f'home_equity_final.{ftype}', caslib = 'casuser', replace = True)

View the newly saved files in the **Casuser** caslib.

In [None]:
conn.fileInfo(caslib = 'casuser')

## 6. Session-Scope versus Global-Scope Tables
By default, when you load a table into memory, the table has session scope. This means that the table is available only to the session that it was created in. For ad hoc data access and analysis, session-scope tables are preferred because session-scope tables do not require access control checks or any form of locking for concurrent access.

The only disadvantage to a session-scope table is that no other sessions can access the same table. For example, if you want shared access to a single copy of an in-memory table, then a session-scope table does not work. In that case, a global-scope table can provide the shared access.

**Session-scope tables**
- Best used for general purpose programming.
- Typically provide better performance than global-scope tables because concurrency locks are not used.

**Global-scope tables**
- Best used for tables that are accessed by a large number of users, especially other SAS Viya visual interfaces.
- A global-scope table cannot be replaced. You must drop it and load the replacement data.

### a. Session-Scope Tables

In [None]:
conn.tableInfo(caslib = 'casuser')

View the data source files in the **Casuser** caslib. Notice the parquet file is available.

In [None]:
conn.fileInfo(caslib = 'casuser')

### b. Global-Scope Tables

Load the server-side Parquet file into memory and promote it to global scope. This enables other SAS Viya applications and users who have access to the caslib to access the CAS table.

In [None]:
## Load the saved parquet file from the casuser caslib into memory as
## home_equity_final_global, and promote the resulting table.
conn.loadTable(
    path='home_equity_final.parquet',
    caslib='casuser',
    casout=dict(name='home_equity_final_global', caslib='casuser', promote=True),
)

View available CAS tables. Notice that the Global value is 1.

In [None]:
conn.tableInfo(caslib = 'casuser')

### c. Open SAS Visual Analytics and Access the CAS Table

View available CAS tables in SAS Visual Analytics.

### d. Drop a Global-Scope Table

In [None]:
## Terminate the connection to the CAS server
conn.terminate()

##
## Reconnect to the CAS server
##

######## SAS VIYA ON AZURE MARKETPLACE CONNECTION #################################################
# Reuses the cashost and casport values assigned in the first connection cell.
conn = swat.CAS(cashost, casport, password=os.environ.get('ACCESS_TOKEN'))
###################################################################################################

######## EXTERNAL JUPYTERHUB ACCESS USING BINARY PROTOCOL ###################
# Alternative: uncomment both lines and replace the <...> placeholders.
#cashost ='<CAS_IP>'
#conn = swat.CAS(cashost, casport, '<##user##>', '<##password##>')
############################################################################

####################### EXTERNAL JUPYTERHUB ACCESS USING HTTP PROTOCOL #############################################################################
# Alternative: uncomment and replace the <...> placeholders to connect through the HTTPS URL.
#conn = swat.CAS('https://<##prefix##>.<##region##>.cloudapp.azure.com/cas-shared-default-http', username='<##user##>', password ='<##password##>')
####################################################################################################################################################

## View available CAS tables in the Casuser caslib.
## This is a new session, so tables that had session scope in the terminated
## session are not expected to be listed; the promoted table should still appear.
conn.tableInfo(caslib = 'casuser')

Drop a CAS table.

In [None]:
conn.dropTable('home_equity_final_global', caslib = 'casuser')

### e. Delete a Data Source File

In [None]:
## Names of the files that were saved to the casuser caslib's data source
deleteDataSourceFiles = ['home_equity_final.parquet', 'home_equity_final.sashdat', 'home_equity_final.csv']

## Delete each file from the data source, one deleteSource call per file
for source_name in deleteDataSourceFiles:
    conn.deleteSource(source=source_name, caslib='casuser')

## 7. Terminate the CAS Connection
When you are done, it's best practice to terminate your CAS connection.

In [None]:
conn.terminate()