# Create User Defined Functions (UDFs) in CAS
SAS Blog: [Python Integration to SAS® Viya® - Part 22 - Create User Defined Functions (UDFs)](https://blogs.sas.com/content/sgf/2023/10/11/python-integration-to-sas-viya-part-22-create-user-defined-functions-udfs/)

In [1]:
## Packages
import swat
import sys
import os
import pandas as pd
import numpy as np

## My custom package to connect to the CAS Server. Will not work in your environment.
## Only a failed import is reported as "package not available"; any other
## error raised while importing casauth is allowed to surface instead of
## being hidden by a bare except.
try:
    from casauth import CASAuth
except ImportError:
    print('casauth package not available')
else:
    print('Imported personal custom CAS auth package')


## Report the interpreter and library versions used to run this notebook
print(f'Python version:{sys.version.split("|")[0]}')
print(f'swat version:{swat.__version__}')
print(f'pandas version:{pd.__version__}')
print(f'numpy version:{np.__version__}')

Imported personal custom CAS auth package
Python version:3.8.16 (default, Mar  2 2023, 03:18:16) [MSC v.1916 64 bit (AMD64)]
swat version:1.13.1
pandas version:1.5.3
numpy version:1.24.3


## Make a Connection to CAS (REQUIRED: MODIFY CONNECTION INFORMATION)

##### To connect to the CAS server you will need:
1. the host name, 
2. the port number, 
3. your user name, and your password.

Visit the documentation [Getting Started with SAS® Viya® for Python](https://go.documentation.sas.com/doc/en/pgmsascdc/default/caspg3/titlepage.htm) for more information about connecting to CAS.

**Be aware that connecting to the CAS server can be implemented in various ways, so you might need to see your system administrator about how to make a connection. Please follow company policy regarding authentication.**

In [None]:
##
## Connect to CAS
##

################################
## General connection syntax  ##
################################
# conn = swat.CAS(host, port, username, password)

############################################
## SAS Viya for Learners 3.5 connection   ##
############################################
# hostValue = os.environ.get('CASHOST')
# portValue = os.environ.get('CASPORT')
# passwordToken=os.environ.get('SAS_VIYA_TOKEN')
# conn = swat.CAS(hostname=hostValue, port=portValue, password=passwordToken)


##############################
## My Personal connection   ##
##############################
## Best-effort connection through the author's CASAuth helper, configured from
## two environment variables. Catching Exception (instead of a bare except)
## still covers the NameError raised when CASAuth could not be imported, but
## no longer swallows KeyboardInterrupt / SystemExit.
try:
    path = os.getenv('CAS_CREDENTIALS')
    pem_file = os.getenv('CAS_CLIENT_SSL_CA_LIST')
    conn = CASAuth(path, ssl_ca_list = pem_file)
except Exception as err:
    print('No connection')
    ## Show why the connection failed so the problem can be diagnosed
    print(f'Reason: {type(err).__name__}: {err}')

## Enter your connection information to CAS below

In [4]:
## conn = swat.CAS()

## Create the fake data for the demonstration

In [None]:
## Create a pandas DataFrame with a single character column named Temp.
## Each value holds two "name = value;" statements.
temp_statements = [
    "HighTemp = 83; LowTemp = 55;",
    "HighTemp = 86; LowTemp = 59;",
    "HighTemp = 92; LowTemp = 63;",
    "HighTemp = 91; LowTemp = 65;",
    "HighTemp = 80; LowTemp = 51;",
]
df = pd.DataFrame({"Temp": temp_statements})

## Load the pandas DataFrame to the CAS server as a distributed CAS table.
## The output table is SAMPLE_DATA in the casuser caslib; replace is enabled.
sample_casout = {'name':'SAMPLE_DATA', 'caslib':'casuser', 'replace':True}
castbl = conn.upload_frame(df, casout = sample_casout)

## Preview the CAS table
available_tables = conn.tableInfo(caslib = 'casuser')   ## Available in-memory CAS tables
first_rows = castbl.head()                              ## First 5 rows of the castbl
display(available_tables, first_rows)

## Create a User Defined Function (UDF)

### Pandas solution
If I wanted to solve this problem by creating specific UDFs in pandas, I might do something like this.

In [None]:
##
## Create function to get desired number (this is nice with Pandas)
##


# Separate statements in a column and obtain the number
def get_numeric_value(data, column, position):
    """Return the integer at the end of one statement in a text column.

    Each value of ``data[column]`` is expected to hold ``;``-separated
    statements such as ``"HighTemp = 83; LowTemp = 55;"``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that contains ``column``.
    column : str
        Name of the character column to parse.
    position : int
        1-based position of the statement to read (1, 2, 3 rather than 0, 1, 2).

    Returns
    -------
    pandas.Series
        Integer Series holding the last token of the selected statement.
    """
    return (data[column]
            .str.split(';')          ## Split all statements by ;
            .str[position - 1]       ## Get the statement by position (Use 1,2,3 instead of 0,1,2)
            .str.split()             ## Split on runs of whitespace so leading/trailing blanks never yield an empty token
            .str[-1]                 ## Pull the last element (the number)
            .astype('int'))          ## Convert the character number column to a numeric column


## Convert Fahrenheit to Celsius
def convert_f_to_c(data, col):
    """Return the Fahrenheit column ``col`` of ``data`` as whole degrees Celsius.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that contains ``col``.
    col : str
        Name of the numeric column holding Fahrenheit temperatures.

    Returns
    -------
    pandas.Series
        Integer Series of Celsius temperatures rounded to the nearest degree.
    """
    temp_column = data[col]
    ## Round before casting: a bare astype('int') truncates (91F -> 32.78 -> 32),
    ## which disagrees with the f_to_c CAS UDF in this notebook that uses round().
    ## Multiplying by 5 and then dividing by 9 avoids the inexact 5/9 factor.
    celsius_temp = ((temp_column - 32) * 5 / 9).round().astype('int')
    return celsius_temp


## Create the final DataFrame
## Step 1: pull the two Fahrenheit readings out of the Temp statements
fahrenheit_df = df.assign(
    HighTemp_F = get_numeric_value(df, 'Temp', 1),
    LowTemp_F = get_numeric_value(df, 'Temp', 2),
)

## Step 2: derive the Celsius columns from the Fahrenheit columns added in step 1
final_df = fahrenheit_df.assign(
    HighTemp_C = convert_f_to_c(fahrenheit_df, 'HighTemp_F'),
    LowTemp_C = convert_f_to_c(fahrenheit_df, 'LowTemp_F'),
)

final_df.head()

### CAS solution
Remember the CAS solution will run on the CAS cluster for massively parallel processing (MPP)

#### Load the fcmpact action set

In [None]:
## Load the fcmpact action set in this CAS session; the addroutines and
## loadfcmptable calls later in the notebook are used after this step.
conn.loadActionSet('fcmpact')

View available actions.

In [None]:
## The trailing ? is IPython/Jupyter help syntax (not plain Python); it shows
## the help for the fcmpact action set on this connection.
conn.fcmpact?

#### Create the UDF in CAS

Comparing the SAS functions to the Pandas method.

| SAS function | Pandas method |
| --- | --- |
| SCAN | SPLIT + [*n*] |
| INPUT | ASTYPE |

In [None]:
##
## Create CAS UDFs
##
## Each UDF is written as FCMP source code inside a Python string. The strings
## are sent to the CAS server unchanged by the addroutines call below.

## Separate statements in a column and obtain the number
## get_temp_value(colname $, position): colname is a character argument ($),
## position selects which ;-separated statement to read. The last
## blank-separated word of that statement is converted to a number and returned.
get_temp_value_func = '''
    function get_temp_value(colname $, position);
        
        /* Get the statement by position */
        get_statement_from_position = scan(colname, position,';');
        
        /* Get the number from the string */
        get_number_as_string = scan(get_statement_from_position, -1, ' ');
        
        /* Get the number from the statement and convert to a numeric column */
        convert_string_to_numeric = input(get_number_as_string, 8.);
        
        /* Return numeric value */
        return(convert_string_to_numeric);
        
    endsub;
'''

## Convert Fahrenheit to Celsius
## f_to_c(f_temp): returns the converted temperature after round() is applied.
f_to_c_func = '''
    function f_to_c(f_temp);
        
        /* Convert the Fahrenheit temp to Celsius */
        c_temp = round((f_temp - 32) * (5/9));
        
        /* Return celsius value */
        return(c_temp);
        
    endsub;
'''

## Add UDF to functions CAS table
## Both definitions are concatenated into one routineCode string, so a single
## addroutines call submits them together to the my_udfs table in casuser.
## NOTE(review): saveTable = True presumably also saves the table to the caslib
## data source (the notebook later checks for MY_UDFS.sashdat), and
## appendTable = True presumably adds to an existing table rather than
## replacing it - confirm against the fcmpact documentation.
conn.addroutines(routineCode = get_temp_value_func + f_to_c_func,
                 saveTable = True,
                 funcTable = {'name':"my_udfs", 'caslib':'casuser'},
                 appendTable = True)

In [None]:
## Display the available in-memory CAS tables in the casuser caslib
conn.tableInfo(caslib = 'casuser')

In [None]:
## Request file information for MY_UDFS.sashdat in the casuser caslib
## (presumably the saved copy of the my_udfs table - verify in the output).
conn.fileInfo(caslib = 'casuser', path = 'MY_UDFS.sashdat')

If the CAS table that holds the UDF definitions is not already loaded into memory, you have to load it with the loadfcmptable action. In this notebook the table is already in memory because the addroutines action above created it, so the next cell is shown for completeness. In a new CAS session you would typically have to load it first.

In [None]:
## Load the UDF definitions from the MY_UDFS.sashdat file in the casuser caslib
conn.loadfcmptable(table = 'MY_UDFS.sashdat', caslib = 'casuser')

#### Add the location of the CAS table that will contain the UDFs.

Set the **cmplib** option to point to the CAS table that we are creating that will contain the UDFs. This option specifies one or more SAS data sets that contain compiler subroutines to include during compilation. You can specify multiple tables.

In [None]:
## Set the cmplib session option to the my_udfs table in the casuser caslib
conn.setSessOpt(cmplib = 'casuser.my_udfs')

View the option value to confirm.

In [None]:
## Take the SessOpts table from the listsessopts result, then keep only the
## row(s) whose Name is cmplib
session_options = conn.listsessopts()['SessOpts']
session_options.query('Name == "cmplib"')

### Use the UDF

Confirm castbl is a CAS table.

In [None]:
## Evaluating the bare name displays the object so its type can be checked
castbl

In [None]:
## Preview the first rows of the CAS table before adding the new columns
castbl.head()

Create the new columns and preview the table to confirm the functions work as expected.

In [None]:
## The inplace = False parameter temporarily adds the parameter to the object and returns another CAS table. This enables chaining methods.
## Each eval expression assigns one new column; the Celsius columns are computed
## from the Fahrenheit columns created earlier in the same chain.
(castbl
 .eval("HighTempF = get_temp_value(Temp,1)", inplace = False)        ## 1st statement of Temp
 .eval("LowTempF = get_temp_value(Temp,2)", inplace = False)         ## 2nd statement of Temp
 .eval("HighTempCelsius = f_to_c(HighTempF)", inplace = False)       ## Convert HighTempF with the f_to_c UDF
 .eval("LowTempCelsius = f_to_c(LowTempF)", inplace = False)         ## Convert LowTempF with the f_to_c UDF
 .head()                                                             ## Preview the result
)

## Create the Final CAS Table
The copyTable action creates a new CAS table named final_table.

In [None]:
## Recompute the four columns with the UDFs and copy the result to a new CAS
## table named final_table in the casuser caslib.
## 'replace':True mirrors the upload_frame call used for SAMPLE_DATA, so this
## cell can be re-run instead of erroring when final_table already exists.
(castbl
 .eval("HighTempF = get_temp_value(Temp,1)", inplace = False)
 .eval("LowTempF = get_temp_value(Temp,2)", inplace = False)
 .eval("HighTempCelsius = f_to_c(HighTempF)", inplace = False)
 .eval("LowTempCelsius = f_to_c(LowTempF)", inplace = False)
 .copyTable(casout = {'name':'final_table', 'caslib':'casuser', 'replace':True})
)

In [None]:
## Display the in-memory CAS tables in casuser to check for final_table
conn.tableInfo(caslib = 'casuser')

In [None]:
## Reference the FINAL_TABLE CAS table in the casuser caslib and preview its first rows
finalTbl = conn.CASTable('FINAL_TABLE', caslib = 'casuser')
finalTbl.head()

/* Create CAS session */
    
*cas conn;*

    
/* Create a libref to the Casuser caslib */
    
*libname casuser cas caslib='casuser';*

    
/* Create the test CAS table */
    
*data casuser.tempdata;*
    
*Temp = 'HighTemp = 83; LowTemp = 55;';*
    
*output;*
    
*Temp = 'HighTemp = 86; LowTemp = 59;';*
    
*output;*
    
*Temp = 'HighTemp = 92; LowTemp = 63;';*
    
*output;*
    
*Temp = 'HighTemp = 91; LowTemp = 65;';*
    
*output;*
    
*Temp = 'HighTemp = 80; LowTemp = 51;';*
    
*output;*
    
*run; *

    
/* Load the MY_UDFS file into memory to make the function definitions available */
    
*proc cas;*
    
    *fcmpact.loadFcmpTable /*
    
    *table='MY_UDFS.sashdat',*
    
    *caslib = 'casuser';*
    
*quit;*

/* Modify the cmplib option to use the CAS table */
    
*options sessopts=(cmplib='casuser.my_udfs') cmplib=(casuser.my_udfs);*
    

/* Use the function in the SAS data set to run in the CAS cluster */
    
*data casuser.final_sas / sessref=conn;*
    
    *set casuser.tempdata;*
    
   *HighTempF = get_temp_value(Temp,1);*
    
   * LowTempF = get_temp_value(Temp,2);*
    
    *HighTempCelsius = f_to_c(HighTempF);*
    
    *LowTempCelsius = f_to_c(LowTempF);*
    
*run;*

/* Preview the final CAS table */
    
*proc print data=casuser.final_sas(obs=5);*
    
*run;*


## Use the Function in SAS Code

Once the UDFs are created using the addroutines action, you can use them in other languages that work with the CAS server.


### SAS

In SAS Studio:

/* Create CAS session */
    
cas conn;

    
/* Create a libref to the Casuser caslib */
    
libname casuser cas caslib='casuser';

    
/* Create the test CAS table */
    
data casuser.tempdata;
    
Temp = 'HighTemp = 83; LowTemp = 55;';
    
output;
    
Temp = 'HighTemp = 86; LowTemp = 59;';
    
output;
    
Temp = 'HighTemp = 92; LowTemp = 63;';
    
output;
    
Temp = 'HighTemp = 91; LowTemp = 65;';
    
output;
    
Temp = 'HighTemp = 80; LowTemp = 51;';
    
output;
    
run; 

    
/* Load the MY_UDFS file into memory to make the function definitions available */
    
proc cas;
    
	fcmpact.loadFcmpTable / 
    
		table='MY_UDFS.sashdat', 
    
		caslib = 'casuser';
    
quit;

/* Modify the cmplib option to use the CAS table */
    
options sessopts=(cmplib='casuser.my_udfs') cmplib=(casuser.my_udfs);
    

/* Use the function in the SAS data set to run in the CAS cluster */
    
data casuser.final_sas / sessref=conn;
    
    set casuser.tempdata;
    
    HighTempF = get_temp_value(Temp,1);
    
    LowTempF = get_temp_value(Temp,2);
    
    HighTempCelsius = f_to_c(HighTempF);
    
    LowTempCelsius = f_to_c(LowTempF);
    
run;

/* Preview the final CAS table */
    
proc print data=casuser.final_sas(obs=5);
    
run;


## Terminate the CAS session

In [None]:
## End the CAS session held by conn now that the demonstration is finished
conn.terminate()