# Connect to Snowflake

In [3]:
## Packages
import swat
import sys
import os
import pandas as pd
import numpy as np
import json


## Options
# Show up to 50 columns and never truncate column contents when displaying DataFrames
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', None)

## Optional personal helper used to create the CAS connection
try:
    from casauth import CASAuth
    print('Imported personal custom CAS auth package')
except ImportError:
    # Only a missing package is expected here; any other failure should surface
    # instead of being silently reported as "not available".
    print('casauth package not available')

## Report the versions used to run this notebook
print(f'Python version:{sys.version.split("|")[0]}')
print(f'swat version:{swat.__version__}')
print(f'pandas version:{pd.__version__}')
print(f'numpy version:{np.__version__}')

Imported personal custom CAS auth package
Python version:3.8.16 (default, Mar  2 2023, 03:18:16) [MSC v.1916 64 bit (AMD64)]
swat version:1.13.0
pandas version:1.5.3
numpy version:1.24.3


## Connect to CAS
Connect to the CAS server using my personal connection information, which is read from environment variables.

In [4]:
# Credentials location and SSL certificate list both come from environment variables
path, pem_file = os.getenv('CAS_CREDENTIALS'), os.getenv('CAS_CLIENT_SSL_CA_LIST')

# Create the CAS connection with the personal CASAuth helper
conn = CASAuth(path, ssl_ca_list=pem_file)

CAS Connection created


Current version of SAS Viya.

In [5]:
# Fetch the server's "About" information, then display only the Viya version
about_info = conn.about()
about_info['About']['Viya Version']

NOTE: Grid node action status report: 5 nodes, 9 total actions executed.


'Stable 2023.05'

## Connect to Snowflake
Connect to my demonstration Snowflake environment. I'm using a simple username and password to connect. Please follow any company guidelines to connect to your production Snowflake environment. I've stored all of my credentials in a JSON file. 

JSON file example:

{

    "srctype": "snowflake",
    "server": "<your account>.snowflakecomputing.com",
    "database": "database-name",
    "schema": "schema-name",
    "userName": "user-name",
    "password": "password"
}

For more information about connecting to a Snowflake database:
- [SAS Viya Best practices with Snowflake Data](https://video.sas.com/detail/video/6312274491112/sas-viya-best-practices-with-snowflake-data)
- [Documentation - Snowflake Data Connector](https://go.documentation.sas.com/doc/en/pgmsascdc/default/casref/p183rli8obtde3n10y9bzbrpwnsh.htm)

In [None]:
## Get my Snowflake connection information
# Build the path with os.path.join so it works on any OS and avoids the invalid
# '\s' escape sequence; the context manager guarantees the file handle is closed.
snow_creds_file = os.path.join(os.getenv('CAS_CREDENTIALS'), 'snowflake_creds.json')
with open(snow_creds_file) as creds_fh:
    snow_creds = json.load(creds_fh)

## Add the connection to Snowflake to CAS
# The caslib's data source settings are taken from the JSON credentials file.
conn.addcaslib(name = 'my_snow_db',
               datasource = dict(
                        srctype = 'snowflake',
                        server = snow_creds['server'],
                        database = snow_creds['database'],
                        schema = snow_creds['schema'],
                        userName = snow_creds['userName'],
                        password = snow_creds['password']
               )
              )

View available tables in Snowflake.

In [7]:
# List the data source tables visible through the Snowflake caslib
conn.fileInfo(caslib='my_snow_db')

Unnamed: 0,Catalog,Schema,Name,Type,Description
0,NEWDB,PUBLIC,CUSTOMER,TABLE,
1,NEWDB,PUBLIC,TESTTABLE,TABLE,


View available in-memory CAS tables in the Snowflake caslib (should be none since we haven't loaded anything into memory).

In [8]:
# List the in-memory CAS tables in the Snowflake caslib
conn.tableInfo(caslib='my_snow_db')

NOTE: No tables are available in caslib my_snow_db of Cloud Analytic Services.


## Executing SQL

In [19]:
# Load the fedSQL action set; the SQL cells below call execDirect on this connection
conn.loadActionSet('fedSQL')

NOTE: Added action set 'fedSQL'.


### Use SAS FedSQL

In [33]:
# FedSQL query against the Snowflake caslib: number of customers per birth year
myQuery = '''
    SELECT C_BIRTH_YEAR, COUNT(*) as COUNT
    FROM MY_SNOW_DB.CUSTOMER
    GROUP BY C_BIRTH_YEAR;
'''

# method=True also prints the query plan, showing which steps run where
conn.execDirect(
    query=myQuery,
    method=True,
)

 
Methods for full query plan
----------------------------
        Agg 
          Sort 
            SeqScan from my_snow_db.CUSTOMER 
 
NOTE: Performing serial LoadTable action using SAS Data Connector to Snowflake.
Methods for stage 1
--------------------
        Agg 
            SeqScan with _pushed_ order by from my_snow_db.CUSTOMER 
 


Unnamed: 0,C_BIRTH_YEAR,COUNT
0,1983.0,1395637
1,1991.0,1398782
2,1988.0,1401315
3,1984.0,1401905
4,1987.0,1398236
5,1990.0,1398009
6,1981.0,1395851
7,1989.0,1397778
8,1985.0,1397692
9,,3500856


In [30]:
# Check whether the FedSQL query left any in-memory table in the Snowflake caslib
conn.tableInfo(caslib='my_snow_db')

NOTE: No tables are available in caslib my_snow_db of Cloud Analytic Services.


In [32]:
# Same aggregation as before, this time demanding full pass-through to Snowflake
myQuery = '''
    SELECT C_BIRTH_YEAR, COUNT(*) as COUNT
    FROM MY_SNOW_DB.CUSTOMER
    GROUP BY C_BIRTH_YEAR;
'''

# With requireFullPassThrough, FedSQL stops when the whole query
# cannot be pushed down to the underlying data source.
pass_through_opts = {'requireFullPassThrough': True}
conn.execDirect(query=myQuery, method=True, cntl=pass_through_opts)

 
Methods for full query plan
----------------------------
        Agg 
          Sort 
            SeqScan from my_snow_db.CUSTOMER 
 
NOTE: Failure of broadcastDSSTextAndLen prevents full pass-through.
NOTE: Full pass-through to the underlying data source was not possible. Stopping execution.


### Use Snowflake SQL to process in the database

In [38]:
# Explicit pass-through: the inner SELECT is native Snowflake SQL sent through the caslib connection
myQuery = '''
    SELECT * FROM CONNECTION TO MY_SNOW_DB
        (SELECT C_BIRTH_YEAR, COUNT(*) as COUNT
         FROM NEWDB.PUBLIC.CUSTOMER
         GROUP BY C_BIRTH_YEAR);
'''

# method=True prints the query plan so the push-down can be confirmed
conn.execDirect(query=myQuery, method=True)

 
Methods for full query plan
----------------------------
        SeqScan from my_snow_db.__fedsql_cep_1__ 
 
Methods for stage 1
--------------------
        FedSQL did not generate a plan. Entire query can be pushed to driver.
 


Unnamed: 0,C_BIRTH_YEAR,COUNT
0,1988.0,1401315.0
1,1980.0,1403085.0
2,1982.0,1400132.0
3,1981.0,1395851.0
4,1983.0,1395637.0
5,1984.0,1401905.0
6,1985.0,1397692.0
7,1986.0,1397627.0
8,,3500856.0
9,1990.0,1398009.0


## Using CAS Actions to process the Snowflake data in the CAS server

View available tables in Snowflake.

In [39]:
# List the Snowflake tables available to load through the caslib
conn.fileInfo(caslib='my_snow_db')

Unnamed: 0,Catalog,Schema,Name,Type,Description
0,NEWDB,PUBLIC,CUSTOMER,TABLE,
1,NEWDB,PUBLIC,TESTTABLE,TABLE,


Load the Snowflake table into memory in the CAS server. Once the data is loaded in CAS you can use available Pandas API methods and CAS actions from the SWAT package to process your data.

Here I'll load the Snowflake table into memory in the **Casuser** caslib and name the CAS table **CAS_CUSTOMERS**.

In [42]:
# Load the Snowflake CUSTOMER table into CAS memory as cas_customers in the casuser caslib.
# 'replace': True keeps this cell re-runnable: without it, running the cell a second
# time in the same session fails because the output table already exists.
conn.loadTable('CUSTOMER', caslib = 'my_snow_db',
               casOut = {
                   'name':'cas_customers',
                   'caslib':'casuser',
                   'replace':True
               })

NOTE: Performing serial LoadTable action using SAS Data Connector to Snowflake.
NOTE: Cloud Analytic Services made the external data from CUSTOMER available as table CAS_CUSTOMERS in caslib CASUSER(Peter.Styliadis@sas.com).


View available in-memory CAS tables.

In [43]:
# List the in-memory CAS tables in the casuser caslib
conn.tableInfo(caslib='casuser')

Unnamed: 0,Name,Rows,Columns,IndexedColumns,Encoding,CreateTimeFormatted,ModTimeFormatted,AccessTimeFormatted,JavaCharSet,CreateTime,ModTime,AccessTime,Global,Repeated,View,MultiPart,SourceName,SourceCaslib,Compressed,Creator,Modifier,SourceModTimeFormatted,SourceModTime
0,CAS_CUSTOMERS,21688526,18,0,utf-8,2023-06-21T14:25:43+00:00,2023-06-21T14:25:43+00:00,2023-06-21T14:25:43+00:00,UTF8,2002977000.0,2002977000.0,2002977000.0,0,0,0,0,CUSTOMER,my_snow_db,0,Peter.Styliadis@sas.com,,,


Reference the CAS table on the client.

In [44]:
# Client-side reference to the in-memory table; the data itself stays in CAS
castbl = conn.CASTable('cas_customers', caslib='casuser')

View CAS table details.

In [45]:
# Show table-level details (blocks, rows, data size) for the CAS table
castbl.tableDetails()

Unnamed: 0,Node,Blocks,Active,Rows,IndexSize,DataSize,VardataSize,CompressedSize,CompressionRatio,Mapped,MappedMemory,Unmapped,UnmappedMemory,Allocated,AllocatedMemory,DeletedRows,TableLocation
0,ALL,11822,5911,21688526,0,6196029022,1511307406,0,0,5911,6196735488,5911,6196735488,0,0,0,CAS


In [48]:
# Preview the first rows of the CAS table
castbl.head()

Unnamed: 0,C_CUSTOMER_SK,C_CUSTOMER_ID,C_CURRENT_CDEMO_SK,C_CURRENT_HDEMO_SK,C_CURRENT_ADDR_SK,C_FIRST_SHIPTO_DATE_SK,C_FIRST_SALES_DATE_SK,C_SALUTATION,C_FIRST_NAME,C_LAST_NAME,C_PREFERRED_CUST_FLAG,C_BIRTH_DAY,C_BIRTH_MONTH,C_BIRTH_YEAR,C_BIRTH_COUNTRY,C_LOGIN,C_EMAIL_ADDRESS,C_LAST_REVIEW_DATE
0,70253646.0,AAAAAAAAOEMPPCEA,,7026.0,19692070.0,,,,Nicole,,,,5.0,,OMAN,,,2452518.0
1,70253647.0,AAAAAAAAPEMPPCEA,380854.0,1173.0,19603445.0,2449173.0,2449143.0,Dr.,Bess,Bowers,Y,23.0,1.0,1983.0,PANAMA,,Bess.Bowers@RJz20CSGbbpr.edu,2452412.0
2,70253652.0,AAAAAAAAEFMPPCEA,1527255.0,2875.0,27465695.0,2450243.0,2450213.0,Dr.,David,Watts,Y,30.0,7.0,1984.0,GRENADA,,David.Watts@VNHS.com,2452470.0
3,70253668.0,AAAAAAAAEGMPPCEA,,,42715360.0,,,,,Rivas,,,,,,,Irene.Rivas@S1pljeX4zo7VLuBC3f.org,
4,70253669.0,AAAAAAAAFGMPPCEA,1095861.0,2073.0,11699038.0,2450318.0,2450288.0,Mr.,Scotty,Daly,N,28.0,10.0,1983.0,SOMALIA,,Scotty.Daly@K.org,2452590.0


In [52]:
# Count the missing values in each column of the CAS table
castbl.nmiss()

C_CUSTOMER_SK                    0
C_CUSTOMER_ID                    0
C_CURRENT_CDEMO_SK         2080496
C_CURRENT_HDEMO_SK         2080893
C_CURRENT_ADDR_SK                0
C_FIRST_SHIPTO_DATE_SK     2080663
C_FIRST_SALES_DATE_SK      2080285
C_SALUTATION               2082574
C_FIRST_NAME               2079863
C_LAST_NAME                2080045
C_PREFERRED_CUST_FLAG      2081181
C_BIRTH_DAY                2079914
C_BIRTH_MONTH              2080201
C_BIRTH_YEAR               3500856
C_BIRTH_COUNTRY            2079364
C_LOGIN                   21688526
C_EMAIL_ADDRESS            2079952
C_LAST_REVIEW_DATE         2082389
dtype: int64

In [57]:
%%time
castbl.C_BIRTH_YEAR.value_counts(dropna=False)

CPU times: total: 31.2 ms
Wall time: 404 ms


NaN       3500856
1980.0    1403085
1984.0    1401905
1992.0    1401621
1988.0    1401315
1982.0    1400132
1991.0    1398782
1987.0    1398236
1990.0    1398009
1989.0    1397778
1985.0    1397692
1986.0    1397627
1981.0    1395851
1983.0    1395637
dtype: int64

## Terminate the CAS session

In [9]:
# End the CAS session now that the analysis is finished
conn.terminate()

# Additional Resources

- [Getting Started with Python Integration to SAS® Viya® - Index](https://blogs.sas.com/content/sgf/2020/06/19/getting-started-with-python-integration-to-sas-viya-index/)
- [SAS Viya Best practices with Snowflake Data](https://video.sas.com/detail/video/6312274491112/sas-viya-best-practices-with-snowflake-data)
- [Documentation - Snowflake Data Connector](https://go.documentation.sas.com/doc/en/pgmsascdc/default/casref/p183rli8obtde3n10y9bzbrpwnsh.htm)