In [1]:
import os
from auth import auth
import snowflake.snowpark.functions
from snowflake.snowpark import Session
from snowflake.snowpark.functions import col

In [2]:
# open a Snowpark session; account/user/password are read from the imported
# `auth` object, everything else is fixed for this notebook
connection_params = dict(
    account=auth.account,
    user=auth.user,
    password=auth.password,
    role='DATA_ENGINEER',
    warehouse='DATA_ENGINEER_WH',
    database='NREYES',
    schema='PUBLIC',
)

# `session` is the handle every later cell uses to talk to Snowflake
session = (
    Session.builder
    .configs(connection_params)
    .create()
)

In [3]:
# smoke-test the session: ask Snowflake which warehouse, database and schema
# the connection is currently using
context_query = 'select current_warehouse(), current_database(), current_schema()'
context_rows = session.sql(context_query).collect()  # collect() runs the query
print(context_rows)

[Row(CURRENT_WAREHOUSE()='DATA_ENGINEER_WH', CURRENT_DATABASE()='NREYES', CURRENT_SCHEMA()='PUBLIC')]


In [4]:
# sample queries
# building these dataframes sends nothing to snowflake; the query only runs
# once an action is called: show(), count(), etc
df_customer = session.table('snowflake_sample_data.tpch_sf1.customer')
df_customer_filtered = df_customer.where(col('c_mktsegment') == 'HOUSEHOLD')
df_customer_select = df_customer_filtered.select(
    [col(name) for name in ('c_name', 'c_address', 'c_mktsegment')]
)

df_customer_select.show(10)  # preview only; 10 rows is also show()'s default
df_customer_select.count()   # last expression, so the row count is the cell output

---------------------------------------------------------------------------
|"C_NAME"            |"C_ADDRESS"                        |"C_MKTSEGMENT"  |
---------------------------------------------------------------------------
|Customer#000060001  |9Ii4zQn9cX                         |HOUSEHOLD       |
|Customer#000060010  |c4vEEaV1tdqLdw2oVuXp BN            |HOUSEHOLD       |
|Customer#000060016  |i5JCEL8FRYSLvOs1Lkqn30a,1jherLDP   |HOUSEHOLD       |
|Customer#000060021  |KsKuSbEyZEenCWB7F1uMaOm54bciBqZfU  |HOUSEHOLD       |
|Customer#000060024  |7JBly7LjGnzGYfJCcPeyIoqIwWda       |HOUSEHOLD       |
|Customer#000060038  | FBvBksLyQsKovVujfq8XQ             |HOUSEHOLD       |
|Customer#000060043  |6SLQTGRhNJBKEJl9aekego5iwHgy       |HOUSEHOLD       |
|Customer#000060047  |FETxgdO1 Nn,HkjnlMNx3dvUrM4qMlf    |HOUSEHOLD       |
|Customer#000060048  |72pgkyyStsx2                       |HOUSEHOLD       |
|Customer#000060052  |jygrlXgrjEIaVsH14eKreo,ldCsK       |HOUSEHOLD       |
---------------------------------------------------------------------------

30189

In [20]:
# summary statistics for a single numeric column via DataFrame.describe()
# https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.DataFrame.describe
df_orders = session.table('snowflake_sample_data.tpch_sf1.orders')
df_orders_select = df_orders.select(col('o_totalprice'))

# preview the raw values first
df_orders_select.show()

# describe() returns one row per statistic, labelled in the "summary" column;
# sort on that label so the rows print in a stable, alphabetical order
df_orders_stats = df_orders_select.describe()
df_orders_stats.sort('summary', ascending=True).show()

------------------
|"O_TOTALPRICE"  |
------------------
|30175.88        |
|297999.63       |
|345438.38       |
|135965.53       |
|209937.09       |
|140186.32       |
|298655.07       |
|175973.90       |
|4635.38         |
|348308.79       |
------------------

--------------------------------
|"SUMMARY"  |"O_TOTALPRICE"    |
--------------------------------
|count      |1500000.0         |
|max        |555285.16         |
|mean       |151219.53763164   |
|min        |857.71            |
|stddev     |88621.4313636512  |
--------------------------------

