In [23]:
import os
import json
import pandas as pd
import xgboost as xgb
import configparser
from snowflake.snowpark import Session
from copy import copy
from snowflake.snowpark import Row
from snowflake.snowpark.functions import col, lit, sql_expr, get, get_path, udf, udtf, table_function, sproc, seq8, uniform
from snowflake.snowpark.types import StructType, StructField, StringType, IntegerType, DecimalType, LongType
from snowflake.snowpark.exceptions import SnowparkJoinException, SnowparkSQLException
from snowflake.snowpark.files import SnowflakeFile

# Read snowflake credentials securely
config = configparser.ConfigParser()
config.read('assets/credentials.cfg')

connection_parameters = dict(
   account   =  config['SNOWPARKAWS']['SNOWFLAKE_ACCOUNT'],
   user      =  config['SNOWPARKAWS']['SNOWFLAKE_USER'],
   password  =  config['SNOWPARKAWS']['SNOWFLAKE_PASSWORD'],
   role      =  config['SNOWPARKAWS']['SNOWFLAKE_ROLE'],  # optional
   warehouse =  config['SNOWPARKAWS']['SNOWFLAKE_WAREHOUSE'],  # optional
   database  =  config['SNOWPARKAWS']['SNOWFLAKE_DATABASE'],  # optional
   schema    =  config['SNOWPARKAWS']['SNOWFLAKE_SCHEMA'],  # optional
)

# Pass this dictionary to the Session.builder.configs method to return a builder object that has these connection parameters.
# Call the create method of the builder to establish the session.
session = Session.builder.configs(connection_parameters).create()

`Understanding Different Ways to Read Data Using Snowpark Session Methods`

- There are different ways you can interract with snowflake to fetch the data from the tables or using a SQL syntax. The below mentioned are ways to read the data:

|Snowpark Method|Description|
|--|--|
|Session.table()|Returns a Table that points the specified table|
|Session.sql()|Returns a new DataFrame representing the results of a SQL query. You can use this method to execute a SQL statement|
|Session.call()|Calls a stored procedure by name|
|Session.table_function()|Creates a new DataFrame from the given snowflake SQL table function|

- Snowpark Session: https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/session

`Session.table()`
- https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session.table

In [5]:
session.table("SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY").limit(2).to_pandas()

In [2]:
db='SNOWFLAKE'
sch='ACCOUNT_USAGE'

session.table([db, sch, "QUERY_HISTORY"]).select(col("QUERY_ID"), col("START_TIME"), col("END_TIME")).limit(10).show()

--------------------------------------------------------------------------------------------------------------
|"QUERY_ID"                            |"START_TIME"                      |"END_TIME"                        |
--------------------------------------------------------------------------------------------------------------
|01b037e0-0404-d217-0000-000202db7009  |2023-11-09 09:36:23.675000-08:00  |2023-11-09 09:36:23.909000-08:00  |
|01b037e0-0404-d17f-0000-000202db2005  |2023-11-09 09:36:19.313000-08:00  |2023-11-09 09:36:19.552000-08:00  |
|01b037dd-0404-d101-0000-00000202db30  |2023-11-09 09:33:23.558000-08:00  |2023-11-09 09:33:23.684000-08:00  |
|01b037e0-0404-d114-0000-000202db300d  |2023-11-09 09:36:23.587000-08:00  |2023-11-09 09:36:23.657000-08:00  |
|01b037e0-0404-d114-0000-000202db3005  |2023-11-09 09:36:19.734000-08:00  |2023-11-09 09:36:20.021000-08:00  |
|01b037dd-0404-d0f9-0000-000202db1005  |2023-11-09 09:33:22.918000-08:00  |2023-11-09 09:33:23.227000-08:00  |
|

`session.sql`
- https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session.sql

In [9]:
session.sql("SELECT QUERY_ID, START_TIME, END_TIME FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY ORDER BY START_TIME DESC LIMIT 10").show()

--------------------------------------------------------------------------------------------------------------
|"QUERY_ID"                            |"START_TIME"                      |"END_TIME"                        |
--------------------------------------------------------------------------------------------------------------
|01b0f71d-0604-dd36-0002-02db0004c1be  |2023-12-13 09:33:00.134000-08:00  |2023-12-13 09:33:05.278000-08:00  |
|01b0f71c-0604-dd31-0002-02db00052196  |2023-12-13 09:32:32.679000-08:00  |2023-12-13 09:32:37.581000-08:00  |
|01b0f718-0604-dd34-0002-02db0004d1ea  |2023-12-13 09:28:31.827000-08:00  |2023-12-13 09:28:43.634000-08:00  |
|01b0f712-0604-dd37-0002-02db00051192  |2023-12-13 09:22:36.490000-08:00  |2023-12-13 09:22:37.781000-08:00  |
|01b0f712-0604-dd37-0002-02db0005118e  |2023-12-13 09:22:24.938000-08:00  |2023-12-13 09:22:29.940000-08:00  |
|01b0f70f-0604-dd31-0002-02db0005218e  |2023-12-13 09:19:20.605000-08:00  |2023-12-13 09:19:21.901000-08:00  |
|

In [10]:
# Use params to bind variables
session.sql("select * from values(?,?),(?,?)", params=[1,"a",2,"b"]).sort("column1").show()

-------------------------
|"COLUMN1"  |"COLUMN2"  |
-------------------------
|1          |a          |
|2          |b          |
-------------------------



`session.call`
- https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session.call

In [2]:
import snowflake.snowpark
from snowflake.snowpark.functions import sproc

session.add_packages('snowflake-snowpark-python')

@sproc(session=session, name="my_copy_sp", replace=True)
def my_copy(session:snowflake.snowpark.Session, from_table: str, to_table: str, count: int) -> str:
    session.table(from_table).limit(count).write.save_as_table(to_table)
    return "SUCCESS"

The version of package 'snowflake-snowpark-python' in the local environment is 1.11.1, which does not fit the criteria for the requirement 'snowflake-snowpark-python'. Your UDF might not work when the package version is different between the server and your local environment.


In [4]:
session.call("my_copy_sp", "SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY","Q_HISTORY",10)

'SUCCESS'

In [3]:
session.table("Q_HISTORY").count()

10

`table_function`
- https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session.table_function

In [6]:
from snowflake.snowpark.functions import lit
session.table_function("split_to_table", lit("split words to table"), lit(" ")).show()

-----------------------------
|"SEQ"  |"INDEX"  |"VALUE"  |
-----------------------------
|1      |1        |split    |
|1      |2        |words    |
|1      |3        |to       |
|1      |4        |table    |
-----------------------------



`Creating Dataframes for Query Results`

- We have 2 major methods to create dataframes using Snowflake Snowpark API. 

| Snowpark Method | Description |
|--|--|
|Session.createDataFrame | Creates a new DataFrame containing the specified values from the local data|
| Session.create_dataframe | Creates a new DataFrame containing the specified values from the local data|

- https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session.createDataFrame
- https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session.create_dataframe

In [19]:
from snowflake.snowpark.types import StructType, StructField, StringType, IntegerType, DecimalType, LongType

schema = StructType([StructField("a", IntegerType()), StructField("b", StringType())])

df = session.create_dataframe([[1,"snow"],[2,"flake"]], schema=schema)
df.show()

df1 = session.createDataFrame([[1,"snow"],[2,"flake"]], schema=schema)
df1.show()

---------------
|"A"  |"B"    |
---------------
|1    |snow   |
|2    |flake  |
---------------

---------------
|"A"  |"B"    |
---------------
|1    |snow   |
|2    |flake  |
---------------



`Miscellaneous Methods for Accessing and Generating the Data Using Snowpark Python API`

- https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/functions
- https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.functions.lit

In [22]:
import datetime
from snowflake.snowpark.functions import lit, col, parse_json

columns = [lit(1), lit("1"), lit(1.0), lit(True), lit(b'snow'), lit(datetime.date(2023,12,14)), lit([1,2]), lit({"snow":"flake"})]

session.create_dataframe([[]]).select([c.alias(str(i)) for i, c in enumerate(columns)]).show()

---------------------------------------------------------------------------------------
|"0"  |"1"  |"2"  |"3"   |"4"                 |"5"         |"6"   |"7"                |
---------------------------------------------------------------------------------------
|1    |1    |1.0  |True  |bytearray(b'snow')  |2023-12-14  |[     |{                  |
|     |     |     |      |                    |            |  1,  |  "snow": "flake"  |
|     |     |     |      |                    |            |  2   |}                  |
|     |     |     |      |                    |            |]     |                   |
---------------------------------------------------------------------------------------



- https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.functions.parse_json

In [23]:
df = session.create_dataframe([['{"key": "1"}']], schema=["a"])
df.select(parse_json(df.col("a")).alias("result")).show()

----------------
|"RESULT"      |
----------------
|{             |
|  "key": "1"  |
|}             |
----------------



- https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session.flatten

In [24]:
json_data = '''
{'widget': {
    'debug': 'on',
    'window': {
        'title': 'Sample Konfabulator Widget',
        'name': 'main_window',
        'width': 500,
        'height': 500
    },
    'image': { 
        'src': 'Images/Sun.png',
        'name': 'sun1',
        'hOffset': 250,
        'vOffset': 250,
        'alignment': 'center'
    },
    'text': {
        'data': 'Click Here',
        'size': 36,
        'style': 'bold',
        'name': 'text1',
        'hOffset': 250,
        'vOffset': 100,
        'alignment': 'center',
        'onMouseUp': 'sun1.opacity = (sun1.opacity / 100) * 90;'
    }
}}    
'''
session.flatten(parse_json(lit(json_data)), "widget.window", False, False, 'BOTH').show()
# Note: Session.flatten() is deprecated since 0.7.0. Use `Session.table_function()` instead.

Session.flatten() is deprecated since 0.7.0. Use `Session.table_function()` instead.


-----------------------------------------------------------------------------------------------------------------------------
|"SEQ"  |"KEY"   |"PATH"                |"INDEX"  |"VALUE"                       |"THIS"                                    |
-----------------------------------------------------------------------------------------------------------------------------
|1      |height  |widget.window.height  |NULL     |500                           |{                                         |
|       |        |                      |         |                              |  "height": 500,                          |
|       |        |                      |         |                              |  "name": "main_window",                  |
|       |        |                      |         |                              |  "title": "Sample Konfabulator Widget",  |
|       |        |                      |         |                              |  "width": 500                      

In [22]:
from snowflake.snowpark.functions import lit, col, parse_json, table_function

json_data = '''
{                                          
  "customer": [                            
    {                                      
      "address": "San Francisco, CA",      
      "name": "Joyce Ridgely",             
      "phone": "16504378889"               
    }                                      
  ],                                       
  "date": "2017-04-28",                    
  "dealership": "Valley View Auto Sales",  
  "salesperson": {                         
    "id": "55",                            
    "name": "Frank Beasley"                
  },                                       
  "vehicle": [                             
    {                                      
      "extras": [                          
        "ext warranty",                    
        "paint protection"                 
      ],                                   
      "make": "Honda",                     
      "model": "Civic",                    
      "price": "20275",                    
      "year": "2017"                       
    }                                      
  ]                                        
}   
'''

#df=session.table_function("flatten", parse_json(lit(json_data)))
#df.show()

df1=session.table_function("flatten", parse_json(lit(json_data))["vehicle"]).select(col("value").alias("vehicle"))
df1.show()

--------------------------
|"VEHICLE"               |
--------------------------
|{                       |
|  "extras": [           |
|    "ext warranty",     |
|    "paint protection"  |
|  ],                    |
|  "make": "Honda",      |
|  "model": "Civic",     |
|  "price": "20275",     |
|  "year": "2017"        |
|}                       |
--------------------------



- https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session.generator

In [26]:
# Creating a new DataFrame using the Generator table function
from snowflake.snowpark.functions import seq1, seq8, uniform

#session.generator(seq8(1).alias("sequence_one"), uniform(1,10,2).alias("uniform"), rowcount=3).show()
session.generator(seq8(1).alias("sequence_one"), uniform(1,10,2).alias("uniform"), timelimit=2).show()

------------------------------
|"SEQUENCE_ONE"  |"UNIFORM"  |
------------------------------
|0               |3          |
|1               |3          |
|2               |3          |
|3               |3          |
|4               |3          |
|5               |3          |
|6               |3          |
|7               |3          |
|8               |3          |
|9               |3          |
------------------------------



- https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session.query_history

In [31]:
# Create an instance of QueryHistory as a context manager to record queries that are pushed down to the Snowflake database.
with session.query_history() as query_history:
    df = session.create_dataframe([[1,2],[3,4]], schema=["a","b"])
    df.filter(df["a"]==1)
    res = df.show()

query_history.queries

-------------
|"A"  |"B"  |
-------------
|1    |2    |
|3    |4    |
-------------



[QueryRecord(query_id='01b0f911-0604-db2e-0002-02db0003e33a', sql_text='SELECT "A", "B" FROM ( SELECT $1 AS "A", $2 AS "B" FROM  VALUES (1 :: INT, 2 :: INT), (3 :: INT, 4 :: INT)) LIMIT 10')]

In [None]:
with session.query_history() as qh:
    session.sql('show databases').show()
    session.table('Q_HISTORY').drop_table()
    session.call('sf_copy_function','SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY','Q_HISTORY',10)

qh.queries

- https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session.range

In [36]:
session.range(1,10,2).show()

--------
|"ID"  |
--------
|1     |
|3     |
|5     |
|7     |
|9     |
--------



In [37]:
# Close Snowpark session
session.close()