In [16]:
import os
import json
import pandas as pd
import configparser
from snowflake.snowpark import Session
from copy import copy
from snowflake.snowpark import Row
from snowflake.snowpark.functions import col, lit, sql_expr, get, get_path, udf, udtf, table_function
from snowflake.snowpark.types import StructType, StructField, StringType, IntegerType, DecimalType, LongType
from snowflake.snowpark.exceptions import SnowparkJoinException, SnowparkSQLException
from snowflake.snowpark.files import SnowflakeFile

# Read snowflake credentials securely
config = configparser.ConfigParser()
config.read('credentials.cfg')

# Create a Python dictionary (dict) containing the names and values of the parameters for connecting to Snowflake.
#connection_parameters = dict(
#   account   =  config['SNOWPARKAZ']['SNOWFLAKE_ACCOUNT'],
#   user      =  config['SNOWPARKAZ']['SNOWFLAKE_USER'],
#   password  =  config['SNOWPARKAZ']['SNOWFLAKE_PASSWORD'],
#   role      =  config['SNOWPARKAZ']['SNOWFLAKE_ROLE'],  # optional
#   warehouse =  config['SNOWPARKAZ']['SNOWFLAKE_WAREHOUSE'],  # optional
#   database  =  config['SNOWPARKAZ']['SNOWFLAKE_DATABASE'],  # optional
#   schema    =  config['SNOWPARKAZ']['SNOWFLAKE_SCHEMA'],  # optional
#)

connection_parameters = dict(
   account   =  config['SNOWPARKAWS']['SNOWFLAKE_ACCOUNT'],
   user      =  config['SNOWPARKAWS']['SNOWFLAKE_USER'],
   password  =  config['SNOWPARKAWS']['SNOWFLAKE_PASSWORD'],
   role      =  config['SNOWPARKAWS']['SNOWFLAKE_ROLE'],  # optional
   warehouse =  config['SNOWPARKAWS']['SNOWFLAKE_WAREHOUSE'],  # optional
   database  =  config['SNOWPARKAWS']['SNOWFLAKE_DATABASE'],  # optional
   schema    =  config['SNOWPARKAWS']['SNOWFLAKE_SCHEMA'],  # optional
)

# Pass this dictionary to the Session.builder.configs method to return a builder object that has these connection parameters.
# Call the create method of the builder to establish the session.
session = Session.builder.configs(connection_parameters).create()

In [2]:
# Creating an anonymous UDF
add_one = udf(lambda x: x+1, input_types=[IntegerType()], return_type=IntegerType())

In [None]:
session.sql("select add_one(5)").show()

----------------
|"ADD_ONE(5)"  |
----------------
|6             |
----------------



In [None]:
#Creating and registring a named UDF
# How to register a named temporary UDF
add_one = udf(lambda x:x+1, input_types=[IntegerType()], return_type=IntegerType(), name="my_udf", replace=True)

In [None]:
# How to register a named permanent UDF by setting the is_permanent argument to True
@udf(name="minus_one", is_permanent=True, stage_location="@sf_int_stg", replace=True)
def minus_one(x: int) -> int:
    return x - 1

In [None]:
# Call UDF in DataFrame
df = session.create_dataframe([[1,2],[3,4]]).to_df("a","b")
df.show()
df.select(add_one("a"), minus_one("b")).show()

-------------
|"A"  |"B"  |
-------------
|1    |2    |
|3    |4    |
-------------

----------------------------------------
|"MY_UDF(""A"")"  |"MINUS_ONE(""B"")"  |
----------------------------------------
|4                |3                   |
|2                |1                   |
----------------------------------------



In [None]:
# Call UDF using SQL
session.sql("select add_one(5), minus_one(9)").show()

---------------------------------
|"ADD_ONE(5)"  |"MINUS_ONE(9)"  |
---------------------------------
|6             |8               |
---------------------------------



In [None]:
# Creating a UDF from a Python source file
# Create a UDF from this function of file test_udf_file.py
mod5_udf = session.udf.register_from_file(
     file_path="test_udf_file.py"
    ,func_name="mod5"
)

In [None]:
session.range(1,8,2).select(mod5_udf("id")).to_df("col1").show()

----------
|"COL1"  |
----------
|3       |
|0       |
|2       |
|1       |
----------



In [None]:
# list stage files
session.sql("ls @sf_int_stg").show()

-----------------------------------------------------------------------------------------------------------
|"name"                        |"size"  |"md5"                             |"last_modified"               |
-----------------------------------------------------------------------------------------------------------
|sf_int_stg/car_sales.json     |848     |d69652cd0d10f4651db31c180bd8dff6  |Fri, 8 Dec 2023 17:25:35 GMT  |
|sf_int_stg/car_sales1.json    |656     |04c4262b096991965a9887a75393ef8d  |Sat, 9 Dec 2023 00:59:33 GMT  |
|sf_int_stg/data1.csv          |32      |ba2e510d1d6a424a5b0da2be426dad7f  |Fri, 8 Dec 2023 15:30:23 GMT  |
|sf_int_stg/data1.json         |80      |1a1c422f98f927bb42fdcdecab72faae  |Fri, 8 Dec 2023 15:30:23 GMT  |
|sf_int_stg/sales.json         |320     |6019c0d9cef2959d7e6cd54ce6319904  |Fri, 8 Dec 2023 15:30:23 GMT  |
|sf_int_stg/test_udf_file.py   |48      |5ccc824fece95f654836a6f862415576  |Sat, 9 Dec 2023 02:40:59 GMT  |
|sf_int_stg/test_udtf_file.p

In [None]:
# Create a UDF from Snowflake Internal stage location
mod5_udf = session.udf.register_from_file(
     file_path='@sf_int_stg/test_udf_file.py'
    ,func_name='mod5'
    ,return_type=IntegerType()
    ,input_types=[IntegerType()]
)

In [None]:
session.range(1,8,2).select(mod5_udf("id")).to_df("col1").show()

----------
|"COL1"  |
----------
|1       |
|3       |
|0       |
|2       |
----------



In [8]:
# Reading files with a UDF
# Reading Dynamically specified files with SnowflakeFile class
@udf(name='get_file_length', input_types=[StringType()], return_type=IntegerType(), packages=['snowflake-snowpark-python'], replace=True)
def get_file_length(file_path):
    with SnowflakeFile.open(file_path) as f:
        s = f.read()
    return len(s)

In [9]:
# Call the UDF
session.sql("select get_file_length(build_scoped_file_url(@sf_int_stg, 'test_udf_file.py'))").show()

------------------------------------------------------
|"GET_FILE_LENGTH(BUILD_SCOPED_FILE_URL(@SF_INT_...  |
------------------------------------------------------
|41                                                  |
------------------------------------------------------



In [6]:
# Defining a UDTF's input types and output schema
schema = StructType([StructField("symbol", StringType())
                    ,StructField("cost", IntegerType())
])


@udtf(name="test_udtf", input_types=[StringType(), IntegerType(), IntegerType()], output_schema=schema, stage_location='@sf_int_stg', is_permanent=True, replace=True)
class StockSale:
    def process(self, symbol, quantity, price):
        cost = quantity * price
        yield (symbol, cost)

In [2]:
# Closing a Session
session.close()