In [None]:
%pip install snowflake-connector-python
%pip install snowflake-snowpark-python

In [None]:
import os
from dotenv import load_dotenv
from snowflake.snowpark import Session
import snowflake.snowpark.functions as f
from snowflake.snowpark.window import Window
from datetime import date
from snowflake.snowpark.types import ArrayType, VariantType 

In [None]:
# Load variables from a local .env file into the process environment so the
# os.getenv("SNOWFLAKE_*") lookups in snowpark_session_create() can find them.
load_dotenv()

In [None]:
def snowpark_session_create():
    """Create a Snowpark session from SNOWFLAKE_* environment variables.

    Reads SNOWFLAKE_ACCOUNT, SNOWFLAKE_USER, SNOWFLAKE_PASSWORD, SNOWFLAKE_ROLE,
    SNOWFLAKE_WAREHOUSE, SNOWFLAKE_DATABASE and SNOWFLAKE_SCHEMA with os.getenv
    and passes them to Session.builder.configs(...).create().

    Returns:
        The Session object returned by the Snowpark session builder.

    Raises:
        ValueError: if the account, user or password variable is unset or
            empty, so a misconfigured environment is reported by name
            instead of surfacing later as a connection error.
    """
    env_var_by_param = {
        "account": "SNOWFLAKE_ACCOUNT",
        "user": "SNOWFLAKE_USER",
        "password": "SNOWFLAKE_PASSWORD",
        "role": "SNOWFLAKE_ROLE",
        "warehouse": "SNOWFLAKE_WAREHOUSE",
        "database": "SNOWFLAKE_DATABASE",
        "schema": "SNOWFLAKE_SCHEMA",
    }
    connection_params = {
        param: os.getenv(env_var) for param, env_var in env_var_by_param.items()
    }

    # Fail fast on the credentials this function cannot connect without.
    # The remaining settings are passed through unchanged, even when unset.
    required_params = ("account", "user", "password")
    missing = [
        env_var_by_param[param]
        for param in required_params
        if not connection_params[param]
    ]
    if missing:
        raise ValueError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    return Session.builder.configs(connection_params).create()

In [None]:
# Open the Snowpark session that the later cells use to talk to Snowflake.
demo_session = snowpark_session_create()

In [None]:
# Preview the raw credit sales table through a fully qualified SQL query.
raw_credit_sales_query = "SELECT * FROM DEMO.RAW.RAW_CREDIT_SALES "
df = demo_session.sql(raw_credit_sales_query)
df.show()

In [None]:
# Make demo.raw the session's current database and schema so the cells below
# can refer to tables by their bare names.
demo_session.use_database("demo")
demo_session.use_schema("raw")

In [None]:
# Handle on the raw items table, resolved against the session's current
# database and schema.
raw_items_table_name = "raw_credit_sales_items"
sales_items = demo_session.table(raw_items_table_name)
sales_items.show()

## Transform the data into a structured format

In [None]:
# Output column name -> key to pull out of each flattened JSON element.
json_key_by_column = {
    "credit_card_number": "creditCardNumber",
    "date_time": "dateTime",
    "id": "id",
    "items": "items",
}

# FLATTEN emits one row per element of "data"; each element lands in "value".
flattened_sales_items = sales_items.join_table_function("flatten", f.col("data"))

# f.lit() wraps the key as a literal Column; a bare string in that position
# would be interpreted as a column name instead of a constant.
for column_name, json_key in json_key_by_column.items():
    flattened_sales_items = flattened_sales_items.withColumn(
        column_name, f.json_extract_path_text("value", f.lit(json_key))
    )

sales_items_strcd = flattened_sales_items.select(
    "ingestion_id", "ingested_at", "credit_card_number", "date_time", "id", "items"
)

sales_items_strcd.show()

## Create a stage for the UDF

In [None]:
# Create the stage that will hold udf.py. CREATE OR REPLACE recreates the stage,
# so any files previously uploaded to demo_stage are gone after this runs.
demo_session.sql("create or replace stage demo_stage").collect()

In [None]:
# Upload the local udf.py to the stage uncompressed, so it stays available as
# "@demo_stage/udf.py". overwrite=True makes re-running this cell after
# editing udf.py replace the staged copy; without it the upload is skipped
# when a file with the same name is already on the stage.
demo_session.file.put("udf.py", "@demo_stage", auto_compress=False, overwrite=True)

In [None]:
# Register the UDF from the copy of udf.py that was uploaded to the stage.
sort_items = demo_session.udf.register_from_file(
    # Where the code lives and which function inside it to expose.
    file_path="@demo_stage/udf.py",
    func_name="sortitems",
    # Signature: takes a VARIANT, returns an ARRAY.
    input_types=[VariantType()],
    return_type=ArrayType(),
    # Registered permanently under the name "sort_items", replacing any
    # existing UDF of that name; stage_location is where it is stored.
    name="sort_items",
    is_permanent=True,
    stage_location="@demo_stage",
    replace=True,
)

In [None]:
# Flatten "data" and extract the same fields as the structured view, except
# that "items" is parsed back into a VARIANT and passed through the
# sort_items UDF instead of being kept as text.
flattened_items = sales_items.join_table_function("flatten", f.col("data"))

items_as_text = f.json_extract_path_text("value", f.lit("items"))
sorted_items = sort_items(f.parse_json(items_as_text))

sales_items_strcd_wudf = (
    flattened_items
    .withColumn(
        "credit_card_number",
        f.json_extract_path_text("value", f.lit("creditCardNumber")),
    )
    .withColumn("date_time", f.json_extract_path_text("value", f.lit("dateTime")))
    .withColumn("id", f.json_extract_path_text("value", f.lit("id")))
    .withColumn("items", sorted_items)
    .select(
        "ingestion_id",
        "ingested_at",
        "credit_card_number",
        "date_time",
        "id",
        "items",
    )
)

In [None]:
# Preview the result to check that the UDF-processed "items" column looks right.
sales_items_strcd_wudf.show()

## Save the result

We can now save the transformed data as a table :)

In [None]:
# Persist the transformed rows as the "sales_items" table; "overwrite" mode
# replaces the table's contents if it already exists.
overwrite_writer = sales_items_strcd_wudf.write.mode("overwrite")
overwrite_writer.save_as_table("sales_items")