#### Why Snowpark?
The purpose of Snowpark lies in its ability to provide more flexibility, scalability, and integration for data processing and orchestration tasks.|

Orchestrating Jobs and Pipelines: We can automate Snowpark-based pipelines (UDFs, views, and other transformations) in a more flexible way. Snowpark code can be versioned and tested, making it easier to maintain and extend in your pipeline.

#### 1. Loading Data In

Since we already push our data onto Snowflake, we can call for them in this notebook to run in Snowpark. This will be the first step to the data flow overview for establishing the CI/CD deployment & finalizing the ELT pipepline.

test t

In [1]:
import os
from snowflake.snowpark import Session

# Print out the environment variables to debug
print("SNOWFLAKE_ACCOUNT:", os.getenv("SNOWFLAKE_ACCOUNT"))
print("SNOWFLAKE_USER:", os.getenv("SNOWFLAKE_USER"))
print("SNOWFLAKE_PASSWORD:", os.getenv("SNOWFLAKE_PASSWORD"))

connection_parameters = {
    "account": os.getenv("SNOWFLAKE_ACCOUNT"),
    "user": os.getenv("SNOWFLAKE_USER"),
    "password": os.getenv("SNOWFLAKE_PASSWORD"),
    "warehouse": os.getenv("SNOWFLAKE_WAREHOUSE"),
    "database": os.getenv("SNOWFLAKE_DATABASE"),
    "schema": os.getenv("SNOWFLAKE_SCHEMA", "SNOWSQL"),
    "role": os.getenv("SNOWFLAKE_ROLE", "ACCOUNTADMIN"),
    "insecure_mode": True
}

# Create the Snowpark session using the environment variables
session = Session.builder.configs(connection_parameters).create()


  from snowflake.snowpark import Session


SNOWFLAKE_ACCOUNT: None
SNOWFLAKE_USER: None
SNOWFLAKE_PASSWORD: None


AttributeError: 'NoneType' object has no attribute 'find'

In [1]:
import time
from snowflake.snowpark import Session

# Define all tables organized by categories or schemas
TABLE_DICT = {
    "application": {
        "schema": "KN_LOGISTICS.SNOWSQL", 
        "tables": [
            "APPLICATION_CITIES",
            "APPLICATION_COUNTRIES_SEA",
            "APPLICATION_DELIVERYMETHODS",
            "APPLICATION_PAYMENTMETHODS",
            "APPLICATION_PEOPLE",
            #"APPLCIATION_PEOPLE_CLEANED",
            #"APPLICATION_PEOPLE_TRANSFORMED",
            "APPLICATION_STATEPROVINCES",
            "APPLICATION_TRANSACTIONTYPES"
        ]
    },
    "purchasing": {
        "schema": "KN_LOGISTICS.SNOWSQL",
        "tables": [
            "PURCHASING_PURCHASEORDERLINES",
            "PURCHASING_PURCHASEORDERS",
            "PURCHASING_SUPPLIERCATEGORIES",
            "PURCHASING_SUPPLIERS",
            "PURCHASING_SUPPLIERS_CLEANED",
            "PURCHASING_SUPPLIERTRANSACTIONS",
            #"PURCHASING_SUPPLIERTRANSACTIONS_CLEANED"
        ]
    },
    "sales": {
        "schema": "KN_LOGISTICS.SNOWSQL",
        "tables": [
            "SALES_BUYINGGROUPS",
            #"SALES_BUYINGGROUPS_CLEANED",
            "SALES_CUSTOMERCATEGORIES",
            #"SALES_CUSTOMERCATEGORIES_CLEANED",
            "SALES_CUSTOMERS",
            "SALES_CUSTOMERTRANSACTIONS",
            #"SALES_CUSTOMERTRANSACTIONS_CLEANED",
            "SALES_INVOICELINES",
            "SALES_INVOICES",
            "SALES_ORDERLINES",
            "SALES_ORDERS"
        ]
    },
    "warehouse": {
        "schema": "KN_LOGISTICS.SNOWSQL",
        "tables": [
            "WAREHOUSE_COLDROOMTEMPERATURES",
            "WAREHOUSE_COLORS",
            "WAREHOUSE_PACKAGETYPES",
            "WAREHOUSE_STOCKGROUPS",
            "WAREHOUSE_STOCKITEMHOLDINGS",
            "WAREHOUSE_STOCKITEMS",
            "WAREHOUSE_STOCKITEMSTOCKGROUPS",
            #"WAREHOUSE_STOCKITEMS_CLEANED",
            "WAREHOUSE_STOCKITEMTRANSACTIONS",
            "WAREHOUSE_VEHICLETEMPERATURES"
        ]
    }
}

def load_raw_table(session, tname=None, schema=None):
    # Adjusted for direct use (no S3 staging assumed in your case)
    session.use_schema(schema)
    print(f"Loading table: {tname}")
    # If additional logic for transformations/loading is needed, add it here
    df = session.table(tname)
    df.show()  # Example action to verify table content

def load_all_tables(session):
    for category, data in TABLE_DICT.items():
        schema = data['schema']
        tables = data['tables']
        for tname in tables:
            load_raw_table(session, tname=tname, schema=schema)

def validate_tables(session):
    for category, data in TABLE_DICT.items():
        schema = data['schema']
        tables = data['tables']
        for tname in tables:
            session.use_schema(schema)
            print(f"Validating table: {tname}")
            print(f"Columns: {session.table(tname).columns}")

In [None]:
# Add the utils package to our path and import the snowpark_utils function
import os, sys
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)

In [None]:
from snowflake.snowpark.context import get_active_session
session = get_active_session()

In [None]:
load_all_tables(session)

In [None]:
validate_tables(session)

#### 2. Create SNOWSQL View
We will simplify the SNOWSQL schema by joining together the tables and picking only the columns we need. This will be done using Snowpark Dataframe API. Then we'll create a Snowflake stream on that view so that we can incrementally process changes to any of the SNOWSQL tables.

This setup is crucial for ensuring that once the initial data is loaded, we can efficiently manage incremental updates to the SNOWSQL data through the Snowflake stream.

This joining of tables can only be done when the group has finalized cleaning the tables in Snowflake (Snowsight).

In [None]:
# SNOWFLAKE ADVANTAGE: Snowpark DataFrame API
# SNOWFLAKE ADVANTAGE: Streams for incremental processing (CDC)
# SNOWFLAKE ADVANTAGE: Streams on views

from snowflake.snowpark import Session
import snowflake.snowpark.functions as F

def create_pos_view(session):
    session.use_schema('SNOWSQL')

    # Define DataFrames for each table with selected columns
    application_cities = session.table("KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES").select(
        F.col("CITYID"),
        F.col("CITYNAME"),
        F.col("STATEPROVINCEID"),
        F.col("LATITUDE"),
        F.col("LONGITUDE"),
        F.col("LATESTRECORDEDPOPULATION")
    )

    application_countries_sea = session.table("KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA").select(
        F.col("COUNTRYID"),
        F.col("COUNTRYNAME"),
        F.col("FORMALNAME"),
        F.col("LATESTRECORDEDPOPULATION"),
        F.col("CONTINENT"),
        F.col("REGION"),
        F.col("SUBREGION")
    )

    application_deliverymethods = session.table("KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS").select(
        F.col("DELIVERYMETHODID"),
        F.col("DELIVERYMETHODNAME")
    )

    application_paymentmethods = session.table("KN_LOGISTICS.SNOWSQL.APPLICATION_PAYMENTMETHODS").select(
        F.col("PAYMENTMETHODID"),
        F.col("PAYMENTMETHODNAME")
    )

    application_people = session.table("KN_LOGISTICS.SNOWSQL.APPLICATION_PEOPLE").select(
        F.col("FULLNAME"),
        F.col("ISEMPLOYEE"),
        F.col("ISSALESPERSON"),
        F.col("PERSONID"),
        F.col("PREFERREDNAME"),
        F.col("SEARCHNAME")
    )

    application_stateprovinces = session.table("KN_LOGISTICS.SNOWSQL.APPLICATION_STATEPROVINCES").select(
        F.col("COUNTRYID"),
        F.col("LATESTRECORDEDPOPULATION"),
        F.col("SALESTERRITORY"),
        F.col("STATEPROVINCECODE"),
        F.col("STATEPROVINCEID"),
        F.col("STATEPROVINCENAME")
    )

    purchasing_purchaseorderlines = session.table("KN_LOGISTICS.SNOWSQL.PURCHASING_PURCHASEORDERLINES").select(
        F.col("DESCRIPTION"),
        F.col("EXPECTEDUNITPRICEPEROUTER"),
        F.col("ISORDERLINEFINALIZED"),
        F.col("LASTRECEIPTDATE"),
        F.col("ORDERDOUTERS"),
        F.col("PACKAGETYPEID"),
        F.col("PURCHASEORDERID"),
        F.col("PURCHASEORDERLINEID"),
        F.col("RECEIVEDOUTERS"),
        F.col("STOCKITEMID")
    )

    purchasing_purchaseorders = session.table("KN_LOGISTICS.SNOWSQL.PURCHASING_PURCHASEORDERS").select(
        F.col("CONTACTPERSONID"),
        F.col("DELIVERYMETHODID"),
        F.col("EXPECTEDDELIVERYDATE"),
        F.col("ISORDERFINALIZED"),
        F.col("ORDERDATE"),
        F.col("PURCHASEORDERID"),
        F.col("SUPPLIERID"),
        F.col("SUPPLIERREFERENCE")
    )

    sales_customers = session.table("KN_LOGISTICS.SNOWSQL.SALES_CUSTOMERS").select(
        F.col("CUSTOMERID"),
        F.col("CUSTOMERNAME"),
        F.col("BILLTOCUSTOMERID"),
        F.col("CUSTOMERCATEGORYID"),
        F.col("BUYINGGROUPID"),
        F.col("PRIMARYCONTACTPERSONID"),
        F.col("ALTERNATECONTACTPERSONID"),
        F.col("DELIVERYMETHODID"),
        F.col("DELIVERYCITYID"),
        F.col("CREDITLIMIT"),
        F.col("ACCOUNTOPENEDDATE"),
        F.col("STANDARDDISCOUNTPERCENTAGE"),
        F.col("ISSTATEMENTSENT"),
        F.col("ISONCREDITHOLD"),
        F.col("PAYMENTDAYS"),
        F.col("PHONENUMBER"),
        F.col("WEBSITEURL")
    )

    warehouse_stockitems = session.table("KN_LOGISTICS.SNOWSQL.WAREHOUSE_STOCKITEMS").select(
        F.col("STOCKITEMID"),
        F.col("STOCKITEMNAME"),
        F.col("UNITPACKAGEID"),
        F.col("OUTERPACKAGEID"),
        F.col("BRAND"),
        F.col("SIZE"),
        F.col("LEADTIMEDAYS"),
        F.col("QUANTITYPEROUTER"),
        F.col("ISCHILLERSTOCK"),
        F.col("TAXRATE"),
        F.col("UNITPRICE"),
        F.col("RECOMMENDEDRETAILPRICE"),
        F.col("TYPICALWEIGHTPERUNIT"),
        F.col("SUPPLIERID"),
        F.col("COLORID")
    )

    # Define joins and final DataFrame logic here
    city_with_stateprov = application_cities.join(
        application_stateprovinces,
        application_cities["STATEPROVINCEID"] == application_stateprovinces["STATEPROVINCEID"]
    ).select(
        application_cities["CITYID"],
        application_cities["CITYNAME"],
        application_cities["STATEPROVINCEID"].alias("CITY_STATEPROVINCEID"),
        application_stateprovinces["STATEPROVINCEID"].alias("STATEPROV_STATEPROVINCEID"),
        application_stateprovinces["COUNTRYID"],
        application_stateprovinces["LATESTRECORDEDPOPULATION"].alias("STATE_POPULATION")
    )


    city_with_stateprov.create_or_replace_view("SQL_FLATTENED_V")
    

def create_pos_view_stream(session):
    session.use_schema('SNOWSQL')
    _ = session.sql('CREATE OR REPLACE STREAM SNOWSQL_FLATTENED_V_STREAM \
                        ON VIEW SQL_FLATTENED_V \
                        SHOW_INITIAL_ROWS = TRUE').collect()

def test_pos_view(session):
    session.use_schema('SNOWSQL')
    tv = session.table('SQL_FLATTENED_V')
    tv.limit(5).show()


In [None]:
# use this place to validate the tables
# test the code
create_pos_view(session)

In [None]:
# visualize
# Test the view created by create_pos_view
def validate_view(session):
    session.use_schema('SNOWSQL')
    # Load the view into a Snowpark DataFrame
    df = session.table("SQL_FLATTENED_V")
    # Display the first few rows for validation
    df.show()

validate_view(session)