# Loading of data needed for examples

This notebook loads the data used by some of the demos into Snowflake.

It creates the tables in the database and schema set by `DATABASE_NAME` and `DATABASE_SCHEMA` in the configuration cell below; the connection itself is defined in `connections.toml`.

In [None]:
# Snowpark modules
from snowflake.snowpark.session import Session
import snowflake.snowpark.types as T

In [None]:
# Get a nicer output from .show()
# Import from the public IPython.display module rather than the internal
# IPython.core.display, and import display explicitly so the cell does not
# depend on the kernel injecting it as a builtin.
from IPython.display import HTML, display

# Inject CSS that stops <pre> blocks from wrapping, so wide tables printed by
# .show() keep their columns aligned.
display(HTML("<style>pre { white-space: pre !important; }</style>"))

### Connect to Snowflake

This example uses the `connections.toml` file to connect to Snowflake. See https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-connect#connecting-using-the-connections-toml-file for how to set it up.

In [None]:
# --- Connection ---------------------------------------------------------------
# Name of the entry in connections.toml used to connect to Snowflake
CONNECTION_NAME = "MY SNOWFLAKE CONNECTION"

# --- Target objects in Snowflake ----------------------------------------------
DATABASE_NAME = "SNOWPARK_DEMO_DB"  # Database that receives the data
DATABASE_SCHEMA = "SOURCE_DATA"  # Schema that stores the data and files
FULLY_QUALIFIED_NAME = f"{DATABASE_NAME}.{DATABASE_SCHEMA}"

# --- Local files and staging --------------------------------------------------
# Where the data files are stored locally; file names are appended directly to
# this value, so keep the trailing slash
data_path = "../data/"
data_stage_name = "~"  # Using the user stage

# Open the Snowpark session from the named connection
snf_session = (
    Session.builder
    .config("connection_name", CONNECTION_NAME)
    .create()
)

Create the database and schema if they do not already exist, and (re)create the `SOURCE_FILES` stage.

In [None]:
# DDL executed in order: database first, then the schema inside it, then the
# stage that holds the uploaded parquet files loaded in a Notebook.
setup_statements = (
    f"CREATE DATABASE IF NOT EXISTS {DATABASE_NAME}",
    f"CREATE SCHEMA IF NOT EXISTS {FULLY_QUALIFIED_NAME}",
    f"CREATE OR REPLACE STAGE {FULLY_QUALIFIED_NAME}.SOURCE_FILES",
)
for statement in setup_statements:
    snf_session.sql(statement).collect()

# Make the new schema the session's current schema
snf_session.use_schema(FULLY_QUALIFIED_NAME)

In [None]:
# Display the session's fully qualified current schema as the cell output
snf_session.get_fully_qualified_current_schema()

### Titanic data

In [None]:
# Upload titanic.csv from the local data folder to the stage, replacing any
# earlier upload. auto_compress=True is why the load step reads titanic.csv.gz.
putResult = snf_session.file.put(
    f"{data_path}titanic.csv",
    f"@{data_stage_name}",
    auto_compress=True,
    overwrite=True,
)

In [None]:
# Columns of titanic.csv as (name, Snowpark type) pairs, in file order
titanic_columns = [
    ("PCLASS", T.IntegerType()),
    ("SURVIVED", T.StringType()),
    ("NAME", T.StringType()),
    ("SEX", T.StringType()),
    ("AGE", T.FloatType()),
    ("SIBSP", T.FloatType()),
    ("PARCH", T.FloatType()),
    ("TICKET", T.StringType()),
    ("FARE", T.FloatType()),
    ("CABIN", T.StringType()),
    ("EMBARKED", T.StringType()),
    ("BOAT", T.StringType()),
    ("BODY", T.IntegerType()),
    ("HOME_DEST", T.StringType()),
]
titanicSchema = T.StructType(
    [T.StructField(col_name, col_type) for col_name, col_type in titanic_columns]
)

# CSV options: comma-delimited, fields optionally wrapped in double quotes,
# "?" read as NULL, and the header line skipped
titanic_csv_options = {
    "field_delimiter": ",",
    "FIELD_OPTIONALLY_ENCLOSED_BY": '"',
    "NULL_IF": "?",
    "SKIP_HEADER": 1,
}

# Create a reader that applies the explicit schema
dfReader = snf_session.read.schema(titanicSchema)

# Data frame over the compressed file on the stage
dfTitanic_stage = dfReader.options(titanic_csv_options).csv(
    f"@{data_stage_name}/titanic.csv.gz"
)

# Drop any existing table first, then copy the staged data into the table
snf_session.sql(f"DROP TABLE IF EXISTS {FULLY_QUALIFIED_NAME}.titanic").collect()
dfTitanic_stage.copy_into_table(f"{FULLY_QUALIFIED_NAME}.titanic")

In [None]:
# Print rows from the titanic table as a quick check of the load
snf_session.table(f"{FULLY_QUALIFIED_NAME}.titanic").show()

### Campaign spend

In [None]:
# Upload campaign_spend.csv from the local data folder to the stage, replacing
# any earlier upload. auto_compress=True is why the load step reads
# campaign_spend.csv.gz. The call is the last expression, so its result is shown.
snf_session.file.put(
    f"{data_path}campaign_spend.csv",
    f"@{data_stage_name}",
    auto_compress=True,
    overwrite=True,
)

In [None]:
# Columns of campaign_spend.csv as (name, Snowpark type) pairs, in file order
campaign_columns = [
    ("CAMPAIGN", T.StringType()),
    ("CHANNEL", T.StringType()),
    ("DATE", T.DateType()),
    ("TOTAL_CLICKS", T.DecimalType(38, 0)),
    ("TOTAL_COST", T.DecimalType(38, 0)),
    ("ADS_SERVED", T.DecimalType(38, 0)),
]
campaignSchema = T.StructType(
    [T.StructField(col_name, col_type) for col_name, col_type in campaign_columns]
)

# CSV options: comma-delimited with the header line skipped
campaign_csv_options = {"field_delimiter": ",", "SKIP_HEADER": 1}

# Create a reader that applies the explicit schema
dfReader = snf_session.read.schema(campaignSchema)

# Data frame over the compressed file on the stage
dfCampaign_stage = dfReader.options(campaign_csv_options).csv(
    f"@{data_stage_name}/campaign_spend.csv.gz"
)

# Drop any existing table first, then copy the staged data into the table
snf_session.sql(f"DROP TABLE IF EXISTS {FULLY_QUALIFIED_NAME}.campaign_spend").collect()
dfCampaign_stage.copy_into_table(f"{FULLY_QUALIFIED_NAME}.campaign_spend")

In [None]:
# Print rows from the campaign_spend table as a quick check of the load
snf_session.table(f"{FULLY_QUALIFIED_NAME}.campaign_spend").show()

### Bank Marketing files



In [None]:
# Upload every file in the local bank/ folder to the BANK_MARKETING folder of
# the SOURCE_FILES stage, uncompressed, replacing any earlier upload.
# data_path already ends with "/", so the sub-folder is appended directly (as
# the other upload cells do) to avoid a doubled slash in the local path.
snf_session.file.put(
    f"{data_path}bank/*",
    f"@{FULLY_QUALIFIED_NAME}.SOURCE_FILES/BANK_MARKETING",
    auto_compress=False,
    overwrite=True,
)

### Clean up

In [None]:
# Remove the compressed CSV uploads from the stage now that they are loaded
for staged_file in ("campaign_spend.csv.gz", "titanic.csv.gz"):
    snf_session.sql(f"rm @{data_stage_name}/{staged_file}").collect()

# List what is left on the stage
snf_session.sql(f"ls @{data_stage_name}").show()

In [None]:
# Close the Snowflake session created earlier in this notebook
snf_session.close()