# Environment Setup

## Imports

In [2]:
from snowflake.snowpark.session import Session
import snowflake.snowpark.functions as F
import snowflake.snowpark.types as T
from snowflake.ml.modeling.pipeline import Pipeline
from snowflake.ml.modeling.preprocessing import MinMaxScaler, OrdinalEncoder
from snowflake.ml.modeling.xgboost import XGBClassifier
from snowflake.ml.modeling.metrics import accuracy_score
import pandas as pd
import json

## Create Snowpark Session

In [20]:
# Load the Snowflake login details from the local auth file.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying on
# the platform's locale encoding (which would garble non-ASCII credentials).
with open("data_scientist_auth.json", encoding="utf-8") as f:
    data = json.load(f)

# The file handle is no longer needed once the document is parsed.
username = data["username"]
password = data["password"]
account = data["account"]

# Connection settings for the Snowpark session: credentials come from the auth
# file, while role / warehouse / database / schema are fixed for this notebook.
connection_parameters = {
    "account": account,
    "user": username,
    "password": password,
    "role": "feature_admin",
    "warehouse": "tasty_dev_wh",
    "database": "features",
    "schema": "public",
}

# Open the session that every later cell uses via `session`.
session = Session.builder.configs(connection_parameters).create()

DatabaseError: 250001 (08001): Failed to connect to DB: SFSENORTHAMERICA-DC202.snowflakecomputing.com:443. Role 'FEATUREADMIN' specified in the connect string does not exist or not authorized. Contact your local system administrator, or attempt to login with another role, e.g. PUBLIC.

## Create Virtual Warehouse (Compute)

In [3]:
# Provision the X-SMALL warehouse HOL_WH for the lab; CREATE OR REPLACE means
# an existing warehouse with that name is replaced.
session.sql(
    "CREATE OR REPLACE WAREHOUSE HOL_WH WITH WAREHOUSE_SIZE='X-SMALL'"
).collect()

[Row(status='Warehouse HOL_WH successfully created.')]

## Create Database for HOL

In [4]:
# Create the HOL_DB database; CREATE OR REPLACE means an existing database
# with that name is replaced.
session.sql(
    "CREATE OR REPLACE DATABASE HOL_DB"
).collect()

[Row(status='Database HOL_DB successfully created.')]

## Load HOL data into Snowflake

In [3]:
# Read the zipped application records from the local data/ directory.
application_record_df = pd.read_csv('data/application_record.csv.zip')

# Stack 100 copies of the application records into one frame, renumbering the
# index so it runs 0..n-1 instead of repeating.
application_record_large_df = pd.concat(
    [application_record_df for _ in range(100)],
    ignore_index=True,
)

# Read the zipped credit records from the same directory.
credit_record_df = pd.read_csv('data/credit_record.csv.zip')

In [4]:
# Row count of the base (non-replicated) application records.
application_record_df.shape[0]

438557

In [7]:
# Push each pandas frame into its own Snowflake table. Every call enables
# auto_create_table and overwrite.
session.write_pandas(
    application_record_df,
    table_name='APPLICATION_RECORD',
    auto_create_table=True,
    overwrite=True,
)
session.write_pandas(
    application_record_large_df,
    table_name='APPLICATION_RECORD_LARGE',
    auto_create_table=True,
    overwrite=True,
)
# Last expression of the cell: its return value is what the notebook displays.
session.write_pandas(
    credit_record_df,
    table_name='CREDIT_RECORD',
    auto_create_table=True,
    overwrite=True,
)

<snowflake.snowpark.table.Table at 0x7fc19999ee50>

# Create Large Application Record Table

# Add an AGE column, computed as floor(|DAYS_BIRTH| / 365), to the large
# application table.
application_df = (
    session.table('APPLICATION_RECORD_LARGE')
    .with_column('AGE', F.floor(F.abs(F.col('DAYS_BIRTH')) / 365))
)

# Write the result back over the same table it was read from.
application_df.write.save_as_table("APPLICATION_RECORD_LARGE", mode="overwrite")