In [None]:
# Import python packages
import streamlit as st
import pandas as pd
from datetime import datetime
import snowflake.ml.modeling.preprocessing as snowml
from snowflake.ml.modeling.preprocessing import OneHotEncoder
from snowflake.snowpark.functions import col, to_timestamp, min, max, month, dayofweek, dayofyear, avg, date_add, sql_expr
from snowflake.snowpark import Window
from snowflake.snowpark.types import IntegerType
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix, roc_auc_score
from snowflake.ml.registry import Registry

#Snowflake feature store
from snowflake.ml.feature_store import FeatureStore, FeatureView, Entity, CreationMode

import matplotlib.pyplot as plt
import seaborn as sns
import re


# We can also use Snowpark for our analyses!
from snowflake.snowpark.context import get_active_session
# Snowpark session handle; every Snowflake call in the cells below goes through it
session = get_active_session()
# Suffix appended to the dataset, model, and table names created by this notebook
VERSION_NUM = '1'
# Database and schema holding the source table, feature store, and model registry
DB = 'DEMO'
SCHEMA = 'PUBLIC'
# Warehouse handed to the feature store and referenced by the model monitors
COMPUTE_WAREHOUSE = 'DEMO_WH'


We have now shown how you can easily build, iterate on, and deploy models. Now let's dive into some more advanced features and explain why they are important for large-scale production ML workloads. These include:

- Feature Store
- Experiment Tracking
- Distributed multi-node training with Ray
- Real-time inference

## Time for Feature Store, Experiment Tracking, and distributed HPO with Ray!

Experiment Tracking provides a mechanism for creating experiments and logging runs within Snowflake from any development environment. This capability allows you to log key pieces of information regarding your model training runs such as model parameters and metrics. In the UI, you can deep dive into a particular run or compare multiple runs to find the optimal model.
Below we will train multiple models using distributed HPO and log results to the Experiment Tracker!

In [None]:
# Load the demo data from Snowflake; if the table cannot be read, seed it from the local CSV archive.
try:
    print("Reading table data...")
    df = session.table(f"{DB}.{SCHEMA}.MORTGAGE_LENDING_DEMO_DATA")
    df.show()
except Exception:
    # `except Exception` (instead of a bare `except:`) lets KeyboardInterrupt and
    # SystemExit propagate rather than being mistaken for a missing table.
    print("Table not found! Uploading data to snowflake table")
    df_pandas = pd.read_csv("MORTGAGE_LENDING_DEMO_DATA.csv.zip")
    # write_pandas returns a handle to the table it wrote; rebind df to it so the
    # rest of the notebook does not depend on the handle created in the failed branch.
    df = session.write_pandas(df_pandas, table_name="MORTGAGE_LENDING_DEMO_DATA", database=DB, schema=SCHEMA, auto_create_table=True)
    df.show()

In [None]:
# Show the raw timestamp range. Snowpark selects are lazy, so without .show()
# this statement would build a query and never run it.
df.select(min('TS'), max('TS')).show()

#Get current date and time
current_time = datetime.now()

# Latest timestamp in the data. The value may come back as a datetime or as a
# string, and a string may or may not carry fractional seconds, so parse it
# leniently instead of requiring the single "%Y-%m-%d %H:%M:%S.%f" layout.
latest_ts = df.select(max("TS")).collect()[0][0]
if isinstance(latest_ts, datetime):
    df_max_time = latest_ts
else:
    df_max_time = pd.to_datetime(str(latest_ts)).to_pydatetime()

#Find delta between latest existing timestamp and today's date
# (variable name kept as-is; it holds the difference of the two datetimes)
timedelta = current_time - df_max_time

#Preview the timestamp range after shifting it so the data ends at roughly today's date
df.select(min(date_add(to_timestamp("TS"), timedelta.days-1)), max(date_add(to_timestamp("TS"), timedelta.days-1))).show()

#Create a dict with keys for feature names and values containing transform code.
#Later entries refer to earlier ones by column name (e.g. MONTH reads TIMESTAMP),
#so keep the entries in dependency order.
feature_eng_dict = dict()

#Timestamp features
feature_eng_dict["TIMESTAMP"] = date_add(to_timestamp("TS"), timedelta.days-1)
feature_eng_dict["MONTH"] = month("TIMESTAMP")
feature_eng_dict["DAY_OF_YEAR"] = dayofyear("TIMESTAMP")
feature_eng_dict["DOTW"] = dayofweek("TIMESTAMP")

#Income and loan features (source columns are expressed in thousands)
feature_eng_dict["LOAN_AMOUNT"] = col("LOAN_AMOUNT_000s")*1000
feature_eng_dict["INCOME"] = col("APPLICANT_INCOME_000s")*1000
feature_eng_dict["INCOME_LOAN_RATIO"] = col("INCOME")/col("LOAN_AMOUNT")

#County-level aggregates
county_window_spec = Window.partition_by("COUNTY_NAME")
feature_eng_dict["MEAN_COUNTY_INCOME"] = avg(col("INCOME").cast(IntegerType())).over(county_window_spec).astype(IntegerType())
feature_eng_dict["HIGH_INCOME_FLAG"] = (col("INCOME")>col("MEAN_COUNTY_INCOME")).astype(IntegerType())
feature_eng_dict["AVG_THIRTY_DAY_LOAN_AMOUNT"] =  sql_expr("""AVG(LOAN_AMOUNT) OVER (PARTITION BY COUNTY_NAME ORDER BY TIMESTAMP  
                                                            RANGE BETWEEN INTERVAL '30 DAYS' PRECEDING AND CURRENT ROW)""")

#Add every engineered column to the dataframe in a single call
df = df.with_columns(feature_eng_dict.keys(), feature_eng_dict.values())
df.show(3)

In [None]:
# Open the feature store kept in DB.SCHEMA; CREATE_IF_NOT_EXIST sets it up on first use.
feature_store_settings = {
    "session": session,
    "database": DB,
    "name": SCHEMA,
    "default_warehouse": COMPUTE_WAREHOUSE,
    "creation_mode": CreationMode.CREATE_IF_NOT_EXIST,
}
fs = FeatureStore(**feature_store_settings)

# Display the entities currently registered in this feature store
fs.list_entities()

In [None]:
#First try to retrieve an existing entity definition, if not define a new one and register
try:
    #retrieve existing entity
    loan_id_entity = fs.get_entity('LOAN_ENTITY')
    print('Retrieved existing entity')
except Exception:
    # Any lookup failure is treated as "not registered yet". Catching Exception
    # (not a bare `except:`) keeps KeyboardInterrupt/SystemExit from being swallowed.
    #define new entity
    loan_id_entity = Entity(
        name = "LOAN_ENTITY",
        join_keys = ["LOAN_ID"],
        desc = "Features defined on a per loan level")
    #register
    fs.register_entity(loan_id_entity)
    print("Registered new entity")

#Create a dataframe with just the ID, timestamp, and engineered features. We will use this to define our feature view
feature_df = df.select(["LOAN_ID"]+list(feature_eng_dict.keys()))
feature_df.show(5)

In [None]:
#define the feature view: engineered features keyed by the loan entity, refreshed daily
loan_fv = FeatureView(
    name="Mortgage_Feature_View",
    entities=[loan_id_entity],
    feature_df=feature_df,
    timestamp_col="TIMESTAMP",
    refresh_freq="1 day")

#add feature level descriptions
#INCOME_LOAN_RATIO is described as INCOME/LOAN_AMOUNT, matching how the feature is computed
loan_fv = loan_fv.attach_feature_desc(
    {
        "MONTH": "Month of loan",
        "DAY_OF_YEAR": "Day of calendar year of loan",
        "DOTW": "Day of the week of loan",
        "LOAN_AMOUNT": "Loan amount in $USD",
        "INCOME": "Household income in $USD",
        "INCOME_LOAN_RATIO": "Ratio of INCOME/LOAN_AMOUNT",
        "MEAN_COUNTY_INCOME": "Average household income aggregated at county level",
        "HIGH_INCOME_FLAG": "Binary flag to indicate whether household income is higher than MEAN_COUNTY_INCOME",
        "AVG_THIRTY_DAY_LOAN_AMOUNT": "Rolling 30 day average of LOAN_AMOUNT"
    }
)

#register under the notebook's version number, replacing any existing registration of that version
loan_fv = fs.register_feature_view(loan_fv, version=VERSION_NUM, overwrite=True)

#display the feature views now present in the feature store
fs.list_feature_views()

In [None]:
# Spine: the join key and timestamp used to look up features, plus the columns
# (categorical input and label) that are carried through to the dataset.
spine_columns = ["LOAN_ID", "TIMESTAMP", "LOAN_PURPOSE_NAME", "MORTGAGERESPONSE"]
dataset_name = f"MORTGAGE_DATASET_EXTENDED_FEATURES_{VERSION_NUM}"

ds = fs.generate_dataset(
    name=dataset_name,
    spine_df=df.select(*spine_columns),
    features=[loan_fv],
    spine_timestamp_col="TIMESTAMP",
    spine_label_cols=["MORTGAGERESPONSE"],
)

# Read the Dataset back as a Snowpark DataFrame and preview it
ds_sp = ds.read.to_snowpark_dataframe()
ds_sp.show(5)

In [None]:
import snowflake.ml.modeling.preprocessing as snowml
from snowflake.snowpark.types import StringType

# Every string-typed column is one-hot encoded. isinstance() is used rather than
# `== StringType()` so that string columns declared with a length are matched too,
# and the loop variable no longer reuses the name of the imported `col` function.
OHE_COLS = [field.name for field in ds_sp.schema.fields if isinstance(field.datatype, StringType)]
# Kept for compatibility; the encoder below writes its output under the input column names.
OHE_POST_COLS = [i+"_OHE" for i in OHE_COLS]


# Encode categoricals to numeric columns
snowml_ohe = snowml.OneHotEncoder(input_cols=OHE_COLS, output_cols = OHE_COLS, drop_input_cols=True)
ds_sp_ohe = snowml_ohe.fit(ds_sp).transform(ds_sp)

#Rename columns to avoid double nested quotes and white space chars
rename_dict = {
    name: name.replace('"','').replace(' ', '_')
    for name in ds_sp_ohe.columns
    if '"' in name
}

ds_sp_ohe = ds_sp_ohe.rename(rename_dict)
ds_sp_ohe.columns

In [None]:
def _to_model_ready_pandas(frame):
    """Return a copy of a pandas frame with bool columns cast to int and column names sanitized.

    Column names are upper-cased and every run of non-alphanumeric characters
    is replaced by a single underscore.
    """
    converted = frame.apply(lambda series: series.astype(int) if series.dtype == 'bool' else series)
    converted.columns = [re.sub(r'[^a-zA-Z0-9]+', '_', name.upper()) for name in converted.columns]
    return converted


# 70/30 split with a fixed seed; nulls are filled with 0 in both parts
train, test = ds_sp_ohe.random_split(weights=[0.70, 0.30], seed=0)
train = train.fillna(0)
test = test.fillna(0)

# Pull both splits into pandas for local training and evaluation
train_pd = _to_model_ready_pandas(train.to_pandas())
test_pd = _to_model_ready_pandas(test.to_pandas())

In [None]:
# Baseline model: very deep trees, only three boosting rounds, large learning rate
xgb_base = XGBClassifier(max_depth=50, n_estimators=3, learning_rate=0.75, booster='gbtree')

# Label column, and the feature matrix (everything except timestamp, ID, and label)
y_train_pd = train_pd.MORTGAGERESPONSE
X_train_pd = train_pd.drop(["TIMESTAMP", "LOAN_ID", "MORTGAGERESPONSE"], axis=1)

# Fit the baseline
xgb_base.fit(X_train_pd, y_train_pd)

from sklearn.metrics import f1_score, precision_score, recall_score
# Predictions on the same rows the model was trained on
train_preds_base = xgb_base.predict(X_train_pd)

# Training-set metrics, rounded to four decimals
f1_base_train, precision_base_train, recall_base_train = (
    round(metric_fn(y_train_pd, train_preds_base), 4)
    for metric_fn in (f1_score, precision_score, recall_score)
)

print(f'F1: {f1_base_train} \nPrecision {precision_base_train} \nRecall: {recall_base_train}')

In [None]:
#Create a snowflake model registry object 
from snowflake.ml.registry import Registry

# Define model name
model_name = f"MORTGAGE_LENDING_MLOPS_{VERSION_NUM}"

# Create a registry to log the model to
model_registry = Registry(session=session, 
                          database_name=DB, 
                          schema_name=SCHEMA,
                          options={"enable_monitoring": True})

#Log the base model to the model registry (if not already there)
base_version_name = 'XGB_BASE'

try:
    #Check for existing model
    mv_base = model_registry.get_model(model_name).version(base_version_name)
    print("Found existing model version!")
except Exception:
    # A failed lookup is taken to mean the version has not been logged yet.
    # Catching Exception (not a bare `except:`) lets KeyboardInterrupt/SystemExit through.
    print("Logging new model version...")
    #Log model to registry
    mv_base = model_registry.log_model(
        model_name=model_name,
        model=xgb_base, 
        version_name=base_version_name,
        sample_input_data = train.drop(["TIMESTAMP", "LOAN_ID", "MORTGAGERESPONSE"]).limit(100), #using snowpark df to maintain lineage
        comment = f"""ML model for predicting loan approval likelihood.
                    This model was trained using XGBoost classifier.
                    Hyperparameters used were:
                    max_depth={xgb_base.max_depth}, 
                    n_estimators={xgb_base.n_estimators}, 
                    learning_rate = {xgb_base.learning_rate}, 
                    algorithm = {xgb_base.booster}
                    """,
        target_platforms= ["WAREHOUSE", "SNOWPARK_CONTAINER_SERVICES"],
        options= {"enable_explainability": True}

    )
    
    #Training metrics are attached only when the version is newly logged
    mv_base.set_metric(metric_name="Train_F1_Score", value=f1_base_train)
    mv_base.set_metric(metric_name="Train_Precision_Score", value=precision_base_train)
    mv_base.set_metric(metric_name="Train_Recall_score", value=recall_base_train)

In [None]:
#Create tag for PROD model
#session.sql() only builds a lazy DataFrame; .collect() runs the DDL explicitly,
#the same way the stage-creation cell does.
session.sql(f'CREATE OR REPLACE TAG {DB}.{SCHEMA}.PROD').collect()

In [None]:
# Fetch the registered model, give it a model-level comment, and point the
# PROD tag at the baseline version.
prod_tag_name = f'{DB}.{SCHEMA}.PROD'

m = model_registry.get_model(model_name)
m.comment = "Loan approval prediction models"
m.set_tag(prod_tag_name, base_version_name)

# Display the tags now set on the model
m.show_tags()

In [None]:
# Score the test split through the registered baseline version and give the
# prediction column a readable name.
reg_preds = mv_base.run(test, function_name = "predict").rename(col('"output_feature_0"'), 'MORTGAGE_PREDICTION')

# Only label and prediction are needed locally to compute the metrics
preds_pd = reg_preds.select(["MORTGAGERESPONSE", "MORTGAGE_PREDICTION"]).to_pandas()
actual_test = preds_pd.MORTGAGERESPONSE
predicted_test = preds_pd.MORTGAGE_PREDICTION

f1_base_test, precision_base_test, recall_base_test = (
    round(metric_fn(actual_test, predicted_test), 4)
    for metric_fn in (f1_score, precision_score, recall_score)
)

# Attach the test metrics to the registered model version
for metric_label, metric_value in (
    ("Test_F1_Score", f1_base_test),
    ("Test_Precision_Score", precision_base_test),
    ("Test_Recall_score", recall_base_test),
):
    mv_base.set_metric(metric_name=metric_label, value=metric_value)

# Train metrics first, then test metrics, for a side-by-side comparison
print(f'F1: {f1_base_train} \nPrecision {precision_base_train} \nRecall: {recall_base_train}')
print('----------')
print(f'F1: {f1_base_test} \nPrecision {precision_base_test} \nRecall: {recall_base_test}')

The model is still overfit, so let's use experiment tracking to help find a better configuration.

In [None]:
from snowflake.ml.data import DataConnector
from snowflake.ml.modeling.tune import get_tuner_context
from snowflake.ml.modeling import tune
from entities import search_algorithm
import psutil
from snowflake.ml.experiment.experiment_tracking import ExperimentTracking
from snowflake.ml.runtime_cluster import get_ray_dashboard_url, scale_cluster

# Surface the Ray dashboard link so the HPO job can be followed while it runs
st.write('Click the link below to view the ray cluster and follow along with your HPO job progress!')
st.write('https://'+get_ray_dashboard_url())

# Feature and label frames for each split; timestamp and ID are not model inputs
x_train_sp = train.drop("MORTGAGERESPONSE", "TIMESTAMP", "LOAN_ID")
y_train_sp = train.select("MORTGAGERESPONSE")
x_test_sp = test.drop("MORTGAGERESPONSE","TIMESTAMP", "LOAN_ID")
y_test_sp = test.select("MORTGAGERESPONSE")

# Wrap each frame in a DataConnector under the keys the training function reads
dataset_map = {
    "x_train": DataConnector.from_dataframe(x_train_sp),
    "y_train": DataConnector.from_dataframe(y_train_sp),
    "x_test": DataConnector.from_dataframe(x_test_sp),
    "y_test": DataConnector.from_dataframe(y_test_sp),
}

# Scale the cluster to two nodes before tuning starts
scale_cluster(2)

# Define a training function, with any models you choose within it.
def train_func():
    """Run a single HPO trial and record it as an experiment run.

    Reads the trial's hyperparameters and datasets from the tuner context,
    fits an XGBClassifier, computes F1/precision/recall on the train and test
    sets, logs the parameters and metrics to the "E2E_MLOPS_HPO_Experiments"
    experiment, and reports the metrics and fitted model back to the tuner.
    """
    tracker = ExperimentTracking(session=get_active_session())
    tracker.set_experiment("E2E_MLOPS_HPO_Experiments")

    with tracker.start_run():
        # Context object exposing the data and parameters of the current trial
        trial_ctx = get_tuner_context()
        hyper_params = trial_ctx.get_hyper_params()
        datasets = trial_ctx.get_dataset_map()

        # Record the sampled hyperparameters on the run
        tracker.log_params(hyper_params)

        # Build the model from the sampled hyperparameters (fixed random_state)
        model = XGBClassifier(**hyper_params, random_state=42)

        # Materialize each dataset as pandas, sorted by index
        X_train_pd, y_train_pd, X_test_pd, y_test_pd = (
            datasets[key].to_pandas().sort_index()
            for key in ("x_train", "y_train", "x_test", "y_test")
        )

        # Fit, then predict on both splits
        model.fit(X_train_pd, y_train_pd)
        train_preds = model.predict(X_train_pd)
        test_preds = model.predict(X_test_pd)

        metrics_to_log = {
            "F1_Train": f1_score(y_train_pd, train_preds),
            "Precision_Train": precision_score(y_train_pd, train_preds),
            "Recall_Train": recall_score(y_train_pd, train_preds),
            "F1_Test": f1_score(y_test_pd, test_preds),
            "Precision_Test": precision_score(y_test_pd, test_preds),
            "Recall_Test": recall_score(y_test_pd, test_preds),
        }

        # The same metrics go to the experiment run and to the tuner
        tracker.log_metrics(metrics_to_log)
        trial_ctx.report(metrics=metrics_to_log, model=model)

        
# Ranges the tuner samples hyperparameters from
hpo_search_space = {
    "max_depth": tune.randint(1, 30),
    "learning_rate": tune.uniform(0.01, 0.5),
    "n_estimators": tune.randint(50, 150),
}

# Maximize the test-set F1 reported by train_func, using seeded random search over 4 trials
hpo_tuner_config = tune.TunerConfig(
    metric="F1_Test",
    mode="max",
    search_alg=search_algorithm.RandomSearch(random_state=101),
    num_trials=4,
)

tuner = tune.Tuner(
    train_func=train_func,
    search_space=hpo_search_space,
    tuner_config=hpo_tuner_config,
)

In [None]:
# Launch the HPO trials over the prepared datasets, then display the per-trial results
tuner_results = tuner.run(dataset_map=dataset_map)
tuner_results.results

In [None]:
#Take the best model found by the tuner (ranked on the tuner's configured metric) and display its configuration
tuned_model = tuner_results.best_model
tuned_model

In [None]:
# Columns that are not model inputs: timestamp, ID, and the label
non_feature_columns = ["TIMESTAMP", "LOAN_ID", "MORTGAGERESPONSE"]
scoring_functions = (f1_score, precision_score, recall_score)

# Tuned model on the training split
xgb_opt_preds = tuned_model.predict(train_pd.drop(non_feature_columns, axis=1))
f1_opt_train, precision_opt_train, recall_opt_train = (
    round(score_fn(train_pd.MORTGAGERESPONSE, xgb_opt_preds), 4)
    for score_fn in scoring_functions
)

print(f'Train Results: \nF1: {f1_opt_train} \nPrecision {precision_opt_train} \nRecall: {recall_opt_train}')

# Tuned model on the test split
xgb_opt_preds_test = tuned_model.predict(test_pd.drop(non_feature_columns, axis=1))
f1_opt_test, precision_opt_test, recall_opt_test = (
    round(score_fn(test_pd.MORTGAGERESPONSE, xgb_opt_preds_test), 4)
    for score_fn in scoring_functions
)

print(f'Test Results: \nF1: {f1_opt_test} \nPrecision {precision_opt_test} \nRecall: {recall_opt_test}')

In [None]:
#Log the optimized model to the model registry (if not already there)
optimized_version_name = 'XGB_Optimized'

try:
    #Check for existing model
    mv_opt = model_registry.get_model(model_name).version(optimized_version_name)
    print("Found existing model version!")
except Exception:
    # A failed lookup is taken to mean the version has not been logged yet.
    # Catching Exception (not a bare `except:`) lets KeyboardInterrupt/SystemExit through.
    #Log model to registry
    print("Logging new model version...")
    mv_opt = model_registry.log_model(
        model_name=model_name,
        model=tuned_model, 
        version_name=optimized_version_name,
        sample_input_data = train.drop(["TIMESTAMP", "LOAN_ID", "MORTGAGERESPONSE"]).limit(100),
        comment = f"""HPO ML model for predicting loan approval likelihood.
            This model was trained using XGBoost classifier.
            Optimized hyperparameters used were:
            max_depth={tuned_model.max_depth}, 
            n_estimators={tuned_model.n_estimators}, 
            learning_rate = {tuned_model.learning_rate}, 
            """,
        target_platforms= ["WAREHOUSE", "SNOWPARK_CONTAINER_SERVICES"],
        options= {"enable_explainability": True}

    )
    #Train and test metrics are attached only when the version is newly logged
    mv_opt.set_metric(metric_name="Train_F1_Score", value=f1_opt_train)
    mv_opt.set_metric(metric_name="Train_Precision_Score", value=precision_opt_train)
    mv_opt.set_metric(metric_name="Train_Recall_score", value=recall_opt_train)

    mv_opt.set_metric(metric_name="Test_F1_Score", value=f1_opt_test)
    mv_opt.set_metric(metric_name="Test_Precision_Score", value=precision_opt_test)
    mv_opt.set_metric(metric_name="Test_Recall_score", value=recall_opt_test)

In [None]:
#Display the model's current default version; at this point it is expected to still be the BASE version
model_registry.get_model(model_name).default

In [None]:
# Registry handle on the same database and schema
reg = Registry(session=session, database_name=DB, schema_name=SCHEMA)

# Look up the model's last version; note that recent_model_name holds a
# *version* name, not a model name.
latest_model_version = reg.get_model(model_name).last()
recent_model_name = latest_model_version.version_name

# Make that version the default on the model handle fetched earlier, then display it
m.default = recent_model_name
m.default

Explain our model

In [None]:
#Create a reproducible sample of up to 2,500 test records for explanation.
#The size is capped at the frame length because DataFrame.sample raises when
#n exceeds the number of rows. A conditional is used rather than min(), since
#`min` in this notebook is the Snowpark column function imported at the top.
explain_sample_size = 2500 if len(test_pd) >= 2500 else len(test_pd)
test_pd_sample=test_pd.rename(columns=rename_dict).sample(n=explain_sample_size, random_state = 100).reset_index(drop=True)

#Compute shapley values for each model
base_shap_pd = mv_base.run(test_pd_sample, function_name="explain")
opt_shap_pd = mv_opt.run(test_pd_sample, function_name="explain")

In [None]:
from snowflake.ml.monitoring import explain_visualize

# Feature values for the sampled rows (label, timestamp, and ID removed)
feat_df=test_pd_sample.drop(["MORTGAGERESPONSE","TIMESTAMP", "LOAN_ID"],axis=1)

# Plot the baseline model's explanation values against the corresponding feature values
explain_visualize.plot_influence_sensitivity(base_shap_pd, feat_df, figsize=(750, 250))

In [None]:
# Persist both splits as tables, replacing any previous copies. The SQL cells
# that follow read them back by these names.
train_table_name = f"{DB}.{SCHEMA}.DEMO_MORTGAGE_LENDING_TRAIN_{VERSION_NUM}"
test_table_name = f"{DB}.{SCHEMA}.DEMO_MORTGAGE_LENDING_TEST_{VERSION_NUM}"

train.write.save_as_table(train_table_name, mode="overwrite")
test.write.save_as_table(test_table_name, mode="overwrite")

In [None]:
# Create (if missing) the stage that the stored-procedure registration points at
session.sql(f'CREATE stage IF NOT EXISTS {DB}.{SCHEMA}.ML_STAGE').collect()

In [None]:
from snowflake import snowpark

def demo_inference_sproc(session: snowpark.Session, table_name: str, modelname: str, modelversion: str) -> str:
    """Score a table with a registered model version and write the predictions back to it.

    Args:
        session: Snowpark session the procedure runs in.
        table_name: Table to read, score, and overwrite with the joined result.
        modelname: Name of the registered model to load.
        modelversion: Version of that model to run; also used as the prefix of
            the prediction column, ``<modelversion>_PREDICTION``.

    Returns:
        The string "Success" once the table has been rewritten.
    """
    reg = Registry(session=session)
    # Use the `modelname` argument. The original read the notebook-level
    # `model_name` variable, so the parameter passed by the caller was ignored.
    m = reg.get_model(modelname)
    mv = m.version(modelversion)

    input_table_name=table_name
    pred_col = f'{modelversion}_PREDICTION'

    # Read the input table to a dataframe
    df = session.table(input_table_name)
    # Keep only the ID and the model output, renaming the output to the prediction column
    results = mv.run(df, function_name="predict").select("LOAN_ID",'"output_feature_0"').withColumnRenamed('"output_feature_0"', pred_col)

    # Attach the predictions to the original rows by LOAN_ID
    final = df.join(results, on="LOAN_ID", how="full")
    # Overwrite the input table; schema evolution allows the new prediction column
    final.write.save_as_table(table_name, mode='overwrite',enable_schema_evolution=True)

    return "Success"

# Register the function as a permanent stored procedure named model_inference_sproc,
# replacing any existing definition. Its code is uploaded to the ML_STAGE stage.
# NOTE: StringType is imported in the one-hot-encoding cell, so that cell must have run first.
session.sproc.register(
    func=demo_inference_sproc,
    name="model_inference_sproc",
    replace=True,
    is_permanent=True,
    stage_location="@ML_STAGE",
    packages=['joblib', 'snowflake-snowpark-python', 'snowflake-ml-python'],
    return_type=StringType()
)

In [None]:
-- Add the baseline model's prediction column to the TRAIN table
CALL model_inference_sproc('DEMO_MORTGAGE_LENDING_TRAIN_{{VERSION_NUM}}','{{model_name}}', '{{base_version_name}}');

In [None]:
-- Add the baseline model's prediction column to the TEST table
CALL model_inference_sproc('DEMO_MORTGAGE_LENDING_TEST_{{VERSION_NUM}}','{{model_name}}', '{{base_version_name}}');

In [None]:
-- Add the optimized model's prediction column to the TRAIN table
CALL model_inference_sproc('DEMO_MORTGAGE_LENDING_TRAIN_{{VERSION_NUM}}','{{model_name}}', '{{optimized_version_name}}');

In [None]:
-- Add the optimized model's prediction column to the TEST table
CALL model_inference_sproc('DEMO_MORTGAGE_LENDING_TEST_{{VERSION_NUM}}','{{model_name}}', '{{optimized_version_name}}');

In [None]:
-- Preview test rows with both models' predictions next to the actual outcome
SELECT
    TIMESTAMP,
    LOAN_ID,
    INCOME,
    LOAN_AMOUNT,
    XGB_BASE_PREDICTION,
    XGB_OPTIMIZED_PREDICTION,
    MORTGAGERESPONSE
FROM DEMO_MORTGAGE_LENDING_TEST_{{VERSION_NUM}}
LIMIT 20

In [None]:
-- Add a single categorical LOAN_PURPOSE column to the TEST table
-- (the model monitors use it as their segment column)
ALTER TABLE DEMO_MORTGAGE_LENDING_TEST_{{VERSION_NUM}}
ADD COLUMN IF NOT EXISTS LOAN_PURPOSE VARCHAR(50);


-- Collapse the LOAN_PURPOSE_NAME_* indicator columns into one label;
-- rows where none of the three indicators equals 1 are labelled 'OTHER'
UPDATE DEMO_MORTGAGE_LENDING_TEST_{{VERSION_NUM}}
SET LOAN_PURPOSE = CASE
    WHEN LOAN_PURPOSE_NAME_HOME_IMPROVEMENT = 1 THEN 'HOME_IMPROVEMENT'
    WHEN LOAN_PURPOSE_NAME_HOME_PURCHASE = 1 THEN 'HOME_PURCHASE'
    WHEN LOAN_PURPOSE_NAME_REFINANCING = 1 THEN 'REFINANCING'
    ELSE 'OTHER'
END;

In [None]:
-- Add a single categorical LOAN_PURPOSE column to the TRAIN table,
-- mirroring the column added to the TEST table
ALTER TABLE DEMO_MORTGAGE_LENDING_TRAIN_{{VERSION_NUM}}
ADD COLUMN IF NOT EXISTS LOAN_PURPOSE VARCHAR(50);


-- Collapse the LOAN_PURPOSE_NAME_* indicator columns into one label;
-- rows where none of the three indicators equals 1 are labelled 'OTHER'
UPDATE DEMO_MORTGAGE_LENDING_TRAIN_{{VERSION_NUM}}
SET LOAN_PURPOSE = CASE
    WHEN LOAN_PURPOSE_NAME_HOME_IMPROVEMENT = 1 THEN 'HOME_IMPROVEMENT'
    WHEN LOAN_PURPOSE_NAME_HOME_PURCHASE = 1 THEN 'HOME_PURCHASE'
    WHEN LOAN_PURPOSE_NAME_REFINANCING = 1 THEN 'REFINANCING'
    ELSE 'OTHER'
END;

In [None]:
-- Spot-check that LOAN_PURPOSE agrees with the indicator columns it was derived from
SELECT
    LOAN_PURPOSE_NAME_HOME_PURCHASE,
    LOAN_PURPOSE_NAME_HOME_IMPROVEMENT,
    LOAN_PURPOSE_NAME_REFINANCING,
    LOAN_PURPOSE
FROM DEMO_MORTGAGE_LENDING_TEST_{{VERSION_NUM}}
LIMIT 10;

In [None]:
-- Monitor for the baseline model version: reads predictions and actuals from the
-- TEST table, uses the TRAIN table as baseline, and segments by LOAN_PURPOSE.
-- Refreshes every 12 hours and aggregates over 1-day windows.
CREATE OR REPLACE MODEL MONITOR MORTGAGE_LENDING_BASE_MODEL_MONITOR
WITH
    MODEL={{model_name}}
    VERSION={{base_version_name}}
    FUNCTION=predict
    SOURCE=DEMO_MORTGAGE_LENDING_TEST_{{VERSION_NUM}}
    BASELINE=DEMO_MORTGAGE_LENDING_TRAIN_{{VERSION_NUM}}
    TIMESTAMP_COLUMN=TIMESTAMP
    PREDICTION_CLASS_COLUMNS=(XGB_BASE_PREDICTION)  
    ACTUAL_CLASS_COLUMNS=(MORTGAGERESPONSE)
    ID_COLUMNS=(LOAN_ID)
    SEGMENT_COLUMNS = ('LOAN_PURPOSE')
    WAREHOUSE={{COMPUTE_WAREHOUSE}}
    REFRESH_INTERVAL='12 hours'
    AGGREGATION_WINDOW='1 day';

In [None]:
-- Monitor for the optimized model version: same source, baseline, and segment
-- settings as the baseline monitor, but tracking the XGB_OPTIMIZED_PREDICTION column.
CREATE OR REPLACE MODEL MONITOR MORTGAGE_LENDING_OPTIMIZED_MODEL_MONITOR
WITH
    MODEL={{model_name}}
    VERSION={{optimized_version_name}}
    FUNCTION=predict
    SOURCE=DEMO_MORTGAGE_LENDING_TEST_{{VERSION_NUM}}
    BASELINE=DEMO_MORTGAGE_LENDING_TRAIN_{{VERSION_NUM}}
    TIMESTAMP_COLUMN=TIMESTAMP
    PREDICTION_CLASS_COLUMNS=(XGB_OPTIMIZED_PREDICTION)  
    ACTUAL_CLASS_COLUMNS=(MORTGAGERESPONSE)
    ID_COLUMNS=(LOAN_ID)
    SEGMENT_COLUMNS = ('LOAN_PURPOSE')
    WAREHOUSE={{COMPUTE_WAREHOUSE}}
    REFRESH_INTERVAL='12 hours'
    AGGREGATION_WINDOW='1 day';