In [1]:
import re

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)
from sklearn.model_selection import train_test_split
from snowflake.ml.registry import Registry
from snowflake.snowpark import Session
from snowflake.snowpark.context import get_active_session
from xgboost import XGBClassifier

from common import get_next_model_version

# Obtain a Snowpark session: prefer one that is already active, and fall back
# to building a new one through the session builder when none can be fetched.
try:
    session = get_active_session()
except Exception:
    # `except Exception` (instead of a bare `except:`) keeps KeyboardInterrupt
    # and SystemExit from being silently swallowed by the fallback.
    session = Session.builder.create()

# Snowflake object names used by the rest of the notebook.
DB = "DEMO"
SCHEMA = "PUBLIC"
COMPUTE_WAREHOUSE = "DEMO_WH"
# Name under which the trained model is logged in the registry.
model_name = "MORTGAGE_LENDING_MLOPS"

In [None]:
# Load the mortgage data as a pandas DataFrame. If reading the table fails,
# upload the bundled CSV into {DB}.{SCHEMA} and read it back from there.
try:
    print("Reading table data...")
    df = session.table(f"{DB}.{SCHEMA}.MORTGAGE_LENDING_DEMO_DATA").to_pandas()
    print(df.head())
except Exception as exc:
    # Any read failure lands here, so surface the real reason as well instead
    # of assuming the table is missing.
    print("Table not found! Uploading data to snowflake table")
    print(f"(read failed with: {exc})")
    df_pandas = pd.read_csv("MORTGAGE_LENDING_DEMO_DATA.csv.zip")
    # Write to the same database/schema that is read from below, rather than
    # whatever the session's current database/schema happen to be.
    session.write_pandas(
        df_pandas,
        "MORTGAGE_LENDING_DEMO_DATA",
        database=DB,
        schema=SCHEMA,
        auto_create_table=True,
    )
    # Convert to pandas so `df` has the same type on both code paths; the
    # following cells call pandas functions on it.
    df = session.table(f"{DB}.{SCHEMA}.MORTGAGE_LENDING_DEMO_DATA").to_pandas()
    print(df.head())

: 

In [None]:
# One-hot encode the two categorical loan columns; drop_first=True removes the
# first level of each encoded column.
encoded = pd.get_dummies(
    df,
    columns=["LOAN_TYPE_NAME", "LOAN_PURPOSE_NAME"],
    drop_first=True,
)


def _bool_to_int(column):
    """Cast a boolean column to int; return any other column unchanged."""
    if column.dtype == "bool":
        return column.astype(int)
    return column


encoded = encoded.apply(_bool_to_int)

# Upper-case every column name and replace each run of non-alphanumeric
# characters with a single underscore.
non_alnum = re.compile(r"[^a-zA-Z0-9]+")
encoded.columns = [non_alnum.sub("_", name.upper()) for name in encoded.columns]

# Drop rows that have no applicant income value.
df_ohe = encoded.dropna(subset=["APPLICANT_INCOME_000S"])
df_ohe.head()

: 

In [None]:
# Columns left out of the feature matrix: the target plus three columns that
# are not used as model inputs.
non_feature_cols = ["MORTGAGERESPONSE", "LOAN_ID", "TS", "COUNTY_NAME"]
x = df_ohe.drop(columns=non_feature_cols)
y = df_ohe["MORTGAGERESPONSE"]

# 70% of the rows go to training; the seed fixes the split.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    train_size=0.70,
    random_state=1234,
)

# Hyper-parameters of the baseline classifier.
xgb_params = {
    "max_depth": 50,
    "n_estimators": 3,
    "learning_rate": 0.75,
    "booster": "gbtree",
}
xgb_base = XGBClassifier(**xgb_params)

# Train on the training split.
xgb_base.fit(xtrain, ytrain)

: 

In [None]:
def _weighted_classification_metrics(y_true, y_hat):
    """Summarise predicted labels against the true labels.

    Args:
        y_true: Ground-truth labels.
        y_hat: Predicted labels for the same rows.

    Returns:
        dict with keys "Accuracy", "Precision", "Recall" and "F1 Score";
        precision, recall and F1 are computed with ``average="weighted"``.
    """
    return {
        "Accuracy": accuracy_score(y_true, y_hat),
        "Precision": precision_score(y_true, y_hat, average="weighted"),
        "Recall": recall_score(y_true, y_hat, average="weighted"),
        "F1 Score": f1_score(y_true, y_hat, average="weighted"),
    }


# Training split: predicted labels and second-column probabilities.
y_pred_train = xgb_base.predict(xtrain)
y_pred_proba_train = xgb_base.predict_proba(xtrain)[:, 1]
metrics_train = _weighted_classification_metrics(ytrain, y_pred_train)

# Test split: `y_pred` and `y_pred_proba` are reused by the ROC AUC /
# confusion-matrix cell, and `metrics_test` is attached to the logged model.
y_pred = xgb_base.predict(xtest)
y_pred_proba = xgb_base.predict_proba(xtest)[:, 1]
metrics_test = _weighted_classification_metrics(ytest, y_pred)

print(metrics_train)
print(metrics_test)

: 

In [None]:
# One feature row passed to the registry as sample input. The seed keeps the
# chosen row stable across re-runs.
sample_data = x.sample(n=1, random_state=1234)

reg = Registry(session=session, database_name=DB, schema_name=SCHEMA)
# Keep the registry listing in its own variable so that it does not overwrite
# `df`, which holds the raw mortgage data read by the encoding cell.
registered_models = reg.show_models()
next_version = get_next_model_version(registered_models, model_name)
print(next_version)

# Log the fitted classifier under the next version name, attaching the
# test-split metrics to that version.
mortgage_model = reg.log_model(
    model_name=model_name,
    version_name=next_version,
    model=xgb_base,
    sample_input_data=sample_data,
    metrics=metrics_test,
    target_platforms=["WAREHOUSE"],
    conda_dependencies=["xgboost==3.0.1"],
    options={"relax_version": True},
)

: 

In [None]:
# ROC AUC on the test split, computed from the probabilities in `y_pred_proba`.
roc_auc = roc_auc_score(ytest, y_pred_proba)
print(f"\nROC AUC Score: {roc_auc:.4f}")

# Confusion matrix of predicted vs. actual test labels, drawn as a heatmap.
print("\nConfusion Matrix:")
cm = confusion_matrix(ytest, y_pred)
fig, ax = plt.subplots(figsize=(2, 1.5))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_title("Confusion Matrix")
ax.set_ylabel("Actual")
ax.set_xlabel("Predicted")
plt.show()

: 

In [None]:
# Persist the test features so they can be scored from SQL.
# overwrite=True makes this cell safe to re-run: without it write_pandas
# appends, so every re-run would add another copy of the rows to MORTGAGE_TEST.
session.write_pandas(
    xtest,
    "MORTGAGE_TEST",
    database=DB,
    schema=SCHEMA,
    auto_create_table=True,
    overwrite=True,
)

: 

In [None]:
# Make the version returned by `.last()` the model's default version, then
# display the resulting default.
m = reg.get_model(model_name)
last_version = reg.get_model(model_name).last()
recent_model_name = last_version.version_name
m.default = recent_model_name
m.default

: 

In [None]:
# Score the persisted test rows from SQL by calling the registered model's
# predict_proba method. The model is referenced through `model_name` so the
# query always targets the model this notebook logged.
# NOTE(review): this reads `output_feature_0`, while the evaluation cell uses
# column 1 of predict_proba — confirm which class probability is intended.
sql_predict = session.sql(
    f"""
    select *, round({DB}.{SCHEMA}.{model_name}!predict_proba(
    APPLICANT_INCOME_000S,
    LOAN_AMOUNT_000S,
    LOAN_TYPE_NAME_FHA_INSURED,
    LOAN_TYPE_NAME_FSA_RHS_GUARANTEED,
    LOAN_TYPE_NAME_VA_GUARANTEED,
    LOAN_PURPOSE_NAME_HOME_PURCHASE,
    LOAN_PURPOSE_NAME_REFINANCING
):output_feature_0,2)
as pred_response
from {DB}.{SCHEMA}.MORTGAGE_TEST
"""
)

sql_predict.show()

: 