# SOLUTION: Model Building, Scoring & Evaluating (LightGBM)

## Imports

In [None]:
from snowflake.snowpark.session import Session
import snowflake.snowpark.types as T

from snowflake.ml.modeling.lightgbm import LGBMClassifier
from snowflake.ml.modeling.metrics import *

import json
import pandas as pd
import seaborn as sns
import os

## Create Snowpark Session

In [None]:
# Switch the working directory so the relative 'creds.json' path below
# resolves against it, and echo the directory for confirmation.
working_dir = '/Users/nicholasaverillo/Documents/MachineLearning/sfguide-getting-started-machine-learning/hol/'
os.chdir(working_dir)
print(os.getcwd())

# Parse the JSON credentials file into the connection parameters dict.
with open('creds.json') as creds_file:
    connection_parameters = json.loads(creds_file.read())

In [None]:
# Build the Snowpark session from the loaded connection parameters.
session_builder = Session.builder.configs(connection_parameters)
session = session_builder.create()

# Report the context the session is running in.
current_schema = session.get_fully_qualified_current_schema()
print(f"Current Database and schema: {current_schema}")
current_warehouse = session.get_current_warehouse()
print(f"Current Warehouse: {current_warehouse}")

# Model Building

In [None]:
# Reference the train and test tables through the session, in that order.
train_sdf, test_sdf = (
    session.table(table_name)
    for table_name in (
        'CREDIT_RISK_PREPARED_BALANCED_TRAIN',
        'CREDIT_RISK_PREPARED_BALANCED_TEST',
    )
)

# SOLUTION: Train a LightGBM Model

In [None]:
# Use every column of the training table as a feature except the
# 'TARGET' and 'ID' columns; .remove() raises if either one is missing.
target_col = 'TARGET'
feature_cols = train_sdf.columns
for non_feature in (target_col, 'ID'):
    feature_cols.remove(non_feature)

# Configure the classifier's input, label and output columns, then fit it
# on the training DataFrame.
lgbm_params = {
    'input_cols': feature_cols,
    'label_cols': target_col,
    'output_cols': 'PREDICTION',
}
lgbmodel = LGBMClassifier(**lgbm_params)
lgbmodel.fit(train_sdf)

The fitted model can be retrieved as a native LightGBM object

In [None]:
# Retrieve the fitted model object from the wrapper via to_lightgbm().
# NOTE(review): presumably a native lightgbm classifier — the plotting cell
# reads its feature_importances_ and feature_name_ attributes; confirm.
lgbmodel_local = lgbmodel.to_lightgbm()

In [None]:
# Plot feature importance
importance_values = lgbmodel_local.feature_importances_
feature_names = lgbmodel_local.feature_name_

# One row per feature, indexed by feature name.
feat_importance = pd.DataFrame(
    data=importance_values,
    index=feature_names,
    columns=['FeatImportance'],
)

# Sort ascending by importance before drawing the horizontal bar chart.
feat_importance_sorted = feat_importance.sort_values(by='FeatImportance')
feat_importance_sorted.plot.barh(y='FeatImportance', figsize=(5, 15))

# Model Scoring

# SOLUTION: Use the fitted LightGBM Model to score a Snowpark DataFrame

In [None]:
# Score the test DataFrame with the fitted model and save the result,
# overwriting the table if it already exists.
scored_table_name = 'CREDIT_RISK_PREPARED_BALANCED_TEST_SCORED'
scored_sdf = lgbmodel.predict(test_sdf)
scored_sdf.write.save_as_table(table_name=scored_table_name, mode='overwrite')

# Read the saved table back through the session and display it.
session.table(scored_table_name).show()

# Model Evaluation

# SOLUTION: Evaluate model performance

In [None]:
# Classification metrics computed on the scored DataFrame, comparing the
# 'TARGET' column (ground truth) with the 'PREDICTION' column (model output).
# Fix: the first label was misspelled as 'Acccuracy:'.
print('Accuracy:', accuracy_score(df=scored_sdf, y_true_col_names='TARGET', y_pred_col_names='PREDICTION'))
print('Precision:', precision_score(df=scored_sdf, y_true_col_names='TARGET', y_pred_col_names='PREDICTION'))
print('Recall:', recall_score(df=scored_sdf, y_true_col_names='TARGET', y_pred_col_names='PREDICTION'))
print('F1:', f1_score(df=scored_sdf, y_true_col_names='TARGET', y_pred_col_names='PREDICTION'))

# Obtaining and plotting a simple confusion matrix
# (note: confusion_matrix takes singular *_col_name keyword arguments).
cf_matrix = confusion_matrix(df=scored_sdf, y_true_col_name='TARGET', y_pred_col_name='PREDICTION')

# Annotate each cell with its count, formatted without decimals.
sns.heatmap(cf_matrix, annot=True, fmt='.0f', cmap='Blues')

In [None]:
# Close the Snowpark session once the notebook no longer needs it.
session.close()