In [None]:
import boto3
import io
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

import sagemaker
import sagemaker.amazon.common as smac
from sagemaker import get_execution_role
from sagemaker.predictor import csv_serializer, json_deserializer
import smutil

In [None]:
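# Template (left disabled): AWS CLI copy command; replace both placeholder
# paths with real locations before enabling it.
#! aws s3 cp file_from_path file_to_path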

In [None]:
# Bundle the four data splits under the keys that are handed to
# smutil.store_data_s3 further down.
# NOTE(review): X_train_scaled, X_test_scaled, y_train and y_test are not
# defined in the visible cells -- they must already exist in the kernel
# before this cell runs.
data_dictionary = dict(
    X_train=X_train_scaled,
    X_test=X_test_scaled,
    y_train=y_train,
    y_test=y_test,
)

In [None]:
# --- S3 location and IAM role ---
# NOTE(review): this bucket name contains uppercase letters; confirm it is
# the exact name of an existing bucket.
bucket = "lendingGenie-s3"
prefix = "lendingGenie/linear-learner"
role = get_execution_role()

# --- Model / instance selection (passed to the smutil helpers below) ---
model_type = "linear-learner"
instance_type = "ml.m4.xlarge"

# --- Training dimensions ---
n_features = 40  # number of input features; used as "feature_dim" below
n_epochs = 100  # number of training epochs; used as "epochs" below

In [None]:
# Push the splits in data_dictionary to S3 via the project helper.
# The two returned values are presumably the S3 locations of the train and
# test data -- confirm against smutil.store_data_s3.
s3_train_data, s3_test_data = smutil.store_data_s3(
    bucket,
    prefix,
    role,
    data_dictionary,
)

In [None]:
# Hyperparameter sets for the five model variants trained below.
# Each set is assembled from shared fragments so the common settings are
# written once; fragment order matches the key order of each final dict.

# Settings shared by every variant (first keys / last key of each dict).
_hp_head = {
    "feature_dim": n_features,
    "predictor_type": "binary_classifier",
}
_hp_tail = {"epochs": n_epochs}

# Automated threshold tuning: select on precision at a target recall of 0.9.
_hp_threshold_tuning = {
    "binary_classifier_model_selection_criteria": "precision_at_target_recall",
    "target_recall": 0.9,
}

# Balanced class weights.
_hp_balanced = {"positive_example_weight_mult": "balanced"}

# Hinge loss, i.e. the linear-SVM variants.
_hp_hinge = {"loss": "hinge_loss"}

# Logistic regression
l_hyperparams = {**_hp_head, **_hp_tail}

# Binary classifier with automated threshold tuning
le_hyperparams = {**_hp_head, **_hp_threshold_tuning, **_hp_tail}

# Binary classifier with class weights and automated threshold tuning
leb_hyperparams = {
    **_hp_head,
    **_hp_threshold_tuning,
    **_hp_balanced,
    **_hp_tail,
}

# Linear SVM
lsvc_hyperparams = {
    **_hp_head,
    **_hp_hinge,
    **_hp_threshold_tuning,
    **_hp_tail,
}

# Linear SVM with balanced class weights
lsvcb_hyperparams = {
    **_hp_head,
    **_hp_hinge,
    **_hp_threshold_tuning,
    **_hp_balanced,
    **_hp_tail,
}


In [None]:
# Build one learner per hyperparameter set. Only the hyperparameters differ
# between variants; bucket, prefix, role, model type and instance type are
# shared.
def _new_learner(hyperparams):
    """Create a learner via smutil.create_model with the shared settings."""
    return smutil.create_model(
        bucket, prefix, role, model_type, instance_type, hyperparams
    )


l_model = _new_learner(l_hyperparams)
le_model = _new_learner(le_hyperparams)
leb_model = _new_learner(leb_hyperparams)
lsvc_model = _new_learner(lsvc_hyperparams)
lsvcb_model = _new_learner(lsvcb_hyperparams)

In [None]:
# Train every learner on the same S3 train/test data.
# NOTE(review): each *_model name is rebound to whatever smutil.fit_model
# returns, so later cells depend on that return value being the fitted
# model -- confirm against smutil.
def _train(learner):
    """Fit one learner with the shared train/test S3 inputs."""
    return smutil.fit_model(learner, s3_train_data, s3_test_data)


l_model = _train(l_model)
le_model = _train(le_model)
leb_model = _train(leb_model)
lsvc_model = _train(lsvc_model)
lsvcb_model = _train(lsvcb_model)

In [None]:
# Deploy each fitted model and keep the resulting predictor. The same
# instance_type value that was passed to create_model is used here.
def _deploy(fitted_model):
    """Deploy one model through smutil.deploy_model on instance_type."""
    return smutil.deploy_model(fitted_model, instance_type)


l_predictor = _deploy(l_model)
le_predictor = _deploy(le_model)
leb_predictor = _deploy(leb_model)
lsvc_predictor = _deploy(lsvc_model)
lsvcb_predictor = _deploy(lsvcb_model)

In [None]:
# Map a display label to the predictor of each deployed model variant.
predictors = {
    "Logistic": l_predictor,
    # le_predictor is the model built from le_hyperparams (automated
    # threshold tuning); mapping this label to l_predictor would evaluate the
    # plain logistic model twice and never evaluate this variant.
    "Logistic with auto threshold": le_predictor,
    "Logistic with class weights": leb_predictor,
    "Linear SVC": lsvc_predictor,
    "Linear SVC with class weights": lsvcb_predictor,
}

# Evaluate every variant on the held-out test split.
# NOTE(review): `evaluate` is neither defined nor imported in the visible
# cells (the other helpers are all reached through `smutil.`) -- confirm
# where it comes from before running on a fresh kernel.
metrics = {
    key: evaluate(predictor, X_test_scaled, y_test, key, False)
    for key, predictor in predictors.items()
}

# Show floats with three decimals, then tabulate one row per model.
# The column selection below requires each evaluate() result to provide
# "Model", "Recall", "Precision", "Accuracy" and "F1".
pd.set_option("display.float_format", lambda x: "%.3f" % x)
display(
    pd.DataFrame(list(metrics.values())).loc[:, ["Model", "Recall", "Precision", "Accuracy", "F1"]]
)

In [None]:
# Delete the endpoint behind every predictor deployed above.
# Iterate over the predictors themselves rather than the `predictors` label
# mapping: the mapping exists for reporting, and any repeated or missing
# entry there would delete one endpoint twice or leave another undeleted.
for predictor in (
    l_predictor,
    le_predictor,
    leb_predictor,
    lsvc_predictor,
    lsvcb_predictor,
):
    smutil.delete_endpoint(predictor)