# Model Training

This notebook trains and evaluates two models, an SVM and an ANN, to predict whether a user will accept an offer or not.

In [1]:
import pandas as pd
import numpy as np
import math
import json
import seaborn as sns
import matplotlib.pyplot as plt
import os

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

#from tensorflow import keras
#from tensorflow.keras import layers, activations
#import tensorflow as tf

import sagemaker
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.tensorflow import TensorFlow


# SageMaker session that is handed to the SVM estimator created further down
sagemaker_session = sagemaker.Session()
# IAM execution role passed as `role` to both estimators in this notebook
role = sagemaker.get_execution_role()

#import warnings
#warnings.filterwarnings('ignore')


In [2]:
def get_data(key, criterion,
             base_location='s3://sagemaker-eu-central-1-855372857567/sagemaker/capstone-project'):
    """
    Read a .csv file and split it into a feature frame and a target vector.

    Parameters
    ----------
    key : str
        File name relative to ``base_location`` (e.g. ``'test.csv'``).
    criterion : str
        Name of the column that holds the target variable.
    base_location : str, optional
        Prefix that ``key`` is appended to. Defaults to the project's S3
        prefix, so existing two-argument calls behave as before.

    Returns
    -------
    x : pandas.DataFrame
        All columns of the file except ``criterion``.
    y : numpy.ndarray
        One-dimensional array with the values of the ``criterion`` column.

    Raises
    ------
    KeyError
        If ``criterion`` is not a column of the loaded file.

    Notes
    -----
    NOTE(review): the displayed test frame contains an ``Unnamed: 0`` column,
    which looks like a row index that was written to the .csv. It is kept in
    ``x`` here exactly as before; confirm that the training script treats the
    column the same way.
    """
    # tolerate a trailing slash on the prefix so the key is joined exactly once
    data_location = f"{base_location.rstrip('/')}/{key}"
    data = pd.read_csv(data_location)

    if criterion not in data.columns:
        raise KeyError(f"column '{criterion}' not found in {data_location}")

    # target as a flat array, features as everything else
    y = data[criterion].to_numpy()
    x = data.drop(columns=criterion)
    return x, y

In [3]:
# load the held-out test set and split it into features (x_test) and
# the target column 'event_offer completed' (y_test)
test_data_key = 'test.csv'
x_test, y_test = get_data(key=test_data_key, criterion='event_offer completed')

Unnamed: 0,event_offer received,event_offer viewed,event_transaction,difficulty,duration,mobile,social,web,offer_bogo,offer_discount,...,"age_(39, 49]","age_(49, 59]","age_(59, 69]","age_(69, 79]","age_(79, 89]","age_(89, 99]","age_(99, 109]","income_(30000, 60000]","income_(60000, 90000]","income_(90000, 120000]"
0,0,0,1,0.00,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,0,1,0.00,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0,0,1,0.00,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0,0,1,0.00,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0,0,1,0.00,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61302,0,0,0,0.00,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
61303,0,1,0,0.35,0.571429,1.0,1.0,1.0,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
61304,0,0,0,0.00,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
61305,0,0,1,0.00,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Support Vector Machine

In [4]:
# show the syntax-highlighted source of the SVM training script that the
# SKLearn estimator below uses as its entry point
!pygmentize ../model/SVM_model.py

[34mfrom[39;49;00m [04m[36m__future__[39;49;00m [34mimport[39;49;00m print_function

[34mimport[39;49;00m [04m[36margparse[39;49;00m
[34mimport[39;49;00m [04m[36mos[39;49;00m
[34mimport[39;49;00m [04m[36mpandas[39;49;00m [34mas[39;49;00m [04m[36mpd[39;49;00m

[37m# sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. [39;49;00m
[37m# from sklearn.externals import joblib[39;49;00m
[37m# Import joblib package directly[39;49;00m
[34mimport[39;49;00m [04m[36mjoblib[39;49;00m

[37m## Import any additional libraries you need to define a model[39;49;00m
[34mfrom[39;49;00m [04m[36msklearn[39;49;00m[04m[36m.[39;49;00m[04m[36msvm[39;49;00m [34mimport[39;49;00m SVC
[37m#from sklearn.svm import LinearSVC[39;49;00m

[37m# Provided model load function[39;49;00m
[34mdef[39;49;00m [32mmodel_fn[39;49;00m(model_dir):
    [33m"""Load model from the model_dir. This is the same model that is saved[39;49;00m
[33m    in th

In [5]:
# S3 prefix passed to both estimators as the 'train' channel; the
# estimators' output_path is the 'model' sub-prefix beneath it
input_path = 's3://sagemaker-eu-central-1-855372857567/sagemaker/capstone-project'
output_path = '/'.join([input_path, 'model'])

In [11]:
# show which IAM execution role the estimator is given
print(role)

# SKLearn estimator that runs ../model/SVM_model.py as its entry point on a
# single ml.m4.xlarge instance, with output_path set to the model prefix
svm_estimator = SKLearn(
    entry_point='SVM_model.py',
    source_dir='../model/',
    framework_version='0.23-1',
    py_version='py3',
    role=role,
    sagemaker_session=sagemaker_session,
    instance_type='ml.m4.xlarge',
    instance_count=1,
    output_path=output_path,
)


arn:aws:iam::855372857567:role/service-role/AmazonSageMaker-ExecutionRole-20210810T203812


In [None]:
%%time

# fit SVM estimator: the project's S3 prefix is passed as the 'train' channel
# (%%time reports how long the cell took)
svm_estimator.fit({'train': input_path})

2021-09-10 12:28:46 Starting - Starting the training job...
2021-09-10 12:29:09 Starting - Launching requested ML instancesProfilerReport-1631276925: InProgress
...
2021-09-10 12:29:34 Starting - Preparing the instances for training.........
2021-09-10 12:31:16 Downloading - Downloading input data
2021-09-10 12:31:16 Training - Downloading the training image.....[34m2021-09-10 12:31:55,086 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-09-10 12:31:55,089 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-09-10 12:31:55,102 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-09-10 12:31:55,419 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-09-10 12:31:58,446 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-09-10 12:31:58,462 sagemaker-training-toolkit IN

In [None]:
%%time

# deploy the fitted SVM estimator (one ml.t2.medium instance); the returned
# predictor is used below for predictions and for endpoint deletion
svm_predictor = svm_estimator.deploy(initial_instance_count=1, instance_type='ml.t2.medium')

In [None]:
# use the deployed SVM predictor to predict the test set; x_test is the
# feature frame returned by get_data
svm_predictions = svm_predictor.predict(x_test)

In [None]:
# eyeball the predicted labels against the ground truth (one per line)
print(svm_predictions, y_test, sep='\n')

In [None]:
# Evaluate the SVM on the test set.
# Round the predictions once so that the confusion matrix and the accuracy
# are computed from exactly the same labels.
svm_labels = np.round(np.asarray(svm_predictions, dtype=float)).astype(int).ravel()

# get confusion matrix and display it; labels=[0, 1] keeps the matrix 2x2
# even if one class is missing, so the four-way unpack below cannot fail
cm = confusion_matrix(y_test, svm_labels, labels=[0, 1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
disp.plot(values_format="n")

# get tn, fp, fn, tp (row-major order of the 2x2 matrix)
tn, fp, fn, tp = cm.ravel()

# calculate accuracy, recall, precision
acc = accuracy_score(y_test, svm_labels)
precision = tp / (tp + fp)
recall = tp / (tp + fn)

print(f'Recall:    {recall}')
print(f'Precision: {precision}')
# format as a percentage directly; round(acc, 4) * 100 printed float
# artefacts such as 93.47999999999999
print(f'Accuracy:  {acc:.2%}')

In [None]:
# delete the SVM endpoint now that its predictions have been evaluated;
# svm_predictor cannot be used for further predict() calls afterwards
svm_predictor.delete_endpoint()

### Neural Network

In [47]:
# show the syntax-highlighted source of the ANN training script that the
# TensorFlow estimator below uses as its entry point
!pygmentize ../model/ANN_model.py

In [None]:

# TensorFlow version of the SageMaker training container.
# NOTE(review): `tf_version` was referenced but never defined in this notebook
# (the tensorflow import is commented out). '2.1.0' is chosen because it is
# believed to be the newest release line that still accepts py_version='py3'
# (later versions expect 'py37'). TODO confirm against the version that
# ANN_model.py was written for.
tf_version = '2.1.0'

# create tf estimator: runs ../model/ANN_model.py in script mode on a single
# ml.p2.xlarge instance, with output_path set to the model prefix
tf_estimator = TensorFlow(entry_point='ANN_model.py',
                          source_dir='../model/',
                          role=role,
                          instance_count=1,
                          instance_type='ml.p2.xlarge',
                          framework_version=tf_version,
                          py_version='py3',
                          script_mode=True,
                          output_path=output_path,  # comma was missing here (SyntaxError)
                          hyperparameters={
                              'epochs': 15
                          }
                         )

In [None]:
# fit ANN. input path is the same as for SVM: the project's S3 prefix is
# passed as the 'train' channel
tf_estimator.fit({'train': input_path})

In [None]:
# deploy the fitted ANN (one ml.p2.xlarge instance)
# NOTE(review): no cell in the visible notebook calls
# tf_predictor.delete_endpoint(); remember to delete the endpoint when done
tf_predictor = tf_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.p2.xlarge',
)

In [None]:
# use the deployed ANN to predict the test set.
# The feature frame is called `x_test` (lower case); `X_test` was undefined.
# The frame is converted to a NumPy array because the TensorFlow predictor's
# default JSON serializer is expected to handle ndarrays but not DataFrames
# (TODO confirm against the installed SageMaker SDK version).
tf_predictions = tf_predictor.predict(x_test.to_numpy())

In [None]:
# Evaluate the ANN on the test set.
# A TensorFlow Serving endpoint is expected to answer with a dict of the form
# {'predictions': [...]}; unwrap it before scoring.
# NOTE(review): assumes the network has a single output unit (one probability
# per row) so that rounding yields the class label; confirm in ANN_model.py.
raw_predictions = tf_predictions['predictions'] if isinstance(tf_predictions, dict) else tf_predictions

# round once so the confusion matrix and the accuracy use the same labels
tf_labels = np.round(np.asarray(raw_predictions, dtype=float)).astype(int).ravel()

# get confusion matrix and display it; labels=[0, 1] keeps the matrix 2x2
# even if one class is missing, so the four-way unpack below cannot fail
cm = confusion_matrix(y_test, tf_labels, labels=[0, 1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
disp.plot(values_format="n")

# get tn, fp, fn, tp (row-major order of the 2x2 matrix)
tn, fp, fn, tp = cm.ravel()

# calculate accuracy, recall, precision
acc = accuracy_score(y_test, tf_labels)
precision = tp / (tp + fp)
recall = tp / (tp + fn)

print(f'Recall:    {recall}')
print(f'Precision: {precision}')
# format as a percentage directly; round(acc, 4) * 100 printed float
# artefacts such as 93.47999999999999
print(f'Accuracy:  {acc:.2%}')

## Comparison of models & Results