<p style="font-size:40px">6. Model Deployment</p> <br>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import make_scorer, plot_confusion_matrix, confusion_matrix, classification_report, f1_score, precision_score, recall_score, matthews_corrcoef, roc_auc_score

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import RobustScaler, binarize
from imblearn.over_sampling import SMOTE
from sklearn.metrics import precision_recall_curve
from sklearn.preprocessing import binarize, OneHotEncoder

from sklearn.compose import ColumnTransformer
from imblearn.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

import pickle
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Read the dataset from disk (presumably the cleaned output of an earlier
# notebook, judging by the file name).
bank = pd.read_csv('bank_clean.csv')

# Encode the target as 1 for 'yes' and 0 for 'no'; every other column is a
# candidate feature.
y = bank['y'].map({'yes': 1, 'no': 0})
X = bank.drop(columns='y')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=101,
)

In [3]:
# Columns routed to each branch of the preprocessor. Any column not listed
# here is not passed to a transformer.
numeric_columns = [
    'emp.var.rate', 'euribor3m', 'nr.employed',
    'cons.price.idx', 'cons.conf.idx', 'age',
]
categorical_columns = ['month', 'pdays']

# Numeric branch: scale with RobustScaler.
numeric_transformer = Pipeline(steps=[('scaler', RobustScaler())])

# Categorical branch: one-hot encode; categories unseen during fit are
# ignored instead of raising at prediction time.
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_columns),
    ('cat', categorical_transformer, categorical_columns),
])

In [5]:
# SMOTE oversampler with a fixed seed; sampling_strategy=1.0 requests a
# minority/majority ratio of 1 after resampling.
sm = SMOTE(sampling_strategy=1.0, random_state=101)

In [6]:
# Random forest with hard-coded hyperparameters (presumably the result of
# tuning done elsewhere -- the search itself is not part of this notebook).
forest = RandomForestClassifier(
    n_estimators=300,
    max_depth=8,
    max_features=0.8,
    min_samples_leaf=35,
    random_state=101,
)

# Full deployment pipeline: preprocess -> oversample with SMOTE -> classify.
rfc_tuned_smote = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('sm', sm),
    ('rfc', forest),
])

rfc_tuned_smote.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('preprocessor',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('num',
                                                  Pipeline(memory=None,
                                                           steps=[('scaler',
                                                                   RobustScaler(copy=True,
                                                                                quantile_range=(25.0,
                                                                                                75.0),
                                                                                with_centering=True,
                                                                                with_scaling=True))],
                                                        

In [9]:
# Probability cut-off above which a client is predicted as class 1 ('yes').
# It is lower than the 0.5 that `predict` would use.
DECISION_THRESHOLD = 0.33

# Column 1 of predict_proba holds the probability of class 1.
y_proba_rfc_tuned = rfc_tuned_smote.predict_proba(X_test)[:, 1]

# `threshold` is passed by keyword: it is keyword-only in current scikit-learn
# releases, where the positional form raises TypeError. The keyword form also
# works on older releases. binarize needs 2-D input, hence the list wrapper
# and the trailing [0]; values strictly greater than the threshold become 1.
y_pred_rfc_tuned = binarize([y_proba_rfc_tuned], threshold=DECISION_THRESHOLD)[0]

In [10]:
# Confusion matrix of the thresholded predictions on the test split:
# rows are the true classes (0, 1), columns are the predicted classes.
confusion_matrix(y_test, y_pred_rfc_tuned)

array([[5285, 1994],
       [ 276,  683]])

In [11]:
# Per-class precision / recall / F1 for the thresholded predictions.
# print() is used because classification_report returns a multi-line string.
print(classification_report(y_test, y_pred_rfc_tuned))

              precision    recall  f1-score   support

           0       0.95      0.73      0.82      7279
           1       0.26      0.71      0.38       959

    accuracy                           0.72      8238
   macro avg       0.60      0.72      0.60      8238
weighted avg       0.87      0.72      0.77      8238



# Pickle write

In [14]:
# Persist the whole fitted pipeline (preprocessing + SMOTE step + forest) so
# the serving side can score raw feature rows without re-implementing any of
# the preprocessing.
filename = 'randomforest.sav'

# The context manager guarantees the file is flushed and closed even if
# pickling fails part-way through.
with open(filename, 'wb') as model_file:
    pickle.dump(rfc_tuned_smote, model_file)

# Pickle read

In [3]:
# Restore the fitted pipeline from disk. The context manager closes the file
# handle as soon as loading finishes.
# NOTE: pickle.load can execute arbitrary code, so only load files you produced
# yourself, and load them with the same library versions used for dumping.
with open('randomforest.sav', 'rb') as model_file:
    loadModel = pickle.load(model_file)

In [4]:
# Display the unpickled pipeline to check that its steps came back intact.
loadModel

Pipeline(memory=None,
         steps=[('preprocessor',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('num',
                                                  Pipeline(memory=None,
                                                           steps=[('scaler',
                                                                   RobustScaler(copy=True,
                                                                                quantile_range=(25.0,
                                                                                                75.0),
                                                                                with_centering=True,
                                                                                with_scaling=True))],
                                                        

In [5]:
# Pick one row of the test split to use as a smoke-test input; the fixed seed
# makes the same row come back on every run.
sample = X_test.sample(n=1, random_state=101)

In [6]:
# Score the sampled row with the reloaded pipeline; column 1 is the
# probability of class 1 ('yes' in the target encoding).
loadModel.predict_proba(sample)[:,1]

array([0.2808091])

In [7]:
# Show the feature values of the row that was just scored.
sample

Unnamed: 0,age,job,marital,education,default,month,campaign,pdays,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
29559,33,blue-collar,married,high.school,no,apr,1,Never contacted before,nonexistent,-1.8,93.075,-47.1,1.405,5099.1


In [8]:
# Show the first test row as a reference for the columns and value formats
# the pipeline receives.
X_test.head(1)

Unnamed: 0,age,job,marital,education,default,month,campaign,pdays,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
3669,32,services,married,high.school,no,may,1,Never contacted before,nonexistent,1.1,93.994,-36.4,4.859,5191.0


In [9]:
import pandas as pd

# Try a manually constructed sample

In [14]:
# A single hand-written client record, using the same column names as the
# training features, to show that the reloaded pipeline can score input
# that did not come from the original DataFrame.
sample1_record = {
    'age': 30,
    'job': 'admin.',
    'marital': 'married',
    'education': 'high.school',
    'default': 'no',
    'month': 'may',
    'campaign': 1,
    'pdays': 'Never contacted before',
    'poutcome': 'nonexistent',
    'emp.var.rate': 1.1,
    'cons.price.idx': 93.075,
    'cons.conf.idx': -36.4,
    'euribor3m': 4.859,
    'nr.employed': 5099.1,
}

# One dict -> a one-row frame; column order follows the dict's key order.
sample1 = pd.DataFrame([sample1_record])

In [15]:
# Score the hand-built record; column 1 is the probability of class 1
# ('yes' in the target encoding).
loadModel.predict_proba(sample1)[:,1]

array([0.31218358])