# <font color='#00008B'>  <center> Machine and Deep Learning Solutions for Bank Direct Marketing Campaigns </center> </font>

### Importing Libraries & Data Uploading

In [1]:
import warnings
warnings.filterwarnings("ignore") #To filter the future warnings. 

# Libraries to help with reading and manipulating data
import pandas as pd
import numpy as np

# Libaries to help with data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Removes the limit from the number of displayed columns and rows.
pd.set_option("display.max_columns", None)
# pd.set_option('display.max_rows', None)
pd.set_option("display.max_rows", 200)

from sklearn.preprocessing import MinMaxScaler
# Library to split data
from sklearn.model_selection import train_test_split

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline

from sklearn.model_selection import KFold

In [2]:
# Get Final Dataset for Prediction
df = pd.read_csv(r"datasets/processed_data_final.csv")
df.head()

Unnamed: 0,age,education,duration,campaign,pdays,previous,cons.price.idx,cons.conf.idx,nr.employed,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_services,job_student,job_technician,job_unemployed,job_unknown,marital_divorced,marital_single,marital_unknown,housing_yes,default_no,default_yes,loan_unknown,loan_yes,month_apr,month_dec,month_jul,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,contact_telephone,day_of_week_fri,day_of_week_mon,day_of_week_thu,day_of_week_wed,poutcome_failure,poutcome_success,y
0,0.75,0.285714,0.40528,0.0,0.0,0,0.698753,0.685714,0.859735,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0
1,0.769231,0.714286,0.231366,0.0,0.0,0,0.698753,0.685714,0.859735,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0
2,0.384615,0.714286,0.350932,0.0,0.0,0,0.698753,0.685714,0.859735,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0
3,0.442308,0.428571,0.234472,0.0,0.0,0,0.698753,0.685714,0.859735,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0
4,0.75,0.714286,0.476708,0.0,0.0,0,0.698753,0.685714,0.859735,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0


In [3]:
df.shape

(35165, 45)

### 4.5 Dataset Balancing using Synthetic Minority Oversampling (SMOTE) Technique 

In [4]:
X_raw = df.drop(['y'], axis=1)
X_raw.shape

(35165, 44)

In [5]:
y_raw = df['y']
y_raw.shape

(35165,)

In [6]:
y_raw.value_counts()

0    32435
1     2730
Name: y, dtype: int64

In [7]:
# Synthetic Minority Oversampling (SMOTE) technique was used in this study to obtain a balanced dataset. This is due to its simplicity and effectiveness [Ref 1].
# Ref 1: Chaurasia, Priyanka, et al. "Modelling assistive technology adoption for people with dementia." Journal of biomedical informatics 63 (2016): 235-248.

over = SMOTE(sampling_strategy=0.4, random_state=100)
under = RandomUnderSampler(sampling_strategy=1, random_state=100)
steps = [('o', over), ('u', under)]
pipeline = Pipeline(steps=steps)
# transform the dataset
X, y = pipeline.fit_resample(X_raw, y_raw)

In [8]:
X.shape

(25948, 44)

In [9]:
y.value_counts()

0    12974
1    12974
Name: y, dtype: int64

## <font color='#00008B'> 5. Building and Tunning the Classifiers

In [10]:
# Helper functions
def partial_gini(y_actual, y_pred):
    # If the predictions y_pred are binary class probabilities
    if y_pred.ndim == 2:
        if y_pred.shape[1] == 2:
            y_pred = y_pred[:, 1]
    gini = lambda a, p: 2 * metrics.roc_auc_score(a, p) - 1
    return gini(y_actual, y_pred) / gini(y_actual, y_actual)


def get_scores(y_test, y_pred):
    auc = metrics.roc_auc_score(y_test, y_pred)
    brier_score = 1-metrics.brier_score_loss(y_test, y_pred)
    gini = partial_gini(y_test, y_pred)
    return auc, brier_score, gini

acc_table= {}
df_acc_kfold = pd.DataFrame()
df_brier_kfold = pd.DataFrame()
df_gini_kfold = pd.DataFrame()

### 5.1 Conventional Methods
#### 5.1.1 Model 1-1 : Logistic Regression

In [11]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
model_1_1 = LogisticRegression(random_state=1)

cv_outer = KFold(n_splits=10, shuffle=True, random_state=1)
# enumerate splits
result_acc = []
result_brier = []
result_gini = []
k=1
for train_ix, test_ix in cv_outer.split(X):
    # splitting the data
    X_train, X_test = X.iloc[train_ix], X.iloc[test_ix]
    y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
    # defining the model
    model = LogisticRegression(random_state=1)
    model.fit(X_train,y_train)
    # evaluating the model on the hold out dataset
    yhat = model.predict(X_test)
    # evaluating the model
    acc, brier, gini = get_scores(y_test, yhat)
    # storing the result
    result_acc.append(acc)
    result_brier.append(brier)
    result_gini.append(gini)
    print("K = ", k, ", auc = ", acc, ", brier score = ", brier, ", partial gini = ", gini)
    k+=1
    
df_acc_kfold["Conv-Logistic"]=result_acc
df_brier_kfold["Conv-Logistic"]=result_brier
df_gini_kfold["Conv-Logistic"]=result_gini
# summarize the estimated performance of the model
print("\n***  Conventional - Logistic Regression  ***\n")
print('AUC : %.3f (%.3f)' % (np.mean(result_acc), np.std(result_acc)))
print('Brier Score : %.3f (%.3f)' % (np.mean(result_brier), np.std(result_brier)))
print('Partial Gini : %.3f (%.3f)' % (np.mean(result_gini), np.std(result_gini)))

K =  1 , auc =  0.8924461705150808 , brier score =  0.892485549132948 , partial gini =  0.7848923410301616
K =  2 , auc =  0.897525414660246 , brier score =  0.8971098265895954 , partial gini =  0.7950508293204921
K =  3 , auc =  0.896094904646267 , brier score =  0.8959537572254335 , partial gini =  0.7921898092925339
K =  4 , auc =  0.8971138208001634 , brier score =  0.8971098265895954 , partial gini =  0.7942276416003269
K =  5 , auc =  0.8955768497409573 , brier score =  0.8955684007707129 , partial gini =  0.7911536994819146
K =  6 , auc =  0.8951203039407515 , brier score =  0.8951830443159923 , partial gini =  0.790240607881503
K =  7 , auc =  0.9021794954098993 , brier score =  0.902504816955684 , partial gini =  0.8043589908197986
K =  8 , auc =  0.8938558944176922 , brier score =  0.8944123314065511 , partial gini =  0.7877117888353844
K =  9 , auc =  0.8929046785951434 , brier score =  0.8928296067848882 , partial gini =  0.7858093571902869
K =  10 , auc =  0.90031988142142

#### 5.1.2 Model 1-2 : Decision Tree

In [12]:
# Decision Tree Libraries 
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.model_selection import GridSearchCV

# Nested Cross-Validation was used in this study to both tune and evalute the model
cv_outer = KFold(n_splits=10, shuffle=True, random_state=1)
result_acc = []
result_brier = []
result_gini = []

k=1
for train_ix, test_ix in cv_outer.split(X):
    # splitting the data
    X_train, X_test = X.iloc[train_ix], X.iloc[test_ix]
    y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
    # configuring the cross-validation procedure
    cv_inner = KFold(n_splits=5, shuffle=True, random_state=1)
    # defining the model
    dtree_estimator = DecisionTreeClassifier(random_state=1) 
    # Grid of parameters to choose from
    parameters = {"ccp_alpha": [0.01, 0.1, 0.2, 0.3, 0.4, 0.5], 
              'min_samples_leaf': [3, 4, 5, 6, 7,8]}
    acc_scorer = metrics.make_scorer(metrics.roc_auc_score)
    # Running the grid search
    grid_obj = GridSearchCV(dtree_estimator, parameters, scoring=acc_scorer, cv=cv_inner, refit=True)
    grid_obj = grid_obj.fit(X_train, y_train)
    # Set the clf to the best combination of parameters
    model_1_2 = grid_obj.best_estimator_
    # evaluating the model on the hold out dataset
    yhat = model_1_2.predict(X_test)
    # evaluating the model
    acc, brier, gini = get_scores(y_test, yhat)
    # storing the result
    result_acc.append(acc)
    result_brier.append(brier)
    result_gini.append(gini)
    print("K = ", k, ", auc = ", acc, ", brier score = ", brier, ", partial gini = ", gini)
    k+=1
    
df_acc_kfold["Conv-DecisionTree"]=result_acc
df_brier_kfold["Conv-DecisionTree"]=result_brier
df_gini_kfold["Conv-DecisionTree"]=result_gini

# summarize the estimated performance of the model
print("\n***  Conventional - Decision Tree  ***\n")
print('AUC : %.3f (%.3f)' % (np.mean(result_acc), np.std(result_acc)))
print('Brier Score : %.3f (%.3f)' % (np.mean(result_brier), np.std(result_brier)))
print('Partial Gini : %.3f (%.3f)' % (np.mean(result_gini), np.std(result_gini)))

K =  1 , auc =  0.8861606188192916 , brier score =  0.8863198458574181 , partial gini =  0.7723212376385833
K =  2 , auc =  0.8856607811663991 , brier score =  0.8840077071290944 , partial gini =  0.7713215623327982
K =  3 , auc =  0.8862613281565578 , brier score =  0.8859344894026975 , partial gini =  0.7725226563131156
K =  4 , auc =  0.8863282934542557 , brier score =  0.8863198458574181 , partial gini =  0.7726565869085114
K =  5 , auc =  0.8898067485354968 , brier score =  0.8897880539499037 , partial gini =  0.7796134970709936
K =  6 , auc =  0.8627152230550179 , brier score =  0.8628131021194605 , partial gini =  0.7254304461100358
K =  7 , auc =  0.845943627713224 , brier score =  0.8477842003853564 , partial gini =  0.6918872554264479
K =  8 , auc =  0.8818129124308899 , brier score =  0.8832369942196532 , partial gini =  0.7636258248617798
K =  9 , auc =  0.8798687189876548 , brier score =  0.8797224363916731 , partial gini =  0.7597374379753097
K =  10 , auc =  0.8843513865

### 5.2 Ensemble Methods
#### 5.2.1 Model 2-1 : Randome Forest

In [13]:
from sklearn.ensemble import RandomForestClassifier

# Nested Cross-Validation was used in this study to both tune and evalute the model
cv_outer = KFold(n_splits=10, shuffle=True, random_state=1)
result_acc = []
result_brier = []
result_gini = []

k=1
for train_ix, test_ix in cv_outer.split(X):
    # splitting the data
    X_train, X_test = X.iloc[train_ix], X.iloc[test_ix]
    y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
    # configuring the cross-validation procedure
    cv_inner = KFold(n_splits=5, shuffle=True, random_state=1)
    # defining the model
    rf_estimator_weighted = RandomForestClassifier(random_state=1)
    # Grid of parameters to choose from
    parameters= {"n_estimators": [100, 250, 500, 750], "max_samples": [0.1, 0.5, 1, 4]}
    # Type of scoring used to compare parameter combinations
    acc_scorer = metrics.make_scorer(metrics.roc_auc_score)
    # # Run the grid search
    grid_obj = GridSearchCV(rf_estimator_weighted, parameters, scoring=acc_scorer, cv=cv_inner, refit=True)
    grid_obj = grid_obj.fit(X_train, y_train)
    # Set the clf to the best combination of parameters
    model_2_1 = grid_obj.best_estimator_
    # evaluating the model on the hold out dataset
    yhat = model_2_1.predict(X_test)
    # evaluating the model
    acc, brier, gini = get_scores(y_test, yhat)
    # storing the results
    result_acc.append(acc)
    result_brier.append(brier)
    result_gini.append(gini)
    print("K = ", k, ", auc = ", acc, ", brier score = ", brier, ", partial gini = ", gini)
    k+=1
    
df_acc_kfold["Ensemble-RandomForest"]=result_acc
df_brier_kfold["Ensemble-RandomForest"]=result_brier
df_gini_kfold["Ensemble-RandomForest"]=result_gini

# summarize the estimated performance of the model
print("\n***  Ensemble - RandomForest  ***\n")
print('AUC : %.3f (%.3f)' % (np.mean(result_acc), np.std(result_acc)))
print('Brier Score : %.3f (%.3f)' % (np.mean(result_brier), np.std(result_brier)))
print('Partial Gini : %.3f (%.3f)' % (np.mean(result_gini), np.std(result_gini)))

K =  1 , auc =  0.9432565828553506 , brier score =  0.9433526011560693 , partial gini =  0.8865131657107013
K =  2 , auc =  0.9393124665596577 , brier score =  0.9387283236994219 , partial gini =  0.8786249331193154
K =  3 , auc =  0.9419632461613767 , brier score =  0.941811175337187 , partial gini =  0.8839264923227534
K =  4 , auc =  0.9410480271528583 , brier score =  0.9410404624277456 , partial gini =  0.8820960543057166
K =  5 , auc =  0.937967551051199 , brier score =  0.9379576107899807 , partial gini =  0.875935102102398
K =  6 , auc =  0.9428730028048942 , brier score =  0.9429672447013487 , partial gini =  0.8857460056097883
K =  7 , auc =  0.9481063605172869 , brier score =  0.948747591522158 , partial gini =  0.8962127210345738
K =  8 , auc =  0.9429329409666488 , brier score =  0.9437379576107899 , partial gini =  0.8858658819332976
K =  9 , auc =  0.936838400825589 , brier score =  0.9367771781033154 , partial gini =  0.873676801651178
K =  10 , auc =  0.942428481699309

#### 5.2.2 Model 2-2 : XGBoost

In [15]:
## XGBoost libary for building XGBoost Classifier
from xgboost import XGBClassifier

# Nested Cross-Validation was used in this study to both tune and evalute the model
cv_outer = KFold(n_splits=10, shuffle=True, random_state=1)
result_acc = []
result_brier = []
result_gini = []
k=1
for train_ix, test_ix in cv_outer.split(X):
    # splitting the data
    X_train, X_test = X.iloc[train_ix], X.iloc[test_ix]
    y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
    # configuring the cross-validation procedure
    cv_inner = KFold(n_splits=5, shuffle=True, random_state=1)
    # defining the model
    model = XGBClassifier(random_state=1)
    # Grid of parameters to choose from
    parameters = {
                 "n_estimators": [50, 100, 150],
                 "max_depth":[1, 2, 3],
                 "learning_rate":[0.3, 0.4],
                 "colsample_bytree":[0.5, 0.75, 1],
                 "subsample":[0.6, 0.8]
     }
    # Type of scoring used to compare parameter combinations
    acc_scorer = metrics.make_scorer(metrics.roc_auc_score)
    # # Run the grid search
    grid_obj = GridSearchCV(model, parameters, scoring=acc_scorer, cv=cv_inner, refit=True)
    grid_obj = grid_obj.fit(X_train, y_train)
    # Set the clf to the best combination of parameters
    model_2_2 = grid_obj.best_estimator_
    # evaluating the model on the hold out dataset
    yhat = model_2_2.predict(X_test)
    # evaluating the model
    acc, brier, gini = get_scores(y_test, yhat)
    # storing the results
    result_acc.append(acc)
    result_brier.append(brier)
    result_gini.append(gini)
    print("K = ", k, ", auc = ", acc, ", brier score = ", brier, ", partial gini = ", gini)
    k+=1    
    
df_acc_kfold["Ensemble-XGB"]=result_acc
df_brier_kfold["Ensemble-XGB"]=result_brier
df_gini_kfold["Ensemble-XGB"]=result_gini

# summarize the estimated performance of the model
print("\n***  Ensemble - XGBClassifier  ***\n")
print('AUC : %.3f (%.3f)' % (np.mean(result_acc), np.std(result_acc)))
print('Brier Score : %.3f (%.3f)' % (np.mean(result_brier), np.std(result_brier)))
print('Partial Gini : %.3f (%.3f)' % (np.mean(result_gini), np.std(result_gini)))

K =  1 , auc =  0.9441315644409385 , brier score =  0.9441233140655106 , partial gini =  0.888263128881877
K =  2 , auc =  0.941729088639201 , brier score =  0.941811175337187 , partial gini =  0.883458177278402
K =  3 , auc =  0.9417939475447542 , brier score =  0.941811175337187 , partial gini =  0.8835878950895084
K =  4 , auc =  0.9398837901379621 , brier score =  0.9398843930635838 , partial gini =  0.8797675802759242
K =  5 , auc =  0.9364181060239761 , brier score =  0.9364161849710982 , partial gini =  0.8728362120479523
K =  6 , auc =  0.9417743895702132 , brier score =  0.941811175337187 , partial gini =  0.8835487791404264
K =  7 , auc =  0.9395690227044147 , brier score =  0.9394990366088632 , partial gini =  0.8791380454088293
K =  8 , auc =  0.9414169787765293 , brier score =  0.941811175337187 , partial gini =  0.8828339575530586
K =  9 , auc =  0.9421903432008099 , brier score =  0.9421742482652274 , partial gini =  0.8843806864016197
K =  10 , auc =  0.9365120434344012

### 5.3 Multilayer Perceptron Neural Networks - MLP

#### 5.3.1 Model 3-1 : MLP with one hidden layer - MLP1

In [20]:
# Use scikit-learn to grid search the batch size and epochs
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2
from scikeras.wrappers import KerasClassifier
# import BatchNormalization
from tensorflow.keras.layers import BatchNormalization

def create_model_one_hidden_layer(units, dropout_rate, L2, batch_normalization = "yes"):
    # create model
    model = Sequential()
    model.add(Dense(60, input_shape=(X_train.shape[1],)))
    model.add(Dense(units = units, kernel_regularizer=l2(l=L2)))
    if batch_normalization == "yes":
        model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    # Compile model
    model.compile(loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [21]:
# Nested Cross-Validation was used in this study to both tune and evalute the model
cv_outer = KFold(n_splits=10, shuffle=True, random_state=1)
result_acc = []
result_brier = []
result_gini = []

k=1
for train_ix, test_ix in cv_outer.split(X):
    # splitting the data
    X_train, X_test = X.iloc[train_ix], X.iloc[test_ix]
    y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
    # configuring the cross-validation procedure
    cv_inner = KFold(n_splits=5, shuffle=True, random_state=1)
    # defining the model
    model = KerasClassifier(model=create_model_one_hidden_layer, epochs=10, batch_size=64, verbose=0)
    # Grid of parameters to choose from
    units = [5, 10, 15, 20]
    dropout_rate = [0, 0.25, 0.5]
    L2 = [0.1,0.001, 0]
    batch_normalization = ["yes"]
    learning_rate = [0.01, 0.001, 0.0001]
    param_grid = dict(model__units=units, model__dropout_rate = dropout_rate,
                      model__batch_normalization = batch_normalization, optimizer__learning_rate=learning_rate, 
                      model__L2 = L2)
    # Running the grid search
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=cv_inner, refit=True)
    grid_result = grid.fit(X_train, y_train)
    model_3_1 = grid_result.best_estimator_
    # evaluating the model on the hold out dataset
    yhat = model_3_1.predict(X_test)
    # evaluating the model
    acc, brier, gini = get_scores(y_test, yhat)
    # storing the results
    result_acc.append(acc)
    result_brier.append(brier)
    result_gini.append(gini)
    print("K = ", k, ", auc = ", acc, ", brier score = ", brier, ", partial gini = ", gini)
    k+=1    
    
df_acc_kfold["MLP - MLP1"]=result_acc
df_brier_kfold["MLP - MLP1"]=result_brier
df_gini_kfold["MLP - MLP1"]=result_gini

# summarize the estimated performance of the model
print("\n***  MLP with one hidden layer - MLP1  ***\n")
print('AUC : %.3f (%.3f)' % (np.mean(result_acc), np.std(result_acc)))
print('Brier Score : %.3f (%.3f)' % (np.mean(result_brier), np.std(result_brier)))
print('Partial Gini : %.3f (%.3f)' % (np.mean(result_gini), np.std(result_gini)))

K =  1 , auc =  0.8950877476441433 , brier score =  0.8951830443159923 , partial gini =  0.7901754952882867
K =  2 , auc =  0.8954744069912609 , brier score =  0.8951830443159923 , partial gini =  0.7909488139825218
K =  3 , auc =  0.8919488706297195 , brier score =  0.8917148362235068 , partial gini =  0.783897741259439
K =  4 , auc =  0.8948097007079273 , brier score =  0.8947976878612717 , partial gini =  0.7896194014158546
K =  5 , auc =  0.894804651720873 , brier score =  0.8947976878612717 , partial gini =  0.789609303441746
K =  6 , auc =  0.8928054643757062 , brier score =  0.8928709055876686 , partial gini =  0.7856109287514124
K =  7 , auc =  0.901045344632043 , brier score =  0.9021194605009634 , partial gini =  0.802090689264086
K =  8 , auc =  0.8877519172462993 , brier score =  0.8882466281310212 , partial gini =  0.7755038344925986
K =  9 , auc =  0.8952747990272222 , brier score =  0.8951426368542791 , partial gini =  0.7905495980544444
K =  10 , auc =  0.90055958430879

#### 5.3.2 Model 3-2 : MLP with Three hidden layers - MLP3

In [22]:
def create_model_three_hidden_layers(units_1, units_2, units_3, dropout_rate, L2, batch_normalization = "yes"):
    # create model
    model = Sequential()
    model.add(Dense(60, input_shape=(X_train.shape[1],)))
    model.add(Dense(units = units_1, kernel_regularizer=l2(l=L2)))
    model.add(Dense(units = units_2, kernel_regularizer=l2(l=L2)))
    model.add(Dense(units = units_3, kernel_regularizer=l2(l=L2)))
    if batch_normalization == "yes":
        model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    # Compile model
    model.compile(loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [24]:
# Nested Cross-Validation was used in this study to both tune and evalute the model
cv_outer = KFold(n_splits=10, shuffle=True, random_state=1)
result_acc = []
result_brier = []
result_gini = []
    
k=1
for train_ix, test_ix in cv_outer.split(X):
    # splitting the data
    X_train, X_test = X.iloc[train_ix], X.iloc[test_ix]
    y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
    # configuring the cross-validation procedure
    cv_inner = KFold(n_splits=3, shuffle=True, random_state=1)
    # defining the model
    model = KerasClassifier(model=create_model_three_hidden_layers, epochs=10, batch_size=64, verbose=0)
    # Grid of parameters to choose from
    units = [5, 10, 15, 20]
    # dropout_rate = [0, 0.25, 0.5] 
    dropout_rate = [0]
    # L2 = [0.1, 0.01, 0.001, 0] 
    L2 = [0.1]
    # batch_normalization = ["yes", "no"] 
    batch_normalization = ["no"]
    learning_rate = [0.01, 0.001, 0.0001] #learning_rate = [0.01]
    param_grid = dict(model__units_1 = units, model__units_2 = units, model__units_3 = units,
                  model__dropout_rate = dropout_rate,
                 model__batch_normalization = batch_normalization, 
                 optimizer__learning_rate=learning_rate, 
                 model__L2 = L2)
    # Running the grid search
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=cv_inner, refit=True)
    grid_result = grid.fit(X_train, y_train)
    model_3_2 = grid_result.best_estimator_
    # evaluating the model on the hold out dataset
    yhat =  model_3_2.predict(X_test)
    # evaluating the model
    acc, brier, gini = get_scores(y_test, yhat)
    # storing the results
    result_acc.append(acc)
    result_brier.append(brier)
    result_gini.append(gini)
    print("K = ", k, ", auc = ", acc, ", brier score = ", brier, ", partial gini = ", gini)
    k+=1

df_acc_kfold["MLP - MLP3"]=result_acc
df_brier_kfold["MLP - MLP3"]=result_brier
df_gini_kfold["MLP - MLP3"]=result_gini

# summarize the estimated performance of the model
print("\n***  MLP with Three hidden layers - MLP3  ***\n")
print('AUC : %.3f (%.3f)' % (np.mean(result_acc), np.std(result_acc)))
print('Brier Score : %.3f (%.3f)' % (np.mean(result_brier), np.std(result_brier)))
print('Partial Gini : %.3f (%.3f)' % (np.mean(result_gini), np.std(result_gini)))

K =  1 , auc =  0.8966562397532706 , brier score =  0.8967244701348748 , partial gini =  0.7933124795065412
K =  2 , auc =  0.883458177278402 , brier score =  0.8836223506743738 , partial gini =  0.7669163545568041
K =  3 , auc =  0.8843981523283608 , brier score =  0.8843930635838151 , partial gini =  0.7687963046567217
K =  4 , auc =  0.8770586502216208 , brier score =  0.8770712909441233 , partial gini =  0.7541173004432415
K =  5 , auc =  0.8940419576764205 , brier score =  0.8940269749518305 , partial gini =  0.788083915352841
K =  6 , auc =  0.8782659909259714 , brier score =  0.8782273603082852 , partial gini =  0.7565319818519427
K =  7 , auc =  0.8997016612812337 , brier score =  0.9001926782273603 , partial gini =  0.7994033225624675
K =  8 , auc =  0.8850633137149991 , brier score =  0.8855491329479769 , partial gini =  0.7701266274299983
K =  9 , auc =  0.8906082714646892 , brier score =  0.8905165767154973 , partial gini =  0.7812165429293785
K =  10 , auc =  0.89313712194

#### 5.3.3  Model 3-3 : MLP with Five hidden layers - MLP5

In [29]:
def create_model_five_hidden_layers(units_1,units_2,units_3,units_4, units_5, dropout_rate, L2, batch_normalization = "yes"):
    # create model
    model = Sequential()
    model.add(Dense(60, input_shape=(X_train.shape[1],)))
    model.add(Dense(units = units_1, kernel_regularizer=l2(l=L2)))
    model.add(Dense(units = units_2, kernel_regularizer=l2(l=L2)))
    model.add(Dense(units = units_3, kernel_regularizer=l2(l=L2)))
    model.add(Dense(units = units_4, kernel_regularizer=l2(l=L2)))
    model.add(Dense(units = units_5, kernel_regularizer=l2(l=L2)))
    if batch_normalization == "yes":
        model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    # Compile model
    model.compile(loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [30]:
# Nested Cross-Validation was used in this study to both tune and evalute the model
cv_outer = KFold(n_splits=10, shuffle=True, random_state=1)
result_acc = []
result_brier = []
result_gini = []

k=1
for train_ix, test_ix in cv_outer.split(X):
    # splitting the data
    X_train, X_test = X.iloc[train_ix], X.iloc[test_ix]
    y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
    # configuring the cross-validation procedure
    cv_inner = KFold(n_splits=3, shuffle=True, random_state=1)
    # defining the model
    model = KerasClassifier(model=create_model_five_hidden_layers, epochs=10, batch_size=64, verbose=0)
    # Grid of parameters to choose from
    units = [5, 10, 15, 20]
    #dropout_rate = [0, 0.25, 0.5]
    dropout_rate = [0]
    #L2 = [0.1, 0.01, 0.001, 0]
    L2 = [0.1]
    #batch_normalization = ["yes", "no"]
    batch_normalization = ["no"]
    #learning_rate = [0.01, 0.001, 0.0001]
    learning_rate = [0.01]
    param_grid = dict(model__units_1=[15], model__units_2=[15], model__units_3=[20], model__units_4=units, model__units_5=units,
                  model__dropout_rate = dropout_rate,
                 model__batch_normalization = batch_normalization, 
                 optimizer__learning_rate=learning_rate, 
                 model__L2 = L2)
    # Running the grid search
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=cv_inner, refit=True)
    grid_result = grid.fit(X_train, y_train)
    model_3_3 = grid_result.best_estimator_
    # evaluating the model on the hold out dataset
    yhat = model_3_3.predict(X_test)
    # evaluating the model
    acc, brier, gini = get_scores(y_test, yhat)
    # storing the results
    result_acc.append(acc)
    result_brier.append(brier)
    result_gini.append(gini)
    print("K = ", k, ", auc = ", acc, ", brier score = ", brier, ", partial gini = ", gini)
    k+=1    
    
df_acc_kfold["MLP - MLP5"]=result_acc
df_brier_kfold["MLP - MLP5"]=result_brier
df_gini_kfold["MLP - MLP5"]=result_gini
# summarize the estimated performance of the model
print("\n***  MLP with Five hidden layers - MLP5  ***\n")
print('AUC : %.3f (%.3f)' % (np.mean(result_acc), np.std(result_acc)))
print('Brier Score : %.3f (%.3f)' % (np.mean(result_brier), np.std(result_brier)))
print('Partial Gini : %.3f (%.3f)' % (np.mean(result_gini), np.std(result_gini)))

K =  1 , auc =  0.8913035859394678 , brier score =  0.8913294797687861 , partial gini =  0.7826071718789356
K =  2 , auc =  0.886414303549135 , brier score =  0.8863198458574181 , partial gini =  0.77282860709827
K =  3 , auc =  0.8820746030689978 , brier score =  0.8820809248554913 , partial gini =  0.7641492061379955
K =  4 , auc =  0.8743562541505643 , brier score =  0.874373795761079 , partial gini =  0.7487125083011286
K =  5 , auc =  0.887858136531738 , brier score =  0.8878612716763006 , partial gini =  0.7757162730634759
K =  6 , auc =  0.8862369511834371 , brier score =  0.8863198458574181 , partial gini =  0.7724739023668743
K =  7 , auc =  0.8909119927528478 , brier score =  0.8905587668593449 , partial gini =  0.7818239855056957
K =  8 , auc =  0.8773363652577135 , brier score =  0.8770712909441233 , partial gini =  0.754672730515427
K =  9 , auc =  0.8839416761334756 , brier score =  0.8839629915188898 , partial gini =  0.7678833522669513
K =  10 , auc =  0.894830506861569

### 5.4 Deep Learning - LSTM & CNN
#### 5.4.1 Model 4-1 : LSTM - with one LSTM

In [33]:
from keras.layers import Conv2D, Flatten, LSTM, Activation, Input, Conv1D, MaxPooling1D
from keras.models import Model

def create_one_lstm_layer(units):
    model = Sequential()
    model.add(LSTM(units = units, return_sequences=False, input_shape=(X_train.shape[1],1)))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy',  metrics=['accuracy'])
    return model

In [34]:
# Nested Cross-Validation was used in this study to both tune and evalute the model
cv_outer = KFold(n_splits=10, shuffle=True, random_state=1)
result_acc = []
result_brier = []
result_gini = []

k=1
for train_ix, test_ix in cv_outer.split(X):
    # splitting the data
    X_train, X_test = X.iloc[train_ix], X.iloc[test_ix]
    y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
    # configuring the cross-validation procedure
    cv_inner = KFold(n_splits=3, shuffle=True, random_state=1)
    # defining the model
    model = KerasClassifier(model=create_one_lstm_layer, epochs=10, batch_size=64, verbose=0)
    # Grid of parameters to choose from
    units = [10, 20, 50]
    param_grid = dict(model__units=units)
    # Running the grid search
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=cv_inner, refit=True)
    grid_result = grid.fit(X_train, y_train)
    model_4_1 = grid_result.best_estimator_
    # evaluating the model on the hold out dataset
    yhat = model_4_1.predict(X_test)
    # evaluating the model
    acc, brier, gini = get_scores(y_test, yhat)
    # storing the results
    result_acc.append(acc)
    result_brier.append(brier)
    result_gini.append(gini)
    print("K = ", k, ", auc = ", acc, ", brier score = ", brier, ", partial gini = ", gini)
    k+=1
    
df_acc_kfold["DL - LSTM1"]=result_acc
df_brier_kfold["DL - LSTM1"]=result_brier
df_gini_kfold["DL - LSTM1"]=result_gini

# summarize the estimated performance of the model
print("\n***  DL with one LSTM layer - LSTM1  ***\n")
print('AUC : %.3f (%.3f)' % (np.mean(result_acc), np.std(result_acc)))
print('Brier Score : %.3f (%.3f)' % (np.mean(result_brier), np.std(result_brier)))
print('Partial Gini : %.3f (%.3f)' % (np.mean(result_gini), np.std(result_gini)))

K =  1 , auc =  0.7997254470544068 , brier score =  0.7996146435452793 , partial gini =  0.5994508941088137
K =  2 , auc =  0.8123105047262351 , brier score =  0.8111753371868979 , partial gini =  0.6246210094524702
K =  3 , auc =  0.7870327358181222 , brier score =  0.7861271676300579 , partial gini =  0.5740654716362443
K =  4 , auc =  0.8358449568935304 , brier score =  0.8358381502890173 , partial gini =  0.6716899137870609
K =  5 , auc =  0.7887937435328416 , brier score =  0.7888246628131022 , partial gini =  0.5775874870656832
K =  6 , auc =  0.7938162241919445 , brier score =  0.7942196531791907 , partial gini =  0.5876324483838891
K =  7 , auc =  0.8195095332862553 , brier score =  0.8211946050096339 , partial gini =  0.6390190665725106
K =  8 , auc =  0.8126404494382022 , brier score =  0.8115606936416185 , partial gini =  0.6252808988764045
K =  9 , auc =  0.8180660601964221 , brier score =  0.818041634541249 , partial gini =  0.6361321203928443
K =  10 , auc =  0.8113401473

#### 5.4.2 Model 4-2 : CNN1 + LSTM2

In [35]:
def create_one_conv_2_lstm_layers(filters):
    inputs = Input(shape = (X_train.shape[1],1,))
    x = Conv1D(filters = filters, kernel_size = 3, padding = 'same', activation = 'relu')(inputs)
    x = MaxPooling1D(pool_size = 2, padding = 'same')(x)
    x = LSTM(128, return_sequences = True)(x)
    x = LSTM(128)(x)
    outputs = Dense(1,activation ='sigmoid')(x)
    model = Model(inputs = inputs,outputs = outputs)
    model.compile(loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model

In [36]:
# Nested Cross-Validation was used in this study to both tune and evalute the model
cv_outer = KFold(n_splits=10, shuffle=True, random_state=1)
result_acc = []
result_brier = []
result_gini = []

k=1
for train_ix, test_ix in cv_outer.split(X):
    # configuring the cross-validation procedure
    X_train, X_test = X.iloc[train_ix], X.iloc[test_ix]
    y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
    # configure the cross-validation procedure
    cv_inner = KFold(n_splits=3, shuffle=True, random_state=1)
    # defining the model
    model = KerasClassifier(model=create_one_conv_2_lstm_layers, epochs=10, batch_size=64, verbose=0)
    # Grid of parameters to choose from
    filters = [10, 20, 50]
    param_grid = dict(model__filters=filters)
    # Running the grid search
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=cv_inner, refit=True)
    grid_result = grid.fit(X_train, y_train)
    model_4_2 = grid_result.best_estimator_
    # evaluating the model on the hold out dataset
    yhat = model_4_2.predict(X_test)
    # storing the results
    acc, brier, gini = get_scores(y_test, yhat)
    # store the result
    result_acc.append(acc)
    result_brier.append(brier)
    result_gini.append(gini)
    print("K = ", k, ", auc = ", acc, ", brier score = ", brier, ", partial gini = ", gini)
    k+=1
    
df_acc_kfold["DL - CNN1&LSTM2"]=result_acc
df_brier_kfold["DL - CNN1&LSTM2"]=result_brier
df_gini_kfold["DL - CNN1&LSTM2"]=result_gini

# summarize the estimated performance of the model
print("\n***  DL with one CNN and 2 LSTM layers - CNN1&LSTM2  ***\n")
print('AUC : %.3f (%.3f)' % (np.mean(result_acc), np.std(result_acc)))
print('Brier Score : %.3f (%.3f)' % (np.mean(result_brier), np.std(result_brier)))
print('Partial Gini : %.3f (%.3f)' % (np.mean(result_gini), np.std(result_gini)))

K =  1 , auc =  0.8693215298931252 , brier score =  0.8697495183044316 , partial gini =  0.7386430597862503
K =  2 , auc =  0.8725610843588372 , brier score =  0.8736030828516378 , partial gini =  0.7451221687176743
K =  3 , auc =  0.8844727031226982 , brier score =  0.8840077071290944 , partial gini =  0.7689454062453964
K =  4 , auc =  0.8847654834613004 , brier score =  0.8847784200385357 , partial gini =  0.7695309669226007
K =  5 , auc =  0.8751741900533767 , brier score =  0.8751445086705203 , partial gini =  0.7503483801067534
K =  6 , auc =  0.8741210203114251 , brier score =  0.874373795761079 , partial gini =  0.7482420406228503
K =  7 , auc =  0.8829185480016833 , brier score =  0.8832369942196532 , partial gini =  0.7658370960033667
K =  8 , auc =  0.8797708221865526 , brier score =  0.8805394990366089 , partial gini =  0.7595416443731051
K =  9 , auc =  0.8843402629781482 , brier score =  0.8843484965304549 , partial gini =  0.7686805259562963
K =  10 , auc =  0.8945830716

## <font color='#00008B'> 6. Model Comparison

### 6.1 Comparing the Models regarding AUC

In [None]:
# In progress

### 6.2 Comparing the Models regarding Brier Score

In [None]:
# In progress

### 6.3 Comparing the Models regarding Partial Gini

In [None]:
# In progress

### 6.4 Comparing the Models regarding the Average Performances

In [None]:
# In progress