### Import lbraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

### Read dataset

In [20]:
df = pd.read_csv('bank-full.csv', delimiter=';')

In [5]:
df

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,51,technician,married,tertiary,no,825,no,no,cellular,17,nov,977,3,-1,0,unknown,yes
45207,71,retired,divorced,primary,no,1729,no,no,cellular,17,nov,456,2,-1,0,unknown,yes
45208,72,retired,married,secondary,no,5715,no,no,cellular,17,nov,1127,5,184,3,success,yes
45209,57,blue-collar,married,secondary,no,668,no,no,telephone,17,nov,508,4,-1,0,unknown,no


### Transform dataframe

In [21]:
df['education'] = df['education'].apply(
                    lambda row: 1 if row == 'primary' else (
                        2 if row == 'sencondary' else (
                            3 if row == 'tertiary' else 0
                        )
                    )
                )

In [26]:
df['poutcome'] = df['poutcome'].apply(
                    lambda row: 1 if row == 'other' else (
                        2 if row == 'failure' else (
                            3 if row == 'success' else 0
                        )
                    )
                )

In [22]:
for item in ['default','housing','loan','y']:
    df[item] = df[item].apply(
        lambda row: 1 if row == 'yes' else 0
    )

In [29]:
from sklearn.preprocessing import LabelEncoder
label = LabelEncoder()
for item in ['job','marital','contact','month']:
    df[item] = label.fit_transform(df[item])

In [30]:
df

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,4,1,3,0,2143,1,0,2,5,8,261,1,-1,0,0,0
1,44,9,2,0,0,29,1,0,2,5,8,151,1,-1,0,0,0
2,33,2,1,0,0,2,1,1,2,5,8,76,1,-1,0,0,0
3,47,1,1,0,0,1506,1,0,2,5,8,92,1,-1,0,0,0
4,33,11,2,0,0,1,0,0,2,5,8,198,1,-1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,51,9,1,3,0,825,0,0,0,17,9,977,3,-1,0,0,1
45207,71,5,0,1,0,1729,0,0,0,17,9,456,2,-1,0,0,1
45208,72,5,1,0,0,5715,0,0,0,17,9,1127,5,184,3,3,1
45209,57,1,1,0,0,668,0,0,1,17,9,508,4,-1,0,0,0


### Grid search for Artificial Neural Network (ANN)

In [59]:
MLP = MLPClassifier()
searchANN = GridSearchCV(estimator = MLP,
                     param_grid = {
                         'hidden_layer_sizes': [10, 20, 50, 100], 
                         'solver': ['lbfgs', 'sgd', 'adam']
                     } , scoring='f1',
                     cv=5,
                     n_jobs = -1)

In [60]:
searchANN.fit(df.drop('y', axis=1), df['y'])

GridSearchCV(cv=5, estimator=MLPClassifier(), n_jobs=-1,
             param_grid={'hidden_layer_sizes': [10, 20, 50, 100],
                         'solver': ['lbfgs', 'sgd', 'adam']},
             scoring='f1')

In [61]:
searchANN.best_estimator_

MLPClassifier(hidden_layer_sizes=20)

### Grid search for Naïve Bayes (NB)

In [62]:
NB = GaussianNB()
searchNB = GridSearchCV(estimator = NB,
                     param_grid = {
                         'var_smoothing': np.logspace(0,-9, num=10)
                     } , scoring='f1',
                     cv=5,
                     n_jobs = -1)

In [63]:
searchNB.fit(df.drop('y', axis=1), df['y'])

GridSearchCV(cv=5, estimator=GaussianNB(), n_jobs=-1,
             param_grid={'var_smoothing': array([1.e+00, 1.e-01, 1.e-02, 1.e-03, 1.e-04, 1.e-05, 1.e-06, 1.e-07,
       1.e-08, 1.e-09])},
             scoring='f1')

In [64]:
searchNB.best_estimator_

GaussianNB(var_smoothing=1e-08)

### Train and test split

In [49]:
xTrain, xTest, yTrain, yTest = train_test_split(
    df.drop('y', axis=1), df['y'],
    test_size = .2
)

### Model fitting for ANN

In [65]:
modelANN = searchANN.best_estimator_
modelANN.fit(xTrain, yTrain)

MLPClassifier(hidden_layer_sizes=20)

### Train evaluation for ANN

In [66]:
prediction = modelANN.predict(xTrain)
print(classification_report(yTrain,prediction))
print(confusion_matrix(yTrain, prediction))

              precision    recall  f1-score   support

           0       0.89      0.99      0.94     31914
           1       0.66      0.11      0.19      4254

    accuracy                           0.89     36168
   macro avg       0.78      0.55      0.56     36168
weighted avg       0.87      0.89      0.85     36168

[[31683   231]
 [ 3796   458]]


### Test evaluation for ANN

In [67]:
prediction = modelANN.predict(xTest)
print(classification_report(yTest,prediction))
print(confusion_matrix(yTest, prediction))

              precision    recall  f1-score   support

           0       0.90      0.99      0.94      8008
           1       0.66      0.12      0.20      1035

    accuracy                           0.89      9043
   macro avg       0.78      0.55      0.57      9043
weighted avg       0.87      0.89      0.86      9043

[[7947   61]
 [ 915  120]]


### Model fitting for NB

In [68]:
modelNB = searchNB.best_estimator_
modelNB.fit(xTrain, yTrain)

GaussianNB(var_smoothing=1e-08)

### Train evaluation for NB

In [69]:
prediction = modelNB.predict(xTrain)
print(classification_report(yTrain,prediction))
print(confusion_matrix(yTrain, prediction))

              precision    recall  f1-score   support

           0       0.93      0.88      0.90     31914
           1       0.36      0.52      0.43      4254

    accuracy                           0.84     36168
   macro avg       0.65      0.70      0.67     36168
weighted avg       0.86      0.84      0.85     36168

[[28071  3843]
 [ 2061  2193]]


### Test evaluation for NB

In [71]:
prediction = modelNB.predict(xTest)
print(classification_report(yTest,prediction))
print(confusion_matrix(yTest, prediction))

              precision    recall  f1-score   support

           0       0.93      0.87      0.90      8008
           1       0.35      0.52      0.42      1035

    accuracy                           0.83      9043
   macro avg       0.64      0.70      0.66      9043
weighted avg       0.87      0.83      0.85      9043

[[7006 1002]
 [ 497  538]]
