In [1]:
%matplotlib inline

from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
from mord import LogisticIT
import matplotlib.pylab as plt
import seaborn as sns
from dmba import classificationSummary, gainsChart, liftChart
from dmba.metric import AIC_score


In [2]:
## load the .csv file, and split them into training and test data sets

# Read the raw records, drop the ID and ZIP Code columns, and make the
# remaining column names identifier-friendly (spaces -> underscores).
bank_df = pd.read_csv('UniversalBank.csv')
bank_df = bank_df.drop(columns=['ID', 'ZIP Code'])
bank_df.columns = [c.replace(' ', '_') for c in bank_df.columns]

# Treat education as categorical, convert to dummy variables.
# rename_categories() returns a new Series; its `inplace=` argument was
# deprecated in pandas 1.3 and removed in pandas 2.0, so assign the result back.
new_categories = {1: 'Undergrad', 2: 'Graduate', 3: 'Advanced/Professional'}
bank_df['Education'] = (
    bank_df['Education']
    .astype('category')
    .cat.rename_categories(new_categories)
)
# dtype=int keeps the dummy columns as 0/1 integers on every pandas version
# (pandas >= 2.0 would otherwise emit bool columns, which do not combine with
# the numeric predictors into a single numeric array for the network).
bank_df = pd.get_dummies(bank_df, prefix_sep='_', drop_first=True, dtype=int)

# Outcome is the Personal_Loan column; every other column is a predictor.
y = bank_df['Personal_Loan']
X = bank_df.drop(columns=['Personal_Loan'])

# partition data: 60% training / 40% test, fixed seed for a repeatable split
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.4, random_state=1)


  res = method(*args, **kwargs)


In [3]:
# Preview the first rows of the predictor frame (all columns except Personal_Loan).
X.head()

Unnamed: 0,Age,Experience,Income,Family,CCAvg,Mortgage,Securities_Account,CD_Account,Online,CreditCard,Education_Graduate,Education_Advanced/Professional
0,25,1,49,4,1.6,0,1,0,0,0,0,0
1,45,19,34,3,1.5,0,1,0,0,0,0,0
2,39,15,11,1,1.0,0,0,0,0,0,0,0
3,35,9,100,1,2.7,0,0,0,0,0,1,0
4,35,8,45,4,1.0,0,0,0,0,1,1,0


In [4]:
# Preview the first rows of the outcome series (the Personal_Loan column).
y.head()

0    0
1    0
2    0
3    0
4    0
Name: Personal_Loan, dtype: int64

In [5]:
## build the network

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Seed TensorFlow's global random generator so that a fresh
# "Restart & Run All" starts from the same random initialisation
# (as far as TensorFlow's seeding guarantees go). The value 1 mirrors the
# random_state used for the train/test partition.
tf.random.set_seed(1)

# Feed-forward binary classifier: three hidden layers and one sigmoid unit
# that outputs a single probability per input row.
model = keras.Sequential([
    # LeakyReLU is added as an explicit layer rather than through
    # activation="LeakyReLU": the string form depends on a version-specific
    # name lookup inside Keras, whereas the layer class is part of the public
    # API. NOTE(review): the layer's default negative slope is used here --
    # confirm it matches what the string form resolved to in your Keras version.
    layers.Dense(16),
    layers.LeakyReLU(),
    layers.Dense(16, activation="relu"),
    layers.Dense(8, activation="elu"),
    layers.Dense(1, activation='sigmoid')
])
# NOTE(review): the predictors are passed to the network unscaled; consider
# standardising them before training -- left unchanged here.

In [6]:
## compile procedure

# Training configuration gathered in one place: Adam optimizer, binary
# cross-entropy loss, and accuracy tracked as the reported metric.
compile_settings = {
    "optimizer": "adam",
    "loss": "binary_crossentropy",
    "metrics": ["accuracy"],
}
model.compile(**compile_settings)



In [7]:
## fit model

# Train on the training partition: 5 passes over the data, 128 rows per batch.
# The call is the cell's last expression, so the returned History is displayed.
training_options = dict(epochs=5, batch_size=128)
model.fit(train_X, train_y, **training_options)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x22d55540310>

In [8]:
## predicted probabilities in test data
# The sigmoid output gives one probability per test row; show the first ten.
predictions_prob = model.predict(test_X)
predictions_prob[:10]

array([[0.17997804],
       [0.05218053],
       [0.00925452],
       [0.10727301],
       [0.08445016],
       [0.01083273],
       [0.00432196],
       [0.21189487],
       [0.0309974 ],
       [0.06452024]], dtype=float32)

In [9]:
## predicted results

# Score the test partition, then turn each probability into a 0/1 label:
# strictly above 0.5 -> 1, otherwise 0.
test_scores = model.predict(test_X)
above_cutoff = test_scores > 0.5
predictions = above_cutoff.astype(int)


In [10]:
# confusion matrix
# Compare the actual test labels with the 0/1 predictions using dmba's
# classificationSummary helper (imported in the first cell).

classificationSummary(test_y, predictions)

Confusion Matrix (Accuracy 0.8795)

       Prediction
Actual    0    1
     0 1716   91
     1  150   43


In [11]:
## evaluate accuracy

# The model was compiled with one loss and one metric, so evaluate() returns a
# two-element list: [loss, accuracy]. The name `accuracy` is kept bound to that
# list for backward compatibility, but the values are unpacked and printed with
# labels so the loss is not mistaken for the accuracy.
accuracy = model.evaluate(test_X, test_y)
test_loss, test_accuracy = accuracy

print(f"Test loss:     {test_loss:.4f}")
print(f"Test accuracy: {test_accuracy:.4f}")

[0.3430103063583374, 0.8794999718666077]
