# Objective: Given a bank customer, can we build a neural-network classifier that predicts whether they will leave the bank?

# 1. Read the dataset

In [67]:
# Importing basic Python packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [68]:
# Load the bank customer records from CSV and preview the first five rows
bank_data_df = pd.read_csv(filepath_or_buffer='bank.csv')
bank_data_df.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [69]:
# Check the dimensions of the freshly loaded frame as (rows, columns)
bank_data_df.shape

(10000, 14)

# 2. Drop the columns that are unique to each user, such as IDs

In [70]:
# Drop the per-customer identifier columns.
# errors='ignore' keeps this cell re-runnable: on a second run, when the
# columns are already gone, the call is a no-op instead of raising KeyError.
bank_data_df = bank_data_df.drop(
    columns=['RowNumber', 'CustomerId', 'Surname'],
    errors='ignore',
)

In [71]:
# Preview the frame after the identifier columns have been dropped
bank_data_df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [72]:
# Re-check the dimensions; three fewer columns than the raw frame are expected
bank_data_df.shape

(10000, 11)

# 3. Distinguish the feature and target set

In [73]:
# y is the target: the 'Exited' column, which flags whether the customer has left
y = bank_data_df.loc[:, 'Exited']
y.shape

(10000,)

In [74]:
# X holds every remaining column, i.e. the frame without the target
X = bank_data_df.drop(columns='Exited')
X.shape

(10000, 10)

In [75]:
# Inspect the feature frame before any encoding.
# NOTE(review): print() renders the frame as plain text; X.head() would give a
# compact rich display instead.
print(X)

      CreditScore Geography  Gender  Age  Tenure    Balance  NumOfProducts  \
0             619    France  Female   42       2       0.00              1   
1             608     Spain  Female   41       1   83807.86              1   
2             502    France  Female   42       8  159660.80              3   
3             699    France  Female   39       1       0.00              2   
4             850     Spain  Female   43       2  125510.82              1   
5             645     Spain    Male   44       8  113755.78              2   
6             822    France    Male   50       7       0.00              2   
7             376   Germany  Female   29       4  115046.74              4   
8             501    France    Male   44       4  142051.07              2   
9             684    France    Male   27       2  134603.88              1   
10            528    France    Male   31       6  102016.72              2   
11            497     Spain    Male   24       3       0.00     

In [76]:
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Geography: fit a dedicated encoder, then replace the column with its integer codes
labelEnc_1 = LabelEncoder().fit(X['Geography'])
X['Geography'] = labelEnc_1.transform(X['Geography'])

# Gender: same procedure with a separate encoder so both mappings stay available
labelEnc_2 = LabelEncoder().fit(X['Gender'])
X['Gender'] = labelEnc_2.transform(X['Gender'])

In [77]:
# Inspect the frame after label encoding; Geography and Gender now hold integer codes
print(X)

      CreditScore  Geography  Gender  Age  Tenure    Balance  NumOfProducts  \
0             619          0       0   42       2       0.00              1   
1             608          2       0   41       1   83807.86              1   
2             502          0       0   42       8  159660.80              3   
3             699          0       0   39       1       0.00              2   
4             850          2       0   43       2  125510.82              1   
5             645          2       1   44       8  113755.78              2   
6             822          0       1   50       7       0.00              2   
7             376          1       0   29       4  115046.74              4   
8             501          0       1   44       4  142051.07              2   
9             684          0       1   27       2  134603.88              1   
10            528          0       1   31       6  102016.72              2   
11            497          2       1   24       3   

In [78]:
# One-hot encode Geography (positional column 1).
# OneHotEncoder's `categorical_features` argument was removed in scikit-learn
# 0.22; ColumnTransformer is the supported way to encode a subset of columns.
from sklearn.compose import ColumnTransformer

onehotencoder = ColumnTransformer(
    transformers=[('geography', OneHotEncoder(), [1])],
    remainder='passthrough',  # keep all other columns, placed after the dummy columns
    sparse_threshold=0,       # always return a dense ndarray (replaces .toarray())
)
X = onehotencoder.fit_transform(X)
# Drop the first dummy column (presumably to avoid the dummy-variable trap)
X = X[:, 1:]

In [79]:
# Inspect the feature matrix after one-hot encoding; X is now a NumPy array
print(X)

[[0.0000000e+00 0.0000000e+00 6.1900000e+02 ... 1.0000000e+00
  1.0000000e+00 1.0134888e+05]
 [0.0000000e+00 1.0000000e+00 6.0800000e+02 ... 0.0000000e+00
  1.0000000e+00 1.1254258e+05]
 [0.0000000e+00 0.0000000e+00 5.0200000e+02 ... 1.0000000e+00
  0.0000000e+00 1.1393157e+05]
 ...
 [0.0000000e+00 0.0000000e+00 7.0900000e+02 ... 0.0000000e+00
  1.0000000e+00 4.2085580e+04]
 [1.0000000e+00 0.0000000e+00 7.7200000e+02 ... 1.0000000e+00
  0.0000000e+00 9.2888520e+04]
 [0.0000000e+00 0.0000000e+00 7.9200000e+02 ... 1.0000000e+00
  0.0000000e+00 3.8190780e+04]]


# 4. Divide the data set into Train and test sets

In [80]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
    test_size=0.3,
)

# 5. Normalize the train and test data

In [81]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same scaling to both splits
stdScaler = StandardScaler().fit(X_train)
X_train = stdScaler.transform(X_train)
X_test = stdScaler.transform(X_test)

# 6. Initialize & build the model

In [82]:
import tensorflow as tf

# Start from an empty sequential container; layers are added in the following cells
model = tf.keras.Sequential()

In [83]:
# Adding the input layer and the first hidden layer.
# The input width is read from the training matrix so it stays in sync with
# the preprocessing steps (11 columns in the recorded run).
model.add(tf.keras.layers.Dense(6, input_dim=X_train.shape[1], activation='relu'))

Instructions for updating:
Colocations handled automatically by placer.


In [84]:
# Adding the second hidden layer: 6 units with ReLU activation
second_hidden = tf.keras.layers.Dense(units=6, activation='relu')
model.add(second_hidden)

In [85]:
# Output layer: a single sigmoid unit
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
model.add(output_layer)

# 7. Optimize the model

In [86]:
# Create optimizer with non-default learning rate.
# The rate is passed positionally because the keyword was renamed from `lr` to
# `learning_rate` across TensorFlow/Keras releases; the first positional
# parameter of SGD is the learning rate under both spellings.
sgd_optimizer = tf.keras.optimizers.SGD(0.03)

In [87]:
# Compile the model with binary cross-entropy loss, tracking accuracy during training
model.compile(
    loss='binary_crossentropy',
    optimizer=sgd_optimizer,
    metrics=['accuracy'],
)

In [88]:
# Fit the model.
# `epochs` is the supported keyword; the legacy Keras 1 spelling `nb_epoch`
# is rejected as an unrecognized argument by current tf.keras.
model.fit(X_train, y_train, batch_size = 10, epochs = 100)

Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100

<tensorflow.python.keras.callbacks.History at 0x1a33208438>

# 8. Predict the results using 0.5 as a threshold

In [89]:
# Turn the model's sigmoid outputs into boolean class predictions
threshold = 0.5
y_pred = model.predict(X_test)
# Compare against `threshold` so that changing it above actually takes effect
y_pred = (y_pred > threshold)

In [90]:
# Inspect the thresholded predictions (boolean array, one row per test sample)
print(y_pred)

[[False]
 [False]
 [False]
 ...
 [False]
 [False]
 [False]]


# 9. Print the Accuracy score and confusion matrix

In [91]:
from sklearn.metrics import confusion_matrix

# Cross-tabulate the true test labels against the thresholded predictions
ConfMatrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix: \n{}".format(ConfMatrix))

Confusion Matrix: 
[[2260  113]
 [ 301  326]]


In [92]:
from sklearn.metrics import accuracy_score

# Fraction of test samples whose predicted label matches the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [93]:
# Report the accuracy as a percentage with two decimals
accuracy_pct = accuracy * 100
print("Accuracy is {:.2f}%".format(accuracy_pct))

Accuracy is 86.20%
