# Project - Bank Client Retention
# This is a deep learning project whose goal is to predict which client profile is more likely to close their bank account.

In [3]:
# First as always I will import the main libraries.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Load the churn dataset from disk and preview its first rows.

csv_path = '../input/Churn_Modelling.csv'
dataset = pd.read_csv(csv_path)

dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
# Split the dataset into the independent variables ("X") and the dependent variable ("y").
# "RowNumber", "CustomerId" and "Surname" (positions 0-2) are left out on purpose,
# because none of them is relevant for the model.

feature_columns = slice(3, 13)  # "CreditScore" through "EstimatedSalary"
target_column = 13              # "Exited"

X = dataset.iloc[:, feature_columns].values
y = dataset.iloc[:, target_column].values

In [7]:
# "Geography" (column 1 of X) and "Gender" (column 2 of X) hold text categories,
# so they must be encoded as numbers before the data can be fed to the network.

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# X_1 refers to the "Geography" column. The fitted encoder is kept so the integer
# codes can still be mapped back to country names with inverse_transform().
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])

# X_2 refers to the "Gender" column. It has two categories, so one 0/1 column is enough.
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])

# One-hot encode "Geography". The old OneHotEncoder(categorical_features=[1]) argument
# was removed from scikit-learn, so the column is selected with a ColumnTransformer.
# drop='first' removes the first dummy column (avoiding the dummy variable trap),
# which is what the former `X = X[:, 1:]` step did.
onehotencoder = OneHotEncoder(drop='first')
geography_transformer = ColumnTransformer(
    transformers=[('geography', onehotencoder, [1])],
    remainder='passthrough',  # keep every other column, placed after the dummy columns
    sparse_threshold=0,       # always return a dense array
)

# The passthrough columns come from an object array, so cast the result to float.
X = geography_transformer.fit_transform(X).astype(float)

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [9]:
# Let's check that the encoding worked.

X

# The "y" variable is left unmodified: it only holds binary values, so encoding it is unnecessary for this model.

array([[0.0000000e+00, 0.0000000e+00, 6.1900000e+02, ..., 1.0000000e+00,
        1.0000000e+00, 1.0134888e+05],
       [0.0000000e+00, 1.0000000e+00, 6.0800000e+02, ..., 0.0000000e+00,
        1.0000000e+00, 1.1254258e+05],
       [0.0000000e+00, 0.0000000e+00, 5.0200000e+02, ..., 1.0000000e+00,
        0.0000000e+00, 1.1393157e+05],
       ...,
       [0.0000000e+00, 0.0000000e+00, 7.0900000e+02, ..., 0.0000000e+00,
        1.0000000e+00, 4.2085580e+04],
       [1.0000000e+00, 0.0000000e+00, 7.7200000e+02, ..., 1.0000000e+00,
        0.0000000e+00, 9.2888520e+04],
       [0.0000000e+00, 0.0000000e+00, 7.9200000e+02, ..., 1.0000000e+00,
        0.0000000e+00, 3.8190780e+04]])

In [10]:
# Hold out 20% of the rows as the test set; the fixed random_state keeps the split reproducible.

from sklearn.model_selection import train_test_split
split_parts = train_test_split(X, y, test_size=0.20, random_state=0)
X_train, X_test, y_train, y_test = split_parts

In [11]:
# Apply feature scaling so that columns with large magnitudes (for example the
# balance and salary columns) do not dominate the columns with small magnitudes.
# The scaler is fitted on the training set only and then reused for the test set.

from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler().fit(X_train)
X_train = sc_X.transform(X_train)
X_test = sc_X.transform(X_test)

In [12]:
# Now for the most exciting part: building the ANN!

# First of all, let's import the Keras libraries and packages.

import keras
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


In [13]:
# Initialize the ANN.
# Because this is a classification problem, the model variable is named "classifier".

classifier = Sequential()

In [16]:
# Add the input layer together with the first hidden layer.
# The Keras 1 keywords "output_dim" and "init" are replaced by their Keras 2 names,
# "units" and "kernel_initializer".

classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11))

# "units" uses a simple rule of thumb (not necessarily the best choice): the average of
# the number of inputs and outputs, so (11 + 1) / 2 = 6.
# "activation" is "relu", a common choice for hidden layers.
# "input_dim" is 11 because X has 11 feature columns after encoding.

  This is separate from the ipykernel package so we can avoid doing imports until


In [17]:
# Add the second hidden layer.
# The Keras 1 keywords "output_dim" and "init" are replaced by their Keras 2 names,
# "units" and "kernel_initializer".

classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))

# Note that "input_dim" is not passed this time: the input size is inferred from the previous layer.

  This is separate from the ipykernel package so we can avoid doing imports until


In [18]:
# Almost finished: add the output layer.
# The Keras 1 keywords "output_dim" and "init" are replaced by their Keras 2 names,
# "units" and "kernel_initializer".

classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

# "units" is 1 because the model predicts a single value: whether the client leaves the bank.
# "activation" is "sigmoid", so the output can be read as a probability between 0 and 1.

  This is separate from the ipykernel package so we can avoid doing imports until


In [19]:
# Compile the ANN with the Adam optimizer, binary cross-entropy loss and accuracy as the reported metric.

compile_options = {
    'optimizer': 'adam',
    'loss': 'binary_crossentropy',
    'metrics': ['accuracy'],
}
classifier.compile(**compile_options)

# With more than one output class, a different cross-entropy loss would be needed.

In [20]:
# Train the ANN on the training set.
# "epochs" is the Keras 2 name of the former "nb_epoch" argument.

classifier.fit(X_train, y_train, batch_size=10, epochs=100)

  This is separate from the ipykernel package so we can avoid doing imports until


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7fc87d852748>

# That's AMAZING!!!
# It is great to watch the artificial brain you built working in real time!!!

In [23]:
# The ANN is trained, so let's predict on the test set.
# The network outputs one value per client; values above 0.5 are treated as "leaves the bank".

exit_scores = classifier.predict(X_test)
y_pred = exit_scores > 0.5

In [24]:
# Build the confusion matrix to judge whether the model is good enough to be delivered to the client.

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [25]:
# Let's see the result: rows are the actual classes, columns are the predicted classes.

cm

array([[1507,   88],
       [ 189,  216]])

In [26]:
# To make the result easier to read as a percentage, compute the accuracy:
# correct predictions (the diagonal of the confusion matrix) divided by all predictions.
# It is computed from "cm" rather than from hard-coded counts, so the value stays
# correct when the notebook is re-run and the confusion matrix changes.

float(cm.trace() / cm.sum())

# That's good!!!

0.8615

# It was fantastic to do my first Deep Learning project and share it with you.

# See you in the next Project!!! 😎🚀👍