# Executive Summary

An Artificial Neural Network was developed to estimate the probability that a bank’s members will be retained (current and future). The predictive model was built on the bank’s historical data (credit score, gender, geographic location, bank tenure, balance, credit card, active status, etc.) using Python (Keras backed by TensorFlow, Scikit-Learn and Pandas) to analyze and structure the data set.

## Model Performance

The model’s accuracy was 83.5% on the training set and 84% on the test set. After hyperparameter optimization, the training-set accuracy increased to 85% with the following hyperparameters: batch_size=25, nb_epoch=500, optimizer=rmsprop.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder

%matplotlib inline

In [2]:
# Load the churn data set from disk and preview its first rows
csv_path = 'Churn_Modelling.csv'
dataset = pd.read_csv(csv_path)
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
# Separate the data set into the feature matrix and the target vector.
# Features are the columns at positions 3-12 (CreditScore through
# EstimatedSalary in the schema printed by info()); the target is the column
# at position 13 (Exited).
feature_columns = dataset.columns[3:13]
target_column = dataset.columns[13]
x = dataset[feature_columns].values
y = dataset[target_column].values
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
RowNumber          10000 non-null int64
CustomerId         10000 non-null int64
Surname            10000 non-null object
CreditScore        10000 non-null int64
Geography          10000 non-null object
Gender             10000 non-null object
Age                10000 non-null int64
Tenure             10000 non-null int64
Balance            10000 non-null float64
NumOfProducts      10000 non-null int64
HasCrCard          10000 non-null int64
IsActiveMember     10000 non-null int64
EstimatedSalary    10000 non-null float64
Exited             10000 non-null int64
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [4]:
# Inspect the raw feature matrix (object dtype: it mixes numbers and strings)
x

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ..., 
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [5]:
# Encode the two categorical columns of the feature matrix:
# column 1 holds the Geography labels, column 2 the Gender labels.
labelencoder_x_1 = LabelEncoder()
geography_codes = labelencoder_x_1.fit_transform(x[:, 1])
labelencoder_x_2 = LabelEncoder()
gender_codes = labelencoder_x_2.fit_transform(x[:, 2])

# One-hot encode Geography with numpy rather than
# OneHotEncoder(categorical_features=[1]): that keyword was deprecated in
# scikit-learn 0.20 and removed in 0.22, so it fails on current releases.
geography_dummies = np.eye(len(labelencoder_x_1.classes_))[geography_codes]
# Drop the first dummy column so the remaining ones are not linearly dependent.
geography_dummies = geography_dummies[:, 1:]

# The other columns keep their original order and follow the dummy columns.
# Once Geography is removed, Gender sits at index 1.
remaining = np.delete(x, 1, axis=1)
remaining[:, 1] = gender_codes

# NOTE: this cell rebinds x, so re-run the structuring cell before re-running it.
x = pd.DataFrame(np.hstack([geography_dummies, remaining.astype(float)]))

In [6]:
# Split the data into a training set (80%) and a test set (20%).
# A fixed random_state makes the split identical on every run, so the rows
# used for training and evaluation do not change between executions.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=0)

In [7]:
# Standardise the features. The scaler is fitted on the training set only and
# then applied to both the training and the test set.
sc = StandardScaler().fit(xtrain)
xtrain = sc.transform(xtrain)
xtest = sc.transform(xtest)

In [8]:
#Importing the Keras libraries and packages. 
#Keras will be using TensorFlow as a backend.
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

Using TensorFlow backend.


In [9]:
# The Artificial Neural Network portion of the model.
# Architecture: 11 inputs -> Dense(6, relu) -> Dense(6, relu) -> Dense(1, sigmoid).
# A Dropout layer follows each hidden layer to reduce overfitting.
# The dropout fraction is passed as `rate`, the Keras 2 keyword; the Keras 1
# name `p` is only a legacy alias and is rejected by later releases.
classifier = Sequential()

# First hidden layer; input_dim=11 declares the number of input features
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11))
classifier.add(Dropout(rate=0.1))

# Second hidden layer
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))
classifier.add(Dropout(rate=0.1))

# Output layer: a single sigmoid unit
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))


In [10]:
# Compile the ANN with its optimizer, loss and reported metric:
#   - optimizer: Adam, a stochastic-gradient-descent variant
#   - loss: binary cross-entropy (logarithmic loss)
#   - metric: accuracy
classifier.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# Train the network on the training set (20 epochs, mini-batches of 10 rows).
# Without any hyperparameter tuning, the recorded run reached a training-set
# accuracy of 0.8354.
classifier.fit(xtrain, ytrain, epochs=20, batch_size=10)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0xee57748>

In [12]:
# Predict on the test set; the sigmoid output layer yields one value per row
ypred=classifier.predict(xtest)
ypred

array([[ 0.17409831],
       [ 0.20448771],
       [ 0.23335359],
       ..., 
       [ 0.16684215],
       [ 0.22242725],
       [ 0.0849918 ]], dtype=float32)

In [13]:
# Threshold the predicted values at 0.5 to obtain boolean class labels
ypred=(ypred>0.5)
ypred

array([[False],
       [False],
       [False],
       ..., 
       [False],
       [False],
       [False]], dtype=bool)

In [14]:
#Without hyperparameter tuning, the test-set accuracy of the recorded run is 0.84
#Confusion matrix for the test set: rows are the true classes, columns the predicted classes
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(ytest,ypred)
cm

array([[1555,   33],
       [ 286,  126]], dtype=int64)

In [19]:
# Test-set accuracy: correctly classified rows (the diagonal of the confusion
# matrix) divided by all rows. Dividing by cm.sum() instead of a hard-coded
# 2000 keeps the figure correct when the test-set size changes.
np.around(np.trace(cm) / cm.sum(), decimals=3)

0.83999999999999997

In [20]:
# Single prediction for a hypothetical new member built from made-up values.
# The row follows the 11-column layout of the encoded feature matrix and is
# scaled with the scaler fitted on the training set. The recorded run returned
# False, i.e. the model expects this member to stay.
new_member = np.array([[0.0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])
newpred = classifier.predict(sc.transform(new_member)) > 0.5
newpred

array([[False]], dtype=bool)

In [21]:
# Sanity check of the encoded feature matrix: column count, dtypes and nulls
check=pd.DataFrame(x)
check.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 11 columns):
0     10000 non-null float64
1     10000 non-null float64
2     10000 non-null float64
3     10000 non-null float64
4     10000 non-null float64
5     10000 non-null float64
6     10000 non-null float64
7     10000 non-null float64
8     10000 non-null float64
9     10000 non-null float64
10    10000 non-null float64
dtypes: float64(11)
memory usage: 859.5 KB


In [22]:
#Optimizing the model by evaluating different hyper parameters 
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

In [23]:
def build_classifier(optimizer='adam'):
    """Build and compile the ANN used for hyperparameter search.

    The network has 11 inputs, two hidden layers of 6 ReLU units each and a
    single sigmoid output unit. It is compiled with binary cross-entropy loss
    and reports accuracy.

    Parameters
    ----------
    optimizer : str, optional
        Name of the Keras optimizer passed to ``compile``. Defaults to
        ``'adam'``, so a call without arguments behaves as before. Exposing it
        as an argument lets ``KerasClassifier`` / ``GridSearchCV`` tune it;
        a grid entry is only accepted if the build function takes an argument
        with that name.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    classifier=Sequential()
    classifier.add(Dense(units=6,kernel_initializer='uniform',activation='relu',input_dim=11))
    classifier.add(Dense(units=6,kernel_initializer='uniform',activation='relu'))
    classifier.add(Dense(units=1,kernel_initializer='uniform',activation='sigmoid'))
    classifier.compile(optimizer=optimizer,loss='binary_crossentropy',metrics=['accuracy'])
    return classifier
    

In [None]:
# Hyperparameter search: 10-fold cross-validated grid search over batch size,
# number of epochs and optimizer, scored by accuracy.
classifier = KerasClassifier(build_fn=build_classifier)

# Every grid entry must be a list. The epoch count uses the Keras 2 name
# `epochs` (the same keyword passed to classifier.fit earlier in the notebook).
# For the 'optimizer' entry to be accepted, build_fn must take an `optimizer`
# keyword argument.
parameters = {
    'batch_size': [25, 32],
    'epochs': [100, 500],
    'optimizer': ['adam', 'rmsprop'],
}

grid_search = GridSearchCV(estimator=classifier, param_grid=parameters, scoring='accuracy', cv=10)
grid_search = grid_search.fit(xtrain, ytrain)

# Best hyperparameter combination and its mean cross-validated accuracy
best_parameter = grid_search.best_params_
best_accuracy = grid_search.best_score_
best_accuray = best_accuracy  # misspelled original name, kept as an alias