In [1]:
from keras.models import Sequential
from keras.layers import Dense

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
dataset = pd.read_csv('Churn_Modelling.csv')

### Profile the data

1. Sample
2. Shape
3. Characteristics

In [4]:
dataset.head()


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
dataset.shape


(10000, 14)

In [6]:
dataset.describe()

Unnamed: 0,RowNumber,CustomerId,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,5000.5,15690940.0,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,2886.89568,71936.19,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,1.0,15565700.0,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,2500.75,15628530.0,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,5000.5,15690740.0,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,7500.25,15753230.0,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,10000.0,15815690.0,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


### Create features and target based on what import features are believed to contribute to the model, slicing out row number, customerid and surname as they are not needed

In [20]:
# features and target
X = dataset.iloc[:,3:13].values
y = dataset.iloc[:,13].values

In [21]:

#### These are the labels of exited/churn
print(y)

[1 0 1 ... 1 1 0]


### Encode into matrix as to allow for neural network to learn

In [22]:
#turn to encoded labels eg numbers
labelencoder_X_1 = LabelEncoder()
X[:,1] = labelencoder_X_1.fit_transform(X[:,1])
labelencoder_X_2 = LabelEncoder()
X[:,2] = labelencoder_X_2.fit_transform(X[:,2])

In [23]:
X

array([[619, 0, 0, ..., 1, 1, 101348.88],
       [608, 2, 0, ..., 0, 1, 112542.58],
       [502, 0, 0, ..., 1, 0, 113931.57],
       ...,
       [709, 0, 0, ..., 0, 1, 42085.58],
       [772, 1, 1, ..., 1, 0, 92888.52],
       [792, 0, 0, ..., 1, 0, 38190.78]], dtype=object)

### need to create dummy variables as the countries column do not have ordinal values


In [24]:

onehotencoder = OneHotEncoder(categorical_features = [1])
X = onehotencoder.fit_transform(X).toarray()

In [28]:
X = X[:,1:]

In [31]:
### Check the shape of the data

print(X.shape)
print(y.shape)

(10000, 11)
(10000,)


### need to create dummy variables as the countries column does not have ordinal value

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0  )

### Feature Scaling

In [33]:

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

### Build model
#### Adding layers
#### each feature will be an input node X_test has 11 features so 11 input nodes
#### choose activation function ReLU and the output layer will have a sigmoid
#### add input and 1st,2nd Hidden layers
#### output layer with a binary output so only use 1 node if you have three or four outputs then use softmax

In [34]:
#Building Model
classifier = Sequential()
classifier.add(Dense(units = 6, 
                     kernel_initializer='uniform', 
                     activation ='relu',
                    input_dim = 11))
classifier.add(Dense(units = 6, 
                     kernel_initializer='uniform', 
                     activation ='relu'))
classifier.add(Dense(units = 1, 
                     kernel_initializer='uniform', 
                     activation ='sigmoid'))


In [35]:
#compile the ANN
classifier.compile(optimizer ='adam', 
                   loss = 'binary_crossentropy', 
                   metrics = ['accuracy'] )

### Fit the neural network to the training set

In [36]:

classifier.fit(X_train, y_train, batch_size = 10, nb_epoch = 10)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1e37e380898>

### predictions on the test set
#### check if predictions on the test set greater than 50% chance of churning out of the bank
#### check with confusion matrix

In [37]:
#predictions on the test set
y_pred = classifier.predict(X_test)

In [38]:
y_pred = (y_pred > 0.5)

In [39]:
y_pred

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [40]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

In [41]:
cm

array([[1563,   32],
       [ 291,  114]])

### from confusion matrix we can calculate the score overall in percentage

In [49]:
print("Overall model score in accuracy: {}%".format((1545 + 114)/(2000)*100))

Overall model score in accuracy: 82.95%


In [50]:
"""
New Sample Info

Geography = France
Credit_Score = 600
Gender = Male
Age = 40
Tenure = 3
Balance = 60000
Number_of_Products = 2
Has_Credit_Card = Yes
Is_Active = Yes
Estimated_Salary = 50000
"""


'\nNew Sample Info\n\nGeography = France\nCredit_Score = 600\nGender = Male\nAge = 40\nTenure = 3\nBalance = 60000\nNumber_of_Products = 2\nHas_Credit_Card = Yes\nIs_Active = Yes\nEstimated_Salary = 50000\n'

In [51]:
#new sample entry and prediction

new_prediction = classifier.predict(sc.transform(np.array([[0.0,0,600,1,40,3,60000,2,1,1,50000]])))
new_prediction = (new_prediction > 0.5)

In [53]:
print("Within a 50% likelihood will this customer churn out of the bank? {}".format(new_prediction))

Within a 50% likelihood will this customer churn out of the bank? [[False]]


##### Further development with this model will be to start to isolate the importance and contribution of each feature, and to also start to include further variables which may be of importance such as further credit history details, number of previous banks, customer feedback, etc