# Part 1: Data Preprocessing

In [1]:
# Importing the libraries
import numpy as np
import pandas as pd
# Show every float with exactly two decimal places whenever a numpy array is printed
np.set_printoptions(formatter={'float': "{0:0.2f}".format})

In [2]:
# Importing the dataset
# By position: columns 3..12 form the feature matrix X, column 13 is the target vector y
dataset = pd.read_csv('Churn_Modelling.csv')
y = dataset.iloc[:, 13].values
X = dataset.iloc[:, 3:13].values

In [3]:
# Peek at the first five feature rows and the first five labels
print(X[:5])
print(y[:5])

[[619 'France' 'Female' 42 2 0.0 1 1 1 101348.88]
 [608 'Spain' 'Female' 41 1 83807.86 1 0 1 112542.58]
 [502 'France' 'Female' 42 8 159660.8 3 1 0 113931.57]
 [699 'France' 'Female' 39 1 0.0 2 0 0 93826.63]
 [850 'Spain' 'Female' 43 2 125510.82 1 1 1 79084.1]]
[1 0 1 0 0]


In [4]:
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Column 1 (country) and column 2 (gender) hold strings; swap them for integer codes.
# One encoder per column so each keeps its own category mapping.
labelencoder_X_1, labelencoder_X_2 = LabelEncoder(), LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])

In [5]:
print (X[0:5])

[[619 0 0 42 2 0.0 1 1 1 101348.88]
 [608 2 0 41 1 83807.86 1 0 1 112542.58]
 [502 0 0 42 8 159660.8 3 1 0 113931.57]
 [699 0 0 39 1 0.0 2 0 0 93826.63]
 [850 2 0 43 2 125510.82 1 1 1 79084.1]]


In [6]:
# One-hot encode the country column (index 1) into dummy_france, dummy_germany, dummy_spain.
# OneHotEncoder's `categorical_features` argument was deprecated in scikit-learn 0.20 and
# removed in 0.22, so the column is selected with a ColumnTransformer instead.
from sklearn.compose import ColumnTransformer
onehotencoder = OneHotEncoder()
country_encoder = ColumnTransformer(
    transformers = [('country', onehotencoder, [1])],
    remainder = 'passthrough',  # keep all other columns, placed after the dummy columns
    sparse_threshold = 0,       # always return a dense array
)
# ColumnTransformer fits a clone of `onehotencoder`; the fitted encoder is available as
# country_encoder.named_transformers_['country'].
# The passthrough columns come from an object array, so cast the result to float.
X = np.asarray(country_encoder.fit_transform(X), dtype = float)
print (X[0:5])

[[1.00 0.00 0.00 619.00 0.00 42.00 2.00 0.00 1.00 1.00 1.00 101348.88]
 [0.00 0.00 1.00 608.00 0.00 41.00 1.00 83807.86 1.00 0.00 1.00 112542.58]
 [1.00 0.00 0.00 502.00 0.00 42.00 8.00 159660.80 3.00 1.00 0.00 113931.57]
 [1.00 0.00 0.00 699.00 0.00 39.00 1.00 0.00 2.00 0.00 0.00 93826.63]
 [0.00 0.00 1.00 850.00 0.00 43.00 2.00 125510.82 1.00 1.00 1.00 79084.10]]


In [7]:
# Drop the first column (dummy_france) to avoid the dummy variable trap:
# the two remaining country dummies already determine the third.
X = X[:, 1:]
print (X[0:5])

[[0.00 0.00 619.00 0.00 42.00 2.00 0.00 1.00 1.00 1.00 101348.88]
 [0.00 1.00 608.00 0.00 41.00 1.00 83807.86 1.00 0.00 1.00 112542.58]
 [0.00 0.00 502.00 0.00 42.00 8.00 159660.80 3.00 1.00 0.00 113931.57]
 [0.00 0.00 699.00 0.00 39.00 1.00 0.00 2.00 0.00 0.00 93826.63]
 [0.00 1.00 850.00 0.00 43.00 2.00 125510.82 1.00 1.00 1.00 79084.10]]


In [8]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.2,
    random_state = 0,
)

# First five entries of each of the four pieces
print(X_train[:5])
print(X_test[:5])
print(y_train[:5])
print(y_test[:5])

[[0.00 1.00 667.00 0.00 34.00 5.00 0.00 2.00 1.00 0.00 163830.64]
 [1.00 0.00 427.00 1.00 42.00 1.00 75681.52 1.00 1.00 1.00 57098.00]
 [0.00 0.00 535.00 0.00 29.00 2.00 112367.34 1.00 1.00 0.00 185630.76]
 [0.00 1.00 654.00 1.00 40.00 5.00 105683.63 1.00 1.00 0.00 173617.09]
 [0.00 1.00 850.00 0.00 57.00 8.00 126776.30 2.00 1.00 1.00 132298.49]]
[[1.00 0.00 597.00 0.00 35.00 8.00 131101.04 1.00 1.00 1.00 192852.67]
 [0.00 0.00 523.00 0.00 40.00 2.00 102967.41 1.00 1.00 0.00 128702.10]
 [0.00 1.00 706.00 0.00 42.00 8.00 95386.82 1.00 1.00 1.00 75732.25]
 [0.00 0.00 788.00 1.00 32.00 4.00 112079.58 1.00 0.00 0.00 89368.59]
 [1.00 0.00 706.00 1.00 38.00 5.00 163034.82 2.00 1.00 1.00 135662.17]]
[0 0 0 0 0]
[0 1 0 0 0]


In [9]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Learn the scaling statistics from the training rows only, then apply the same
# transformation to both the training and the test features.
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# First five entries of each piece; the labels are untouched by scaling
print(X_train[:5])
print(X_test[:5])
print(y_train[:5])
print(y_test[:5])

[[-0.57 1.74 0.17 -1.09 -0.46 0.01 -1.22 0.81 0.64 -1.03 1.11]
 [1.75 -0.57 -2.30 0.92 0.30 -1.38 -0.01 -0.92 0.64 0.97 -0.75]
 [-0.57 -0.57 -1.19 -1.09 -0.94 -1.03 0.58 -0.92 0.64 -1.03 1.49]
 [-0.57 1.74 0.04 0.92 0.11 0.01 0.47 -0.92 0.64 -1.03 1.28]
 [-0.57 1.74 2.06 -1.09 1.74 1.04 0.81 0.81 0.64 0.97 0.56]]
[[1.75 -0.57 -0.55 -1.09 -0.37 1.04 0.88 -0.92 0.64 0.97 1.61]
 [-0.57 -0.57 -1.31 -1.09 0.11 -1.03 0.43 -0.92 0.64 -1.03 0.50]
 [-0.57 1.74 0.57 -1.09 0.30 1.04 0.31 -0.92 0.64 0.97 -0.42]
 [-0.57 -0.57 1.42 0.92 -0.66 -0.34 0.58 -0.92 -1.56 -1.03 -0.19]
 [1.75 -0.57 0.57 0.92 -0.08 0.01 1.39 0.81 0.64 0.97 0.62]]
[0 0 0 0 0]
[0 1 0 0 0]


# Part 2 - Now let's make the ANN!

In [10]:
# Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

Using TensorFlow backend.


In [11]:
# Initialising the ANN and adding all of its layers in one cell.
# Building the whole stack here keeps the cell idempotent: re-running it always yields a
# fresh 11 -> 6 -> 6 -> 1 network, whereas separate `classifier.add(...)` cells append
# yet another layer to the existing model every time one of them is re-run.
classifier = Sequential([
    # Input layer and first hidden layer (the model receives 11 features)
    Dense(units = 6, kernel_initializer = 'glorot_uniform', activation = 'relu', input_dim = 11),
    # Optional regularisation: insert Dropout(rate = 0.1) here
    # Second hidden layer (input size is inferred from the previous layer)
    Dense(units = 6, kernel_initializer = 'glorot_uniform', activation = 'relu'),
    # Optional regularisation: insert Dropout(rate = 0.1) here
    # Output layer: a single sigmoid unit for the binary target
    Dense(units = 1, kernel_initializer = 'glorot_uniform', activation = 'sigmoid'),
])

In [27]:
# Compiling the ANN
# Binary cross-entropy loss for the two-class target, Adam optimizer, accuracy as the metric
classifier.compile(
    loss = 'binary_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy'],
)

In [28]:
# Fitting the ANN to the Training set
# 30 passes over the training data in mini-batches of 10 samples
classifier.fit(x = X_train, y = y_train, epochs = 30, batch_size = 10)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x11c514198>

# Part 3 - Making predictions and evaluating the model

In [29]:
# Keras evaluate method
# Scores the trained model on the held-out test set; the printed list holds the loss
# followed by the accuracy metric requested at compile time.
score = classifier.evaluate(x = X_test, y = y_test)
print("\n Loss and Accuracy Score:", score)

 Loss and Accuracy Score: [0.34020132279396059, 0.85499999999999998]


In [18]:
# Predicting the Test set results
# One sigmoid output per test row
y_pred = classifier.predict(x = X_test)
print(y_pred)

[[0.13]
 [0.32]
 [0.14]
 ..., 
 [0.18]
 [0.10]
 [0.13]]


In [19]:
# Threshold the model outputs at 0.5 to obtain boolean class predictions
y_pred_bool = np.greater(y_pred, 0.5)
print(y_pred_bool)

[[False]
 [False]
 [False]
 ..., 
 [False]
 [False]
 [False]]


In [20]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
# Rows correspond to the true classes, columns to the predicted classes
cm = confusion_matrix(y_true = y_test, y_pred = y_pred_bool)

In [21]:
print (cm)
# Correct predictions sit on the diagonal of the confusion matrix:
# cm[0, 0] counts true negatives, cm[1, 1] counts true positives.
print (cm[0, 0])
print (cm[1, 1])

[[1548   47]
 [ 261  144]]
1548
144


In [22]:
# Accuracy = correctly classified samples (diagonal of cm) / number of test samples
print ('Accuracy:', (cm[0, 0] + cm[1, 1]) / len(y_pred_bool))

Accuracy: 0.846


# Part 4 - Evaluating, Improving and Tuning the ANN

In [12]:
# Evaluating the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense

In [13]:
def build_classifier():
    """Build and compile a fresh, untrained copy of the churn network.

    The model takes 11 input features, has two hidden layers of 6 ReLU units each
    and one sigmoid output unit. It is compiled with the Adam optimizer, binary
    cross-entropy loss and the accuracy metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    classifier = Sequential()
    # Only the first layer declares the input size; later layers infer theirs
    # from the layer before them, so they take no `input_dim`.
    classifier.add(Dense(units = 6, kernel_initializer = 'glorot_uniform', activation = 'relu', input_dim = 11))
    classifier.add(Dense(units = 6, kernel_initializer = 'glorot_uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'glorot_uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier

In [14]:
# Wrap the model factory so scikit-learn can build and train a fresh network for each fold.
# NOTE: this rebinds `classifier`, which until now referred to the Sequential model built
# and trained in Part 2.
classifier = KerasClassifier(build_fn = build_classifier, epochs = 20, batch_size = 10)

# K Fold Cross Validation method from sklearn.model_selection
# 10 folds over the training set; n_jobs = -1 asks for the folds to run in parallel
accuracies = cross_val_score(
    estimator = classifier,
    X = X_train,
    y = y_train,
    cv = 10,
    n_jobs = -1,
)

Epoch 1/10
Epoch 1/10
Epoch 1/10
Epoch 1/10
Epoch 2/10
Epoch 2/10
Epoch 2/10
  10/6400 [..............................] - ETA: 3s - loss: 0.5227 - acc: 0.8000
  10/6400 [..............................] - ETA: 2s - loss: 0.4487 - acc: 0.8000Epoch 3/10
 360/6400 [>.............................] - ETA: 1s - loss: 0.4320 - acc: 0.7972Epoch 3/10

Epoch 4/10
Epoch 4/10
Epoch 4/10
 470/6400 [=>............................] - ETA: 2s - loss: 0.4003 - acc: 0.8298Epoch 4/10
Epoch 5/10

 330/6400 [>.............................] - ETA: 1s - loss: 0.4447 - acc: 0.7970Epoch 5/10
Epoch 6/10
Epoch 6/10
 370/6400 [>.............................] - ETA: 2s - loss: 0.4150 - acc: 0.8297Epoch 6/10
Epoch 6/10
Epoch 7/10
Epoch 7/10
 290/6400 [>.............................] - ETA: 2s - loss: 0.3796 - acc: 0.8276
Epoch 7/10
Epoch 8/10
 110/6400 [..............................] - ETA: 3s - loss: 0.4420 - acc: 0.8545
Epoch 8/10
Epoch 8/10
Epoch 9/10
 440/6400 [=>............................] - ETA: 2s - loss: 

In [15]:
# Summarise the cross-validation accuracies to judge the bias-variance tradeoff
# bias: the average accuracy across the folds
mean = accuracies.mean()

# variance: judged here by the standard deviation of the fold accuracies
# (this is accuracies.std(), not the statistical variance, so it is named and labelled as such)
std = accuracies.std()

print ('mean', mean)
print ('std', std)

mean 0.846374994442
variance 0.00708651843628


In [None]:
# Tuning the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
def build_classifier(optimizer = 'adam'):
    """Build and compile a fresh, untrained churn network for hyper-parameter tuning.

    The model takes 11 input features, has two hidden layers of 6 ReLU units each
    and one sigmoid output unit, all using the 'uniform' kernel initializer.

    Args:
        optimizer: Optimizer passed to ``compile``. Defaults to 'adam' so the function
            can also be called with no arguments, like the earlier zero-argument
            definition of the same name that this one replaces.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss, accuracy metric).
    """
    classifier = Sequential()
    # Only the first layer declares the input size; later layers infer theirs
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier
# Wrap the factory; batch size, epochs and optimizer are supplied by the grid below
classifier = KerasClassifier(build_fn = build_classifier)
# Every combination of these values is trained and scored
parameters = {
    'batch_size': [25, 32],
    'epochs': [100, 500],
    'optimizer': ['adam', 'rmsprop'],
}
# Exhaustive search over the grid, ranked by 10-fold cross-validated accuracy
grid_search = GridSearchCV(estimator = classifier, param_grid = parameters, scoring = 'accuracy', cv = 10)
grid_search = grid_search.fit(X_train, y_train)
# Best cross-validated score and the parameter combination that achieved it
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_