## Importing libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Loading the data into df and basic exploratory

In [2]:
df = pd.read_csv('Churn_Modelling.csv')

In [3]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


## Making X and y from the data

In [4]:
# Dropping the useless variables and storing in X
X = df.drop(['RowNumber', 'CustomerId', 'Surname', 'Exited'], axis=1)

In [5]:
y = df.Exited

## Browsing through the sklearn library

In [6]:
import sklearn
[x for x in dir(sklearn) if '__' not in x]

['_config',
 '_distributor_init',
 'base',
 'clone',
 'config_context',
 'exceptions',
 'externals',
 'get_config',
 'logger',
 'logging',
 'os',
 're',
 'set_config',
 'setup_module',
 'show_versions',
 'sys',
 'utils']

## Train Test Split

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

## Dummy Variable generation

In [9]:
from sklearn.preprocessing import OneHotEncoder

In [10]:
from sklearn.compose import make_column_transformer

In [11]:
X_train.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
7389,667,Spain,Female,34,5,0.0,2,1,0,163830.64
9275,427,Germany,Male,42,1,75681.52,1,1,1,57098.0
2995,535,France,Female,29,2,112367.34,1,1,0,185630.76
5316,654,Spain,Male,40,5,105683.63,1,1,0,173617.09
356,850,Spain,Female,57,8,126776.3,2,1,1,132298.49


In [12]:
column_trans = make_column_transformer((OneHotEncoder(drop='first'), ['Geography', 'Gender'])
                                       , remainder='passthrough')

In [13]:
# DataFrame helps me visualize the data better! :)
pd.DataFrame(column_trans.fit_transform(X_train))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.0,1.0,0.0,667.0,34.0,5.0,0.00,2.0,1.0,0.0,163830.64
1,1.0,0.0,1.0,427.0,42.0,1.0,75681.52,1.0,1.0,1.0,57098.00
2,0.0,0.0,0.0,535.0,29.0,2.0,112367.34,1.0,1.0,0.0,185630.76
3,0.0,1.0,1.0,654.0,40.0,5.0,105683.63,1.0,1.0,0.0,173617.09
4,0.0,1.0,0.0,850.0,57.0,8.0,126776.30,2.0,1.0,1.0,132298.49
...,...,...,...,...,...,...,...,...,...,...,...
7995,1.0,0.0,0.0,594.0,32.0,4.0,120074.97,2.0,1.0,1.0,162961.79
7996,0.0,1.0,0.0,794.0,22.0,4.0,114440.24,1.0,1.0,1.0,107753.07
7997,0.0,0.0,1.0,738.0,35.0,5.0,161274.05,2.0,1.0,0.0,181429.87
7998,0.0,1.0,0.0,590.0,38.0,9.0,0.00,2.0,1.0,1.0,148750.16


In [14]:
X_train = column_trans.fit_transform(X_train)

## Feature Scaling

In [15]:
from sklearn.preprocessing import StandardScaler

In [16]:
sc = StandardScaler()

In [17]:
pd.DataFrame(sc.fit_transform(X_train))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,-0.569844,1.743090,-1.091687,0.169582,-0.464608,0.006661,-1.215717,0.809503,0.642595,-1.032270,1.106432
1,1.754865,-0.573694,0.916013,-2.304559,0.301026,-1.377440,-0.006312,-0.921591,0.642595,0.968738,-0.748664
2,-0.569844,-0.573694,-1.091687,-1.191196,-0.943129,-1.031415,0.579935,-0.921591,0.642595,-1.032270,1.485335
3,-0.569844,1.743090,0.916013,0.035566,0.109617,0.006661,0.473128,-0.921591,0.642595,-1.032270,1.276528
4,-0.569844,1.743090,-1.091687,2.056114,1.736588,1.044737,0.810193,0.809503,0.642595,0.968738,0.558378
...,...,...,...,...,...,...,...,...,...,...,...
7995,1.754865,-0.573694,-1.091687,-0.582970,-0.656016,-0.339364,0.703104,0.809503,0.642595,0.968738,1.091330
7996,-0.569844,1.743090,-1.091687,1.478815,-1.613058,-0.339364,0.613060,-0.921591,0.642595,0.968738,0.131760
7997,-0.569844,-0.573694,0.916013,0.901515,-0.368904,0.006661,1.361474,0.809503,0.642595,-1.032270,1.412320
7998,-0.569844,1.743090,-1.091687,-0.624205,-0.081791,1.390762,-1.215717,0.809503,0.642595,0.968738,0.844321


In [18]:
X_train = sc.fit_transform(X_train)

In [19]:
X_train

array([[-0.5698444 ,  1.74309049, -1.09168714, ...,  0.64259497,
        -1.03227043,  1.10643166],
       [ 1.75486502, -0.57369368,  0.91601335, ...,  0.64259497,
         0.9687384 , -0.74866447],
       [-0.5698444 , -0.57369368, -1.09168714, ...,  0.64259497,
        -1.03227043,  1.48533467],
       ...,
       [-0.5698444 , -0.57369368,  0.91601335, ...,  0.64259497,
        -1.03227043,  1.41231994],
       [-0.5698444 ,  1.74309049, -1.09168714, ...,  0.64259497,
         0.9687384 ,  0.84432121],
       [ 1.75486502, -0.57369368, -1.09168714, ...,  0.64259497,
        -1.03227043,  0.32472465]])

In [20]:
# Transforming the test data as well
X_test = column_trans.transform(X_test)
X_test = sc.transform(X_test)

In [21]:
X_test

array([[ 1.75486502, -0.57369368, -1.09168714, ...,  0.64259497,
         0.9687384 ,  1.61085707],
       [-0.5698444 , -0.57369368, -1.09168714, ...,  0.64259497,
        -1.03227043,  0.49587037],
       [-0.5698444 ,  1.74309049, -1.09168714, ...,  0.64259497,
         0.9687384 , -0.42478674],
       ...,
       [-0.5698444 ,  1.74309049,  0.91601335, ...,  0.64259497,
        -1.03227043,  0.71888467],
       [ 1.75486502, -0.57369368,  0.91601335, ...,  0.64259497,
         0.9687384 , -1.54507805],
       [ 1.75486502, -0.57369368,  0.91601335, ...,  0.64259497,
        -1.03227043,  1.61255917]])

## Model Building

In [22]:
import keras
[x for x in dir(keras) if '__' not in x]

Using TensorFlow backend.


['Input',
 'Model',
 'Sequential',
 'absolute_import',
 'activations',
 'applications',
 'backend',
 'callbacks',
 'constraints',
 'datasets',
 'engine',
 'initializers',
 'layers',
 'legacy',
 'losses',
 'metrics',
 'models',
 'optimizers',
 'preprocessing',
 'regularizers',
 'utils',
 'wrappers']

In [23]:
from keras.models import Sequential
from keras.layers import Dense

In [24]:
# Initializing ANN
classifier = Sequential()

In [25]:
X_train.shape

(8000, 11)

In [26]:
y_train.values.shape

(8000,)

In [27]:
# Adding Layers
# Input layer and the first hidden layer
classifier.add(Dense(units = 6, activation = 'relu'
                     , kernel_initializer = 'uniform'
                     , input_dim = X_train.shape[1])) 
# Choosing the units(number of nodes) to be 6 as that is the average of 11 and 1
# Cause 'Not being an artist now!! :P'

In [28]:
# Adding the second hidden layer
classifier.add(Dense(units = 6, activation = 'relu'
                     , kernel_initializer = 'uniform'))

In [29]:
# Adding the output layer
classifier.add(Dense(units = 1
                     , kernel_initializer = 'uniform'
                     , activation = 'sigmoid'))
# Sigmoid as the activation function as it is a binary classifier

In [30]:
# Compiling the ANN
classifier.compile(optimizer='adam'
                   , loss='binary_crossentropy'
                   , metrics=['accuracy'])
# Applying SGD algo to the entire network. Defining optimizer for weights 
# and loss as binary_crossentropy(binary classfier)

In [31]:
# Fitting the ANN to the Training set
classifier.fit(x = X_train, y = y_train
               , batch_size = 10
               , epochs = 100
               , verbose = 1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100


Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.callbacks.History at 0x1a3a31cd90>

## RW

In [None]:
# Predicting the Test set results
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)

In [32]:
classifier.predict(X_test) #Check when applying softmax in the output layer O.o Shouldn't give proba ideally!

array([[0.3116155 ],
       [0.3804688 ],
       [0.16958803],
       ...,
       [0.15399605],
       [0.19148187],
       [0.22186567]], dtype=float32)

In [33]:
classifier.predict_proba(X_test)

array([[0.3116155 ],
       [0.3804688 ],
       [0.16958803],
       ...,
       [0.15399605],
       [0.19148187],
       [0.22186567]], dtype=float32)

In [34]:
pd.DataFrame(classifier.predict_classes(X_test, verbose=1))[0].value_counts()



0    1701
1     299
Name: 0, dtype: int64

In [35]:
pd.Series(np.where(((classifier.predict(X_test)) > .5).reshape(2000,)==False, 0, 1)).value_counts()

0    1701
1     299
dtype: int64

## Predicting

In [36]:
# Predicting the Test set results
y_pred = classifier.predict_classes(X_test)

## Confusion Matrix

In [37]:
from sklearn.metrics import confusion_matrix

In [38]:
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[1508,   87],
       [ 193,  212]])

## Accuracy

In [40]:
tptn=0
for i in range(2):
    tptn = tptn + confusion_matrix(y_true=y_test, y_pred=y_pred)[i][i]
print("The accuracy is: ",tptn/y_test.shape[0]*100, "%", sep='')

The accuracy is: 86.0%


## Saving the model

In [44]:
# Saving the model for reloading later
import pickle
filename = 'ANN_model.sav'
pickle.dump(classifier, open(filename, 'wb'))

In [106]:
classifier = pickle.load(open(filename, 'rb'))

In [113]:
classifier.predict_classes(X_test)

array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [0]], dtype=int32)

## Creating a sample test to run the model on

In [70]:
df.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')

In [87]:
# Checking previous data for format of sample
for i in X.columns:
    print(i+":", X[i][0], " | dtype is:", X[i].dtype)

CreditScore: 619  | dtype is: int64
Geography: France  | dtype is: object
Gender: Female  | dtype is: object
Age: 42  | dtype is: int64
Tenure: 2  | dtype is: int64
Balance: 0.0  | dtype is: float64
NumOfProducts: 1  | dtype is: int64
HasCrCard: 1  | dtype is: int64
IsActiveMember: 1  | dtype is: int64
EstimatedSalary: 101348.88  | dtype is: float64


In [101]:
arr = np.array([[600, "France", "Male", 40, 3, 60000.0, 2, 1, 1, 50000.0]])
X_sample = pd.DataFrame(arr
             , columns =['CreditScore', 'Geography',
                         'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
                         'IsActiveMember', 'EstimatedSalary'])

## Preprocessing and using model to predict on X_sample data

In [103]:
# One hot encoding
X_sample = column_trans.transform(X_sample)

In [105]:
# Scaling
X_sample = sc.transform(X_sample)

In [114]:
classifier.predict_classes(X_sample)

array([[0]], dtype=int32)

In [118]:
if classifier.predict_classes(X_sample)[0][0]==0:
    print("The customer exited")
else:
    print("The customer didn't exit")

The customer exited
