In [1]:
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
np.set_printoptions(threshold=sys.maxsize)

In [5]:
df = pd.read_csv('Churn_Modelling.csv')
df

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [93]:
X = df.iloc[:, 3:13].values
y = df.iloc[:, 13].values
X[:5]

array([[619, 'France', 'Female', 42, 2, 0.0, 1, 1, 1, 101348.88],
       [608, 'Spain', 'Female', 41, 1, 83807.86, 1, 0, 1, 112542.58],
       [502, 'France', 'Female', 42, 8, 159660.8, 3, 1, 0, 113931.57],
       [699, 'France', 'Female', 39, 1, 0.0, 2, 0, 0, 93826.63],
       [850, 'Spain', 'Female', 43, 2, 125510.82, 1, 1, 1, 79084.1]],
      dtype=object)

In [94]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# labelencoder - provides a very efficient tool for encoding
# the levels of categorical features into numeric values. 
#X[:,2] entire gender column
labelencoder_gender = LabelEncoder()
X[:, 2] = labelencoder_gender.fit_transform(X[:, 2])
X[:5]

array([[619, 'France', 0, 42, 2, 0.0, 1, 1, 1, 101348.88],
       [608, 'Spain', 0, 41, 1, 83807.86, 1, 0, 1, 112542.58],
       [502, 'France', 0, 42, 8, 159660.8, 3, 1, 0, 113931.57],
       [699, 'France', 0, 39, 1, 0.0, 2, 0, 0, 93826.63],
       [850, 'Spain', 0, 43, 2, 125510.82, 1, 1, 1, 79084.1]],
      dtype=object)

In [95]:
from sklearn.compose import ColumnTransformer
# remainder - by default only the column transformed would be returned by 
# transformer,'passthrough' they will be unchanged, 'ohe' just a name of t-former
ct = ColumnTransformer([('ohe', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(ct.fit_transform(X), dtype = str) # float? working fine too
X = X[:, 1:]
X[:5]

array([['0.0', '0.0', '619', '0', '42', '2', '0.0', '1', '1', '1',
        '101348.88'],
       ['0.0', '1.0', '608', '0', '41', '1', '83807.86', '1', '0', '1',
        '112542.58'],
       ['0.0', '0.0', '502', '0', '42', '8', '159660.8', '3', '1', '0',
        '113931.57'],
       ['0.0', '0.0', '699', '0', '39', '1', '0.0', '2', '0', '0',
        '93826.63'],
       ['0.0', '1.0', '850', '0', '43', '2', '125510.82', '1', '1', '1',
        '79084.1']], dtype='<U9')

In [96]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                    test_size = 0.2, random_state = 0)

In [97]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [79]:
X_test[:3]

array([[ 1.75486502, -0.57369368, -0.55204276, -1.09168714, -0.36890377,
         1.04473698,  0.8793029 , -0.92159124,  0.64259497,  0.9687384 ,
         1.61085707],
       [-0.5698444 , -0.57369368, -1.31490297, -1.09168714,  0.10961719,
        -1.031415  ,  0.42972196, -0.92159124,  0.64259497, -1.03227043,
         0.49587037],
       [-0.5698444 ,  1.74309049,  0.57162971, -1.09168714,  0.30102557,
         1.04473698,  0.30858264, -0.92159124,  0.64259497,  0.9687384 ,
        -0.42478674]])

In [100]:
from tensorflow.keras.models import Sequential 
from keras.layers import Dense
# sequential allows to build an ANN as a sequence of layers
# dense allows to create fully conneted network
# kernel_initilaizer is used to initialize a weights in a network

#classifier = Sequential()
#classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
#classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
#classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
#classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

classifier = Sequential([
    Dense(units = 11, kernel_initializer='uniform', activation='relu', input_dim=11),
    Dense(units = 11, kernel_initializer='uniform', activation='relu'),
    Dense(units = 1, kernel_initializer='uniform', activation='sigmoid')
])
classifier.compile(optimizer = 'adamax', loss = 'binary_crossentropy', metrics = ['accuracy'])


In [101]:
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)
# batch size = at a time it takes 10 rows 
# epochs = alogrithm run across the network

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7fa7edb42290>

In [102]:
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)



In [103]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test,y_pred)

[[1523   72]
 [ 213  192]]


0.8575

In [104]:
classifier.save('ann_model.h5')