# Churn prediction
This notebook follows the blog post "Build your first deep learning neural network model using Keras in Python": https://medium.com/@pushkarmandot/build-your-first-deep-learning-neural-network-model-using-keras-in-python-a90b5864116d

In [1]:
import numpy as np
import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

# Seed NumPy's global random number generator so NumPy-driven randomness is repeatable.
# NOTE(review): this does not by itself seed the Keras/TensorFlow backend — confirm
# whether model training also needs a fixed seed for full reproducibility.
np.random.seed(301)

Using TensorFlow backend.


In [2]:
# Load the raw customer table from the CSV sitting next to the notebook.
churn_data = pd.read_csv(filepath_or_buffer="Churn_Modelling.csv")

In [3]:
# Preview the first rows to check that the CSV parsed as expected.
churn_data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Preview the last rows as well, to spot truncation or trailing junk in the file.
churn_data.tail()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.0,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.0,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1
9999,10000,15628319,Walker,792,France,Female,28,4,130142.79,1,1,0,38190.78,0


In [5]:
# (rows, columns) of the raw table.
churn_data.shape

(10000, 14)

In [6]:
# Split the frame into a feature matrix and a target vector, both as ndarrays.
#   X: columns 3..12 (CreditScore through EstimatedSalary); the identifier
#      columns 0-2 (RowNumber, CustomerId, Surname) are left out.
#   y: column 13 (Exited).
X, y = (
    churn_data.iloc[:, slice(3, 13)].values,
    churn_data.iloc[:, 13].values,
)

In [7]:
# Integer-code the two string-valued feature columns in place:
# column 1 is Geography, column 2 is Gender. Each column gets its own encoder.
for col in (1, 2):
    X[:, col] = LabelEncoder().fit_transform(X[:, col])

In [8]:
# Label encoding replaces values in place, so the feature count is unchanged.
X.shape

(10000, 10)

In [9]:
# Inspect the encoded feature matrix (still an object-dtype array at this point).
X

array([[619, 0, 0, ..., 1, 1, 101348.88],
       [608, 2, 0, ..., 0, 1, 112542.58],
       [502, 0, 0, ..., 1, 0, 113931.57],
       ...,
       [709, 0, 0, ..., 0, 1, 42085.58],
       [772, 1, 1, ..., 1, 0, 92888.52],
       [792, 0, 0, ..., 1, 0, 38190.78]], dtype=object)

In [10]:
# One-hot encode Geography (column 1 of X, already integer-coded above).
# `OneHotEncoder(categorical_features=...)` is deprecated and removed in later
# scikit-learn releases; ColumnTransformer is the supported replacement and
# keeps the same layout: encoded columns first, untouched columns after.
onehotencoder = ColumnTransformer(
    transformers = [("geography", OneHotEncoder(), [1])],
    remainder = "passthrough",
    sparse_threshold = 0,  # always return a dense array
)
# X is an object-dtype array, so cast the stacked result to float explicitly.
X = onehotencoder.fit_transform(X).astype(np.float64)
# Drop the first dummy column: the remaining dummies already determine it, so it is redundant.
X = X[:, 1:]

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [11]:
# Column count after one-hot encoding Geography and dropping one dummy column.
X.shape

(10000, 11)

In [12]:
# Hold out 20% of the rows as the final test set.
(
    X_train,
    X_test,
    y_train,
    y_test,
) = train_test_split(X, y, test_size = 0.2)

In [13]:
# Carve a validation set (10% of the remaining training rows) out of the training split.
# NOTE: this overwrites X_train / y_train, so re-running this cell on its own
# shrinks the training set again; re-run from the previous split cell instead.
(
    X_train,
    X_val,
    y_train,
    y_val,
) = train_test_split(X_train, y_train, test_size = 0.1)

In [14]:
# Standardise every feature. The scaler is fitted on the training split only,
# and the same fitted transform is then applied to all three splits so that no
# test/validation statistics leak into training.
sc = StandardScaler().fit(X_train)
X_train, X_test, X_val = (sc.transform(part) for part in (X_train, X_test, X_val))

In [15]:
# Inspect the standardised training matrix.
X_train

array([[ 1.72058015, -0.5739289 ,  1.8740067 , ..., -1.54384541,
        -1.03390789, -1.7242227 ],
       [-0.58119931, -0.5739289 ,  1.02986019, ...,  0.64773325,
        -1.03390789,  0.46619827],
       [-0.58119931, -0.5739289 ,  1.3008208 , ...,  0.64773325,
         0.96720415,  0.75196518],
       ...,
       [-0.58119931,  1.74237609,  0.85269364, ...,  0.64773325,
         0.96720415,  1.40718361],
       [-0.58119931,  1.74237609,  0.54004678, ...,  0.64773325,
        -1.03390789, -0.03853836],
       [-0.58119931, -0.5739289 ,  0.62341928, ..., -1.54384541,
        -1.03390789,  0.16871132]])

In [20]:
# Feed-forward network body: hidden layers of 15 -> 10 -> 10 -> 5 ReLU units,
# with a Dropout layer after each of the first three hidden layers.
churner_nn = Sequential()
dense_classes = [15, 10, 5]  # hidden-layer widths (units per layer, not output classes)
drops = [0.15, 0.1]
# NOTE(review): only drops[1] (0.1) is applied below; drops[0] (0.15) is never
# used — confirm whether the first Dropout was meant to use it.

# Take the input width from the training matrix instead of hard-coding 11, so
# the model stays in sync if the feature engineering above changes.
churner_nn.add(Dense(dense_classes[0], activation = 'relu', input_dim = X_train.shape[1]))
churner_nn.add(Dropout(drops[1]))
churner_nn.add(Dense(dense_classes[1], activation = 'relu'))
churner_nn.add(Dropout(drops[1]))
churner_nn.add(Dense(dense_classes[1], activation = 'relu'))
churner_nn.add(Dropout(drops[1]))
churner_nn.add(Dense(dense_classes[2], activation = 'relu'))

In [21]:
# Output layer: a single sigmoid unit for the binary target.
# `units` replaces the Keras 1 keyword `output_dim`, which only works through a
# deprecation shim and emits a warning.
# NOTE: re-running this cell appends another output layer to the same model;
# re-run the model-definition cell first.
churner_nn.add(Dense(units = 1, activation = 'sigmoid'))

  """Entry point for launching an IPython kernel.


In [22]:
# Training set-up for binary classification: Adam optimiser, binary
# cross-entropy as the objective, accuracy as the reported metric.
churner_nn.compile(
    loss = 'binary_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy'],
)

In [23]:
# Train for 70 passes over the training split in mini-batches of 100 rows.
# `epochs` replaces the Keras 1 keyword `nb_epoch`, which only works through a
# deprecation shim and emits a warning.
churner_nn.fit(X_train, y_train, batch_size = 100, epochs = 70)

Instructions for updating:
Use tf.cast instead.


  """Entry point for launching an IPython kernel.


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


<keras.callbacks.History at 0x7f07de502320>

In [24]:
# Predicted probabilities on the validation split, cut at 0.5 to get boolean labels.
y_pred_val = churner_nn.predict(X_val) > 0.5

In [25]:
# Validation confusion matrix (rows: true class, columns: predicted class).
cm1 = confusion_matrix(y_true = y_val, y_pred = y_pred_val)

In [26]:
# Show the validation confusion matrix.
cm1

array([[600,  41],
       [ 75,  84]])

In [27]:
# Validation accuracy: correctly classified rows (the diagonal) over all rows.
np.trace(cm1) / cm1.sum()

0.855

In [28]:
# Predicted probabilities on the held-out test split, cut at 0.5 to get boolean labels.
y_pred_test = churner_nn.predict(X_test) > 0.5

In [29]:
# Test confusion matrix (rows: true class, columns: predicted class).
cm2 = confusion_matrix(y_true = y_test, y_pred = y_pred_test)

In [30]:
# Test accuracy: correctly classified rows (the diagonal) over all rows.
np.trace(cm2) / cm2.sum()

0.866

In [31]:
# Show the test confusion matrix.
cm2

array([[1537,   84],
       [ 184,  195]])

In [32]:
# Show the validation confusion matrix again, for comparison with the test matrix.
cm1

array([[600,  41],
       [ 75,  84]])

In [33]:
# Keras' own evaluation on the validation split: returns [loss, accuracy].
score1 = churner_nn.evaluate(x = X_val, y = y_val)



In [34]:
# [loss, accuracy] on the validation split.
score1

[0.34154426723718645, 0.855]

In [35]:
# Keras' own evaluation on the held-out test split: returns [loss, accuracy].
score2 = churner_nn.evaluate(x = X_test, y = y_test)



In [36]:
# [loss, accuracy] on the held-out test split.
score2

[0.33121792268753053, 0.866]