# Building an Artificial Neural Network for Churn Prediction

We will use the pre-processed churn data to build a neural network (an ANN, specifically a multilayer perceptron, MLP) with TensorFlow's Keras API to predict customer churn.

## Get the data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read the pre-processed churn dataset that sits next to this notebook.
churn_csv = "./Churn_modelling_processed_data.csv"
data = pd.read_csv(churn_csv)

In [3]:
# Preview the first rows to confirm the columns loaded as expected.
data.head()

Unnamed: 0,CreditScoreScaled,Germany,Spain,GenderCoded,AgeScaled,Tenure,BalanceScaled,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalaryScaled,Exited
0,0.538,0,0,0,0.324324,2,0.0,1,1,1,0.506735,1
1,0.516,0,1,0,0.310811,1,0.334031,1,0,1,0.562709,0
2,0.304,0,0,0,0.324324,8,0.636357,3,1,0,0.569654,1
3,0.698,0,0,0,0.283784,1,0.0,2,0,0,0.46912,0
4,1.0,0,1,0,0.337838,2,0.500246,1,1,1,0.3954,0


In [11]:
# Summarise column dtypes and non-null counts to check for missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   CreditScoreScaled      10000 non-null  float64
 1   Germany                10000 non-null  int64  
 2   Spain                  10000 non-null  int64  
 3   GenderCoded            10000 non-null  int64  
 4   AgeScaled              10000 non-null  float64
 5   Tenure                 10000 non-null  int64  
 6   BalanceScaled          10000 non-null  float64
 7   NumOfProducts          10000 non-null  int64  
 8   HasCrCard              10000 non-null  int64  
 9   IsActiveMember         10000 non-null  int64  
 10  EstimatedSalaryScaled  10000 non-null  float64
 11  Exited                 10000 non-null  int64  
dtypes: float64(4), int64(8)
memory usage: 937.6 KB


## Features and Target

In [8]:
# Every column except the last is a model input; the last column is the label.
column_names = data.columns.to_list()
feature_list, target = column_names[:-1], column_names[-1]

In [9]:
# Pull the model inputs and the label out of the frame as plain NumPy arrays.
X = data.loc[:, feature_list].values
y = data.loc[:, target].values

In [10]:
# Feature matrix shape as (rows, feature columns).
X.shape

(10000, 11)

In [12]:
# Label vector shape; its length should equal the number of rows in X.
y.shape

(10000,)

## Split the data into train and test

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
# Hold out a quarter of the rows for testing; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified on y; consider stratify=y if the churn class is rare.
TEST_FRACTION = 0.25
SPLIT_SEED = 1923
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

In [15]:
# Training features: expect 75% of the rows, same number of columns as X.
X_train.shape

(7500, 11)

In [16]:
# Test features: expect the remaining 25% of the rows.
X_test.shape

(2500, 11)

In [17]:
# Training labels: length should match the row count of X_train.
y_train.shape

(7500,)

In [18]:
# Test labels: length should match the row count of X_test.
y_test.shape

(2500,)

In [23]:
# Shape of a single training sample (one value per feature column).
X_train[0].shape

(11,)

## Creating ANN-MLP model

In [24]:
import tensorflow as tf

In [22]:
from tensorflow.keras import models
from tensorflow.keras import layers, Input

In [25]:
# Seed TensorFlow's global random generator before any layer is created, so the
# randomly initialised weights (and therefore the reported metrics) are intended
# to be repeatable when the notebook is re-run from a fresh kernel.
# The value matches the seed already used for the train/test split.
tf.random.set_seed(1923)

# Create the input layer: its shape is that of one training sample
inputs = Input(shape=X_train[0].shape)

# Create the first hidden layer
# let's consider 12 nodes in the first hidden layer
hidden_layer1 = layers.Dense(12, activation='relu')(inputs)

# Create the second hidden layer
# let's consider 8 nodes in the second hidden layer
hidden_layer2 = layers.Dense(8, activation='relu')(hidden_layer1)

# Create the output layer: a single sigmoid unit, i.e. one value in (0, 1) per sample
output = layers.Dense(1, activation='sigmoid')(hidden_layer2)

In [39]:
# Wrap the layer graph (input tensor through to the sigmoid output) in a named Keras model

ann_model = models.Model(
    inputs=inputs,
    outputs=output,
    name="ANN_model_for_Churn_Prediction",
)

In [40]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
ann_model.summary()

Model: "ANN_model_for_Churn_Prediction"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 11)]              0         
                                                                 
 dense (Dense)               (None, 12)                144       
                                                                 
 dense_1 (Dense)             (None, 8)                 104       
                                                                 
 dense_2 (Dense)             (None, 1)                 9         
                                                                 
Total params: 257
Trainable params: 257
Non-trainable params: 0
_________________________________________________________________
None


## Compile and train the model

In [43]:
# Configure training. compile() needs three choices:
#   optimizer - 'adam' is the usual default
#   loss      - binary cross-entropy, matching the single sigmoid output
#   metrics   - AUC is tracked alongside the loss during training
auc_metric = tf.keras.metrics.AUC()

ann_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[auc_metric],
)

In [44]:
# Keep the History object that fit() returns: it holds the per-epoch loss and
# metric values, and assigning it stops its bare repr being echoed as the cell output.
# verbose=2 logs one compact line per epoch instead of a progress bar.
history = ann_model.fit(X_train, y_train, batch_size=32, epochs=100, verbose=2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x2a10dc6c4f0>

## Model evaluation

In [56]:
# Score the held-out test rows; the sigmoid output gives one value per row

y_prob = ann_model.predict(x=X_test)



In [65]:
# Collapse the predictions to 1-D and flag values above 0.5 as predicted churn.
y_pred = y_prob.ravel() > 0.5

In [70]:
# Calculate Accuracy and the confusion matrix

from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

In [69]:
# Accuracy: share of test rows whose thresholded prediction matches the true label

accuracy_score(y_test, y_pred)

0.866

In [71]:
# ROC-AUC is computed from the raw predicted scores, not the thresholded labels

roc_auc_score(y_test, y_prob.ravel())

0.8521650233410626

In [72]:
# F1-score: harmonic mean of precision and recall on the thresholded predictions

f1_score(y_test, y_pred)

0.6035502958579881

In [73]:
# Precision: of the rows predicted as churn, the fraction that actually churned

precision_score(y_test, y_pred)

0.7544378698224852

In [74]:
# Recall: of the rows that actually churned, the fraction the model caught

recall_score(y_test, y_pred)

0.5029585798816568

In [77]:
# Confusion matrix as a labelled table: rows are actual classes, columns are predicted classes

cm_counts = confusion_matrix(y_true=y_test, y_pred=y_pred)
pd.DataFrame(
    cm_counts,
    index=['actual-0', 'actual-1'],
    columns=['predicted-0', 'predicted-1'],
)

Unnamed: 0,predicted-0,predicted-1
actual-0,1910,83
actual-1,252,255
