Bank customer churn prediction using Artificial Neural Network (ANN)

In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
%matplotlib inline

In [2]:
df = pd.read_csv('Bank_data.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
#dropping columns which are not useful
df = df.drop(['CustomerId', 'Surname', 'RowNumber', 'Geography'], axis = 1)

In [4]:
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
df.dtypes

Unnamed: 0,0
CreditScore,int64
Gender,object
Age,int64
Tenure,int64
Balance,float64
NumOfProducts,int64
HasCrCard,int64
IsActiveMember,int64
EstimatedSalary,float64
Exited,int64


In [6]:
df['Gender'].replace({'Male':0, 'Female':1}, inplace=True)
df.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Gender'].replace({'Male':0, 'Female':1}, inplace=True)
  df['Gender'].replace({'Male':0, 'Female':1}, inplace=True)


Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,1,42,2,0.0,1,1,1,101348.88,1
1,608,1,41,1,83807.86,1,0,1,112542.58,0
2,502,1,42,8,159660.8,3,1,0,113931.57,1
3,699,1,39,1,0.0,2,0,0,93826.63,0
4,850,1,43,2,125510.82,1,1,1,79084.1,0


In [7]:
#scaling columns tenure, creditscore, age, Balance, Estimatedsalary

cols_to_scale = ['CreditScore','Age','Tenure', 'Balance', 'EstimatedSalary']

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df[cols_to_scale] = scaler.fit_transform(df[cols_to_scale])

In [8]:
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,0.538,1,0.324324,0.2,0.0,1,1,1,0.506735,1
1,0.516,1,0.310811,0.1,0.334031,1,0,1,0.562709,0
2,0.304,1,0.324324,0.8,0.636357,3,1,0,0.569654,1
3,0.698,1,0.283784,0.1,0.0,2,0,0,0.46912,0
4,1.0,1,0.337838,0.2,0.500246,1,1,1,0.3954,0


In [9]:
#train test split

X = df.drop('Exited', axis = 1)
y = df['Exited']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=5)

In [10]:
print(X_train.shape)
print(X_test.shape)

(8000, 9)
(2000, 9)


In [11]:
#Build a model (ANN) in tensorflow/keras

import tensorflow as tf
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(9, input_shape=(9,), activation='relu'),
                          keras.layers.Dense(5, activation = 'relu'),
                          keras.layers.Dense(1, activation = 'sigmoid')
])

model.compile(optimizer = 'adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs = 100)

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7967 - loss: 0.5541
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8023 - loss: 0.4784
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7971 - loss: 0.4661
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8027 - loss: 0.4550
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8097 - loss: 0.4457
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8145 - loss: 0.4427
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8106 - loss: 0.4439
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8126 - loss: 0.4431
Epoch 9/100
[1m250/250[0m [32m━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x793d247a49d0>

In [12]:
model.evaluate(X_test, y_test)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8422 - loss: 0.3735


[0.3685443103313446, 0.8500000238418579]

In [13]:
yp = model.predict(X_test)
yp[:5]

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


array([[0.02042402],
       [0.07211092],
       [0.11838776],
       [0.1151911 ],
       [0.08878247]], dtype=float32)

In [14]:
y_pred = []

for i in yp:
    if i>0.5:
        y_pred.append(1)
    else:
        y_pred.append(0)

y_pred[:10]

[0, 0, 0, 0, 0, 0, 0, 0, 0, 1]

In [15]:
y_test[:10]

Unnamed: 0,Exited
7054,0
442,0
3954,0
2288,0
3196,0
6178,0
8351,0
5658,1
2065,0
413,1


In [16]:
#metrics

from sklearn.metrics import confusion_matrix , classification_report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.86      0.97      0.91      1595
           1       0.76      0.38      0.50       405

    accuracy                           0.85      2000
   macro avg       0.81      0.67      0.71      2000
weighted avg       0.84      0.85      0.83      2000



We can see the diff in f1_score of class 0 and 1....this is due to imbalance of classes 0 and 1

In [18]:
df.Exited.value_counts()

Unnamed: 0_level_0,count
Exited,Unnamed: 1_level_1
0,7963
1,2037


In [20]:
#Method 1: Undersampling

# Class count
count_class_0, count_class_1 = df.Exited.value_counts()

# Divide by class
df_class_0 = df[df['Exited'] == 0]
df_class_1 = df[df['Exited'] == 1]
print(df_class_0.shape)
print(df_class_1.shape)
print(count_class_1)
print(count_class_0)


(7963, 10)
(2037, 10)
2037
7963


In [23]:
# Undersample 0-class and concat the DataFrames of both class
df_class_0_under = df_class_0.sample(count_class_1)   #it will create sample/random values(1869 samples)
df_test_under = pd.concat([df_class_0_under, df_class_1], axis=0)

print('Random under-sampling:')
print(df_test_under.Exited.value_counts())

Random under-sampling:
Exited
0    2037
1    2037
Name: count, dtype: int64


In [24]:
X = df_test_under.drop('Exited',axis='columns')
y = df_test_under['Exited']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=15, stratify=y)

In [27]:
#Build a model (ANN) in tensorflow/keras
def ANN(X_train, y_train, X_test, y_test, loss, weights):

    model = keras.Sequential([keras.layers.Dense(9, input_shape=(9,), activation='relu'),
                          keras.layers.Dense(5, activation = 'relu'),
                          keras.layers.Dense(1, activation = 'sigmoid')
                          ])

    model.compile(optimizer = 'adam', loss='binary_crossentropy', metrics=['accuracy'])

    if weights == -1:
        model.fit(X_train, y_train, epochs=100)
    else:
        model.fit(X_train, y_train, epochs=100, class_weight = weights)

    print(model.evaluate(X_test, y_test))

    y_preds = model.predict(X_test)
    y_preds = np.round(y_preds)

    print("Classification Report: \n", classification_report(y_test, y_preds))

    return y_preds

In [28]:
y_preds = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4580 - loss: 0.6962
Epoch 2/100
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4722 - loss: 0.6922
Epoch 3/100
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5219 - loss: 0.6906
Epoch 4/100
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5333 - loss: 0.6888
Epoch 5/100
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5857 - loss: 0.6855
Epoch 6/100
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5995 - loss: 0.6825
Epoch 7/100
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6087 - loss: 0.6745
Epoch 8/100
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6410 - loss: 0.6669
Epoch 9/100
[1m102/102[0m [32m━━━━━━━━━━━

In [30]:
#Method2: Oversampling

# Oversample 1-class and concat the DataFrames of both classes
df_class_1_over = df_class_1.sample(count_class_0, replace=True)
df_test_over = pd.concat([df_class_0, df_class_1_over], axis=0)

print('Random over-sampling:')
print(df_test_over.Exited.value_counts())

Random over-sampling:
Exited
0    7963
1    7963
Name: count, dtype: int64


In [31]:
X = df_test_over.drop('Exited',axis='columns')
y = df_test_over['Exited']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=15, stratify=y)

In [32]:
y_preds = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5500 - loss: 0.6904
Epoch 2/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6521 - loss: 0.6350
Epoch 3/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.6751 - loss: 0.5972
Epoch 4/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6966 - loss: 0.5768
Epoch 5/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7048 - loss: 0.5659
Epoch 6/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7071 - loss: 0.5595
Epoch 7/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7269 - loss: 0.5426
Epoch 8/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7304 - loss: 0.5401
Epoch 9/100
[1m399/399[0m [32m━━━━━━━━━━━

In [33]:
#Method3: SMOTE

X = df.drop('Exited',axis='columns')
y = df['Exited']

In [34]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(sampling_strategy='minority')
X_sm, y_sm = smote.fit_resample(X, y)

y_sm.value_counts()

Unnamed: 0_level_0,count
Exited,Unnamed: 1_level_1
1,7963
0,7963


In [35]:
X_train, X_test, y_train, y_test = train_test_split(X_sm, y_sm, test_size=0.2, random_state=15, stratify=y_sm)

In [36]:
y_preds = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6038 - loss: 0.6700
Epoch 2/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6791 - loss: 0.6048
Epoch 3/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6925 - loss: 0.5770
Epoch 4/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7118 - loss: 0.5528
Epoch 5/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7297 - loss: 0.5381
Epoch 6/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7426 - loss: 0.5266
Epoch 7/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7391 - loss: 0.5238
Epoch 8/100
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7436 - loss: 0.5097
Epoch 9/100
[1m399/399[0m [32