In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from tensorflow import keras
from tensorflow.keras import models, layers, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report 

In [2]:
df = pd.read_csv('Churn_Modelling_Bank.csv')

In [3]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
df.dtypes

RowNumber            int64
CustomerId           int64
Surname             object
CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

In [5]:
df.drop(['Surname','RowNumber','CustomerId'], axis=1, inplace=True)

In [6]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [7]:
df.dtypes

CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

In [8]:
df = pd.get_dummies(df, columns = ['Geography','Gender'])

In [9]:
df.columns

Index(['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited', 'Geography_France',
       'Geography_Germany', 'Geography_Spain', 'Gender_Female', 'Gender_Male'],
      dtype='object')

In [10]:
df.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,619,42,2,0.0,1,1,1,101348.88,1,1,0,0,1,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,0,1,1,0
2,502,42,8,159660.8,3,1,0,113931.57,1,1,0,0,1,0
3,699,39,1,0.0,2,0,0,93826.63,0,1,0,0,1,0
4,850,43,2,125510.82,1,1,1,79084.1,0,0,0,1,1,0


In [11]:
df.columns

Index(['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited', 'Geography_France',
       'Geography_Germany', 'Geography_Spain', 'Gender_Female', 'Gender_Male'],
      dtype='object')

In [12]:
col_to_scale = ['CreditScore','Age', 'Balance','EstimatedSalary']

In [13]:
from sklearn.preprocessing import MinMaxScaler

In [14]:
sc = MinMaxScaler()

In [15]:
df[col_to_scale] = sc.fit_transform(df[col_to_scale])

In [16]:
df

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,0.538,0.324324,2,0.000000,1,1,1,0.506735,1,1,0,0,1,0
1,0.516,0.310811,1,0.334031,1,0,1,0.562709,0,0,0,1,1,0
2,0.304,0.324324,8,0.636357,3,1,0,0.569654,1,1,0,0,1,0
3,0.698,0.283784,1,0.000000,2,0,0,0.469120,0,1,0,0,1,0
4,1.000,0.337838,2,0.500246,1,1,1,0.395400,0,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0.842,0.283784,5,0.000000,2,1,0,0.481341,0,1,0,0,0,1
9996,0.332,0.229730,10,0.228657,1,1,1,0.508490,0,1,0,0,0,1
9997,0.718,0.243243,7,0.000000,1,0,1,0.210390,1,1,0,0,1,0
9998,0.844,0.324324,3,0.299226,2,1,0,0.464429,1,0,1,0,0,1


In [17]:
df.columns

Index(['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited', 'Geography_France',
       'Geography_Germany', 'Geography_Spain', 'Gender_Female', 'Gender_Male'],
      dtype='object')

In [23]:
x = df.drop('Exited', axis=1)

In [24]:
y = df['Exited']

In [25]:
from imblearn.over_sampling import SMOTE

In [26]:
sm = SMOTE(random_state=123)

In [27]:
x_bal, y_bal = sm.fit_resample(x,y)

In [28]:
x_bal.shape, y_bal.shape

((15926, 13), (15926,))

In [29]:
y.value_counts()

0    7963
1    2037
Name: Exited, dtype: int64

In [30]:
x_train, x_test, y_train, y_test = train_test_split(x_bal,y_bal,test_size=0.25, random_state=123)

In [31]:
x_train.shape, x_test.shape

((11944, 13), (3982, 13))

In [32]:
y_train.shape, y_test.shape

((11944,), (3982,))

In [33]:
y_train.value_counts()

1    5973
0    5971
Name: Exited, dtype: int64

In [34]:
y_train

6012     0
11385    1
12848    1
15525    1
13323    1
        ..
12252    1
1346     0
11646    1
15725    1
3582     0
Name: Exited, Length: 11944, dtype: int64

In [46]:
ann = keras.Sequential([
    layers.Dense(500, activation='relu', kernel_initializer='uniform'),
    layers.Dense(200, activation = 'tanh'),
    layers.Dense(1, activation='sigmoid')
])

In [47]:
ann.compile(optimizer='adam', loss='binary_crossentropy',metrics = ['accuracy'])

In [48]:
ann.fit(x_train, y_train, epochs=100, use_multiprocessing=True)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x20f40458d88>

In [49]:
ann.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 500)               7000      
                                                                 
 dense_10 (Dense)            (None, 200)               100200    
                                                                 
 dense_11 (Dense)            (None, 1)                 201       
                                                                 
Total params: 107,401
Trainable params: 107,401
Non-trainable params: 0
_________________________________________________________________


In [50]:
ann.evaluate(x_test, y_test)



[0.40470579266548157, 0.8309894800186157]

In [51]:
yp = ann.predict(x_test)

In [52]:
yp[:10].reshape(-1,)

array([0.89291704, 0.6531733 , 0.9633107 , 0.918157  , 0.0529891 ,
       0.07171252, 0.7877283 , 0.39983243, 0.6240022 , 0.9888934 ],
      dtype=float32)

In [53]:
y_pred = []
for i in yp:
    if i > 0.5:
        y_pred.append(1)
    else:
        y_pred.append(0)

In [54]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.87      0.78      0.82      1992
           1       0.80      0.88      0.84      1990

    accuracy                           0.83      3982
   macro avg       0.83      0.83      0.83      3982
weighted avg       0.83      0.83      0.83      3982



In [55]:
confusion_matrix(y_test, y_pred)

array([[1553,  439],
       [ 234, 1756]], dtype=int64)