### Packages Required

In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score,classification_report

from keras.models import Sequential
from keras.layers import Dense

### Datasets

In [2]:
df = pd.read_csv('C:/Users/Nithin/Downloads/Bank customer churn_AI_ANN/Datasets/Churn_Modelling.csv')

In [3]:
df.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [5]:
#One hot encoding
df = pd.get_dummies(df,columns=['Geography','Gender'],drop_first=True)

In [6]:
#Drop unnecessary columns
df.drop(columns=['RowNumber','CustomerId','Surname'],inplace=True)

### Train Test Split

In [7]:
y = df.Exited
x = df[df.columns.difference(['Exited'])]

In [8]:
train_x,test_x,train_y,test_y = train_test_split(x,y,test_size=0.3,random_state=123)

### Standardising the data

In [9]:
sc = StandardScaler()
sc = sc.fit(train_x)
train_x_std = sc.transform(train_x)
train_x_std = pd.DataFrame(train_x_std,columns=train_x.columns)

In [10]:
test_x_std = sc.transform(test_x)
test_x_std = pd.DataFrame(test_x_std,columns=test_x.columns)

In [11]:
train_x_std.shape

(7000, 11)

### ANN Model

In [12]:
ann = Sequential()

In [15]:
ann.add(Dense(4, activation = 'relu', input_dim = 11)) #Hidden layer 1
ann.add(Dense(4, activation = 'relu')) #Hidden layer 2
ann.add(Dense(1,  activation = 'sigmoid')) #Output Layer

In [16]:
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [17]:
ann.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 4)                 48        
                                                                 
 dense_1 (Dense)             (None, 4)                 20        
                                                                 
 dense_2 (Dense)             (None, 1)                 5         
                                                                 
Total params: 73
Trainable params: 73
Non-trainable params: 0
_________________________________________________________________


In [18]:
ann.get_weights()

[array([[-0.12640536,  0.40007728, -0.34329772,  0.29484254],
        [ 0.32304054, -0.4511248 , -0.5389521 ,  0.01902175],
        [ 0.23736972,  0.1134221 ,  0.6079479 , -0.1454699 ],
        [-0.5487182 , -0.13096792,  0.4322024 , -0.54568845],
        [-0.10975927, -0.53420645, -0.15435275, -0.5476223 ],
        [-0.36034897, -0.04925799,  0.14160699, -0.4523706 ],
        [ 0.48750705, -0.5381731 ,  0.3602683 , -0.36987945],
        [-0.5531308 ,  0.4146828 ,  0.08435369,  0.264543  ],
        [ 0.48900312, -0.39520976, -0.0167352 , -0.43950513],
        [-0.13426462,  0.2589953 ,  0.4659044 , -0.37367314],
        [ 0.02304602, -0.12459284, -0.2955045 , -0.33554447]],
       dtype=float32),
 array([0., 0., 0., 0.], dtype=float32),
 array([[-0.30094308,  0.8509043 ,  0.44408792,  0.25002676],
        [-0.0142473 , -0.7294209 , -0.31541008,  0.7307343 ],
        [ 0.255547  ,  0.720674  , -0.31675398, -0.8402681 ],
        [-0.14844114,  0.64074045,  0.28004187, -0.25097698]],
    

In [19]:
ann.fit(train_x_std,train_y,batch_size=500,epochs=50,validation_data=[test_x_std,test_y])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x23a31f0d580>

In [21]:
pred_train = ann.predict(train_x)
pred_test = ann.predict(test_x)



### Evaluation

In [24]:
print(roc_auc_score(train_y,pred_train))
print(roc_auc_score(test_y,pred_test))

0.4226664464337314
0.4391162718473405


In [27]:
y_pred = pred_train>0.2
print(classification_report(train_y,y_pred))

              precision    recall  f1-score   support

           0       0.74      0.07      0.12      5568
           1       0.20      0.91      0.33      1432

    accuracy                           0.24      7000
   macro avg       0.47      0.49      0.23      7000
weighted avg       0.63      0.24      0.16      7000



In [28]:
y_pred = pred_test>0.2
print(classification_report(test_y,y_pred))

              precision    recall  f1-score   support

           0       0.76      0.07      0.13      2395
           1       0.20      0.91      0.33       605

    accuracy                           0.24      3000
   macro avg       0.48      0.49      0.23      3000
weighted avg       0.64      0.24      0.17      3000

