### Packages Required

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score,confusion_matrix,classification_report

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import torch.utils.data
import torch.optim as optim

### Datasets

In [4]:
df = pd.read_csv('C:/Users/Nithin/Downloads/Churn_AI_Keras/Datasets/Churn_Modelling.csv')
df

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [6]:
df.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

**--Drop Unwanted columns**

In [7]:
df.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')

In [8]:
df.drop(columns=['RowNumber', 'CustomerId', 'Surname'],axis=1,inplace=True)

In [9]:
df.select_dtypes('object')

Unnamed: 0,Geography,Gender
0,France,Female
1,Spain,Female
2,France,Female
3,France,Female
4,Spain,Female
...,...,...
9995,France,Male
9996,France,Male
9997,France,Female
9998,Germany,Male


In [10]:
df_final = pd.get_dummies(df,columns=['Geography','Gender'],drop_first=True)

### Train Test Split

In [11]:
x = df_final[df_final.columns.difference(['Exited'])]
y = df_final.Exited

In [12]:
train_x,test_x,train_y,test_y = train_test_split(x,y,test_size=0.3,random_state=123)

In [22]:
sc = StandardScaler()
train_x_std = sc.fit_transform(train_x)
test_x_std = sc.transform(test_x)

In [None]:
#Tensor conversion
train_x_std = torch.from_numpy(train_x_std)
train_y = torch.from_numpy(train_y.values).view(-1,1)
test_x_std = torch.from_numpy(test_x_std)
test_y = torch.from_numpy(test_y.values).view(-1,1)

In [24]:
train_x_std.shape

torch.Size([7000, 11])

### Model

In [36]:
class ANN(nn.Module):
    def __init__(self, input_dim = 11, output_dim = 1):
        super(ANN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 6) #Hidden Layer 1
        self.fc2 = nn.Linear(6, 6)         #Hidden Layer2
        self.output_layer = nn.Linear(6,output_dim) #Output Layer

    
    # Feed Forward Function
    def forward(self, x):
        x = F.relu(self.fc1(x))    # Add ReLU activation function to each layer
        x = F.relu(self.fc2(x))
        x = F.sigmoid(self.output_layer(x))
        return x     # Return the created model

In [37]:
# Hyper-parameters
input_size = 11
output_size = 1
num_epochs = 1000
learning_rate = 0.01

In [38]:
M0 = ANN(input_dim=input_size,output_dim=output_size)
print(M0)

ANN(
  (fc1): Linear(in_features=11, out_features=6, bias=True)
  (fc2): Linear(in_features=6, out_features=6, bias=True)
  (output_layer): Linear(in_features=6, out_features=1, bias=True)
)


In [39]:
#loss
criterion = nn.BCELoss()
#optimizers
optimizer = optim.Adam(M0.parameters(), lr=learning_rate)

In [None]:
criterion(y_pred_test.squeeze(), target_test) 

In [57]:
target_test.shape

torch.Size([3000, 1])

In [59]:
M0.eval()     #not updating the model weights
data_test = Variable(test_x_std).float()
target_test = Variable(test_y).type(torch.FloatTensor)
y_pred_test = M0(data_test)
before_train = criterion(y_pred_test.squeeze(), target_test.squeeze()) 
print('Test loss before training' , before_train.item())

Test loss before training 0.5843700766563416


**--Model Trainning**

In [60]:
M0.train()
for epoch in range(num_epochs):
    data = Variable(train_x_std).float()
    target = Variable(train_y).type(torch.FloatTensor)
    
    # Forward pass
    outputs = M0(data)
    loss = criterion(outputs, target)
    
    # Backward and optimize
    optimizer.zero_grad()   #sets the gradients to zero before we start backpropagation. This is a necessary step as PyTorch accumulates the gradients from the backward passes from the previous epochs.
    loss.backward() #we perform backward pass to computes the gradients.
    optimizer.step() #updates the weights accordingly.
    
    if (epoch+1) % 10 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

Epoch [10/1000], Loss: 0.5157
Epoch [20/1000], Loss: 0.4722
Epoch [30/1000], Loss: 0.4562
Epoch [40/1000], Loss: 0.4455
Epoch [50/1000], Loss: 0.4367
Epoch [60/1000], Loss: 0.4305
Epoch [70/1000], Loss: 0.4268
Epoch [80/1000], Loss: 0.4229
Epoch [90/1000], Loss: 0.4164
Epoch [100/1000], Loss: 0.4080
Epoch [110/1000], Loss: 0.4004
Epoch [120/1000], Loss: 0.3937
Epoch [130/1000], Loss: 0.3862
Epoch [140/1000], Loss: 0.3790
Epoch [150/1000], Loss: 0.3731
Epoch [160/1000], Loss: 0.3679
Epoch [170/1000], Loss: 0.3633
Epoch [180/1000], Loss: 0.3597
Epoch [190/1000], Loss: 0.3564
Epoch [200/1000], Loss: 0.3538
Epoch [210/1000], Loss: 0.3516
Epoch [220/1000], Loss: 0.3499
Epoch [230/1000], Loss: 0.3481
Epoch [240/1000], Loss: 0.3428
Epoch [250/1000], Loss: 0.3390
Epoch [260/1000], Loss: 0.3379
Epoch [270/1000], Loss: 0.3372
Epoch [280/1000], Loss: 0.3367
Epoch [290/1000], Loss: 0.3364
Epoch [300/1000], Loss: 0.3362
Epoch [310/1000], Loss: 0.3361
Epoch [320/1000], Loss: 0.3359
Epoch [330/1000],

### Evaluation

In [62]:
M0.eval()
data_test = Variable(test_x_std).float()
target_test = Variable(test_y).type(torch.FloatTensor)
y_pred_test = M0(data_test)
before_train = criterion(y_pred_test.squeeze(), target_test.squeeze()) 
print('Test loss after training' , before_train.item())

Test loss after training 0.325082391500473


In [65]:
roc_auc_score(test_y,y_pred_test.squeeze().detach().numpy())

0.8711347676806018

In [69]:
pred = y_pred_test.squeeze().detach().numpy() > 0.2

In [70]:
confusion_matrix(test_y,pred)

array([[1876,  519],
       [ 130,  475]], dtype=int64)

In [72]:
print(classification_report(test_y,pred))

              precision    recall  f1-score   support

           0       0.94      0.78      0.85      2395
           1       0.48      0.79      0.59       605

    accuracy                           0.78      3000
   macro avg       0.71      0.78      0.72      3000
weighted avg       0.84      0.78      0.80      3000



### Batch Processing 

In [74]:
# Make torch datasets from train and test sets
train = torch.utils.data.TensorDataset(train_x_std,train_y)
test = torch.utils.data.TensorDataset(test_x_std,test_y)

In [75]:
# Create train and test data loaders for batch processing
train_loader = torch.utils.data.DataLoader(train, batch_size = 10, shuffle = True)
test_loader = torch.utils.data.DataLoader(test, batch_size = 10, shuffle = True)

In [77]:
# Hyper-parameters
input_size = 11
output_size = 1
num_epochs = 20
learning_rate = 0.01

In [78]:
M0 = ANN(input_dim=input_size,output_dim=output_size)
print(M0)

ANN(
  (fc1): Linear(in_features=11, out_features=6, bias=True)
  (fc2): Linear(in_features=6, out_features=6, bias=True)
  (output_layer): Linear(in_features=6, out_features=1, bias=True)
)


In [79]:
M0.train()
epoch_list = []
train_loss_list = []

for epoch in range(num_epochs):
    # monitor training loss
    train_loss = 0.0
    val_loss = 0.0
    
    for data,target in train_loader:
        data = Variable(data).float()
        target = Variable(target).type(torch.FloatTensor)
    
        # Forward pass
        outputs = M0(data)
        loss = criterion(outputs, target)
    
        # Backward and optimize
        optimizer.zero_grad()   #sets the gradients to zero before we start backpropagation. This is a necessary step as PyTorch accumulates the gradients from the backward passes from the previous epochs.
        loss.backward() #we perform backward pass to computes the gradients.
        optimizer.step() #updates the weights accordingly.
        train_loss += loss.item()*data.size(0)
    # calculate average training loss over an epoch
    train_loss = train_loss/len(train_loader.dataset)
    train_loss_list.append(train_loss)
    epoch_list.append(epoch + 1)
    
    #if (epoch+1) % 5 == 0:
    print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, train_loss))

Epoch [1/20], Loss: 0.6456
Epoch [2/20], Loss: 0.6456
Epoch [3/20], Loss: 0.6456
Epoch [4/20], Loss: 0.6456
Epoch [5/20], Loss: 0.6456
Epoch [6/20], Loss: 0.6456
Epoch [7/20], Loss: 0.6456
Epoch [8/20], Loss: 0.6456
Epoch [9/20], Loss: 0.6456
Epoch [10/20], Loss: 0.6456
Epoch [11/20], Loss: 0.6456
Epoch [12/20], Loss: 0.6456
Epoch [13/20], Loss: 0.6456
Epoch [14/20], Loss: 0.6456
Epoch [15/20], Loss: 0.6456
Epoch [16/20], Loss: 0.6456
Epoch [17/20], Loss: 0.6456
Epoch [18/20], Loss: 0.6456
Epoch [19/20], Loss: 0.6456
Epoch [20/20], Loss: 0.6456


### Evaluation

In [81]:
M0.eval()
data_test = Variable(test_x_std).float()
target_test = Variable(test_y).type(torch.FloatTensor)
y_pred_test = M0(data_test)
before_train = criterion(y_pred_test.squeeze(), target_test.squeeze()) 
print('Test loss after training' , before_train.item())

Test loss after training 0.644707202911377


In [82]:
roc_auc_score(test_y,y_pred_test.squeeze().detach().numpy())

0.4255815317724599

In [83]:
pred = y_pred_test.squeeze().detach().numpy() > 0.2

In [84]:
confusion_matrix(test_y,pred)

array([[   0, 2395],
       [   0,  605]], dtype=int64)

In [85]:
print(classification_report(test_y,pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      2395
           1       0.20      1.00      0.34       605

    accuracy                           0.20      3000
   macro avg       0.10      0.50      0.17      3000
weighted avg       0.04      0.20      0.07      3000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
### Evaluation

M0.eval()
data_test = Variable(test_x_std).float()
target_test = Variable(test_y).type(torch.FloatTensor)
y_pred_test = M0(data_test)
before_train = criterion(y_pred_test.squeeze(), target_test.squeeze()) 
print('Test loss after training' , before_train.item())

roc_auc_score(test_y,y_pred_test.squeeze().detach().numpy())

pred = y_pred_test.squeeze().detach().numpy() > 0.2

confusion_matrix(test_y,pred)

print(classification_report(test_y,pred))