 * This notebook showcases a Multilayer Perceptron **(MLP) implementation in PyTorch on a structured (tabular) dataset** — here, the Credit Card customers data.
 * EDA of the data is not carried out here; there are plenty of good notebooks covering it for this dataset, for example [this one](https://www.kaggle.com/thomaskonstantin/bank-churn-data-exploration-and-churn-prediction).


In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing
from sklearn.metrics import f1_score as f1
from sklearn.metrics import confusion_matrix
from sklearn.metrics import *
from imblearn.over_sampling import SMOTE

#-- Pytorch specific libraries import -----#
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

In [None]:
# Load BankChurners.csv into a DataFrame.
df_data=pd.read_csv('../input/credit-card-customers/BankChurners.csv')
# Show the column labels of the loaded frame (notebook cell output).
df_data.columns

In [None]:
# Display the (rows, columns) shape of the frame as loaded, before preprocessing.
df_data.shape

## Data Preprocessing 

* One-hot encoding (OHE) of categorical features
* Up-sampling using SMOTE
* Dropping redundant and unwanted fields

In [None]:
# Encode the two binary fields as 0/1 integers.
df_data.Attrition_Flag = df_data.Attrition_Flag.replace(
    {'Attrited Customer': 1, 'Existing Customer': 0}
)
df_data.Gender = df_data.Gender.replace({'F': 1, 'M': 0})

# One-hot encode each multi-level categorical field. Each entry pairs the
# source column with the dummy level that is discarded after encoding.
ohe_specs = (
    ('Education_Level', 'Unknown'),
    ('Income_Category', 'Unknown'),
    ('Marital_Status', 'Unknown'),
    ('Card_Category', 'Platinum'),
)
for source_col, dropped_level in ohe_specs:
    dummies = pd.get_dummies(df_data[source_col]).drop(columns=[dropped_level])
    df_data = pd.concat([df_data, dummies], axis=1)

# The encoded source columns and the CLIENTNUM column are no longer needed.
df_data.drop(
    columns=[spec_col for spec_col, _ in ohe_specs] + ['CLIENTNUM'],
    inplace=True,
)

In [None]:
# Display the shape again, now that dummy columns were added and the source columns dropped.
df_data.shape

In [None]:
#SMOTE upsampling
oversample = SMOTE()
X, y = oversample.fit_resample(df_data[df_data.columns[1:]], df_data[df_data.columns[0]])

In [None]:
# Columns handed to SMOTE as features (everything after the first column).
df_data.columns[1:]

In [None]:
# Column handed to SMOTE as the target (the first column).
df_data.columns[0]

In [None]:
# Rebuild a single frame from the resampled features and append the resampled
# target as a 'Churn' column.
feature_names = df_data.columns[1:]
upsampled_df = pd.DataFrame(data=X, columns=feature_names).assign(Churn=y)

# Everything from position 15 up to (but excluding) the final 'Churn' column is
# set aside in ohe_data and removed from the modelling frame.
# NOTE(review): the slice is positional; presumably it covers the one-hot
# columns appended during preprocessing. Verify against the CSV column order.
set_aside_cols = upsampled_df.columns[15:-1]
ohe_data = upsampled_df[set_aside_cols].copy()
upsampled_df = upsampled_df.drop(columns=set_aside_cols)
upsampled_df.shape

In [None]:
# Inspect the dtype of each column that remains after resampling and column pruning.
upsampled_df.dtypes

In [None]:
# Train & test sets: every column except 'Churn' is a feature; 'Churn' is kept
# as a one-column DataFrame target.
feature_mask = upsampled_df.columns != 'Churn'
X = upsampled_df.loc[:, feature_mask]
y = pd.DataFrame(upsampled_df['Churn'])

# Seeded split; no test_size is passed, so the library's default proportion applies.
train_x, test_x, train_y, test_y = train_test_split(X, y, random_state=42)
for held_out in (test_x, test_y):
    print(held_out.shape)

## Pytorch - MLP implementation

### Converting Data into Pytorch Tensors

In [None]:
### First use a MinMaxScaler to scale all the features of the Train & Test dataframes

# Fit the scaler on the training split only, then apply those same learned
# per-feature min/max values to the test split. Re-fitting on the test split
# would scale the two splits differently and leak test statistics.
scaler = preprocessing.MinMaxScaler()
x_train = scaler.fit_transform(train_x.values)
x_test = scaler.transform(test_x.values)

print("Scaled values of Train set \n")
print(x_train)
print("\nScaled values of Test set \n")
print(x_test)


### Then convert the Train and Test sets into float32 tensors

# Features stay 2-D (rows x features); targets are flattened to 1-D here.
x_tensor = torch.from_numpy(x_train).float()
y_tensor = torch.from_numpy(train_y.values.ravel()).float()
xtest_tensor = torch.from_numpy(x_test).float()
ytest_tensor = torch.from_numpy(test_y.values.ravel()).float()

print("\nTrain set Tensors \n")
print(x_tensor)
print(y_tensor)

### Dataloader to pass data in batches

In [None]:
# Define a batch size; this hyperparameter can be tuned further.
bs = 64

# Targets get a trailing axis so each target row lines up with the single
# output node of the model. The ndim guard keeps this cell idempotent:
# re-running it does not stack a second extra axis onto the tensor.
if y_tensor.ndim == 1:
    y_tensor = y_tensor.unsqueeze(1)
# Features and targets are combined in a single TensorDataset, which is easier
# to iterate over and slice.
train_ds = TensorDataset(x_tensor, y_tensor)
# PyTorch's DataLoader is responsible for managing batches. You can create a
# DataLoader from any Dataset; it makes iterating over batches easier.
# shuffle=True re-draws the mini-batches every epoch instead of replaying the
# same fixed batches.
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)


# For the validation/test dataset. This loader is deliberately NOT shuffled,
# so predictions come out in the same row order as test_y.
if ytest_tensor.ndim == 1:
    ytest_tensor = ytest_tensor.unsqueeze(1)
test_ds = TensorDataset(xtest_tensor, ytest_tensor)
test_loader = DataLoader(test_ds, batch_size=32)

### MLP (Model) 

Define the layers, the activation function, and the number of nodes for the multilayer perceptron.

Structure of MLP

* 2 Hidden Layers
* Normalizing the batch data using batchnorm in between each layer
* Using ReLU Activation function between the layers
* Using dropout before sending to output
* Sigmoid to make probabilities between 0 to 1

In [None]:
# Network dimensions
n_input_dim = train_x.shape[1]   # one input node per column of train_x
n_hidden1, n_hidden2 = 120, 100  # node counts of the first and second hidden layers
n_output = 1                     # single output node for the binary classifier


class ChurnModel(nn.Module):
    """Multilayer perceptron with two hidden layers for binary classification.

    Data flow: Linear -> ReLU -> BatchNorm -> Linear -> ReLU -> BatchNorm ->
    Dropout -> Linear -> Sigmoid. The output is therefore a value in [0, 1]
    per output node.

    Args:
        input_dim: Number of input features. Defaults to the notebook-level
            ``n_input_dim`` when omitted.
        hidden1: Node count of the first hidden layer. Defaults to the
            notebook-level ``n_hidden1`` when omitted.
        hidden2: Node count of the second hidden layer. Defaults to the
            notebook-level ``n_hidden2`` when omitted.
        output_dim: Number of output nodes. Defaults to the notebook-level
            ``n_output`` when omitted.
        dropout: Dropout probability applied just before the output layer.
    """

    def __init__(self, input_dim=None, hidden1=None, hidden2=None,
                 output_dim=None, dropout=0.1):
        super().__init__()
        # Resolve omitted sizes from the notebook-level settings at call time,
        # so a plain ``ChurnModel()`` builds exactly the same network as before.
        input_dim = n_input_dim if input_dim is None else input_dim
        hidden1 = n_hidden1 if hidden1 is None else hidden1
        hidden2 = n_hidden2 if hidden2 is None else hidden2
        output_dim = n_output if output_dim is None else output_dim

        # Sub-modules are registered in the original order so that the printed
        # module summary and the parameter initialisation order do not change.
        self.layer_1 = nn.Linear(input_dim, hidden1)
        self.layer_2 = nn.Linear(hidden1, hidden2)
        self.layer_out = nn.Linear(hidden2, output_dim)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(p=dropout)
        self.batchnorm1 = nn.BatchNorm1d(hidden1)
        self.batchnorm2 = nn.BatchNorm1d(hidden2)

    def forward(self, inputs):
        """Return sigmoid outputs for a 2-D batch shaped (rows, input_dim)."""
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.sigmoid(self.layer_out(x))

        return x
    

# Build the network with the layer sizes defined above and print its layer summary.
model = ChurnModel()
print(model)

### Defining 

* Loss computation function: binary cross-entropy (BCE), the de facto standard for binary classification problems
* Learning rate: set to 0.001 (can be optimized further)
* Optimizer: Adam
* Epochs of training: set to 50

In [None]:
# Training hyperparameters
epochs = 50
learning_rate = 0.001

# Loss: binary cross-entropy, computed on the model's sigmoid outputs
loss_func = nn.BCELoss()
# Optimizer: Adam over all parameters of the model
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

### Training the MLP Model

NN Steps
1. Forward Propagation
2. Loss computation
3. Backpropagation
4. Updating the parameters

In [None]:
model.train()    # training mode: dropout active, batchnorm uses batch statistics
train_loss = []  # one entry per epoch: mean loss over all samples of that epoch
for epoch in range(epochs):
    loss_sum = 0.0
    samples_seen = 0
    # Within each epoch, run through the data one mini-batch at a time.
    for xb, yb in train_dl:
        y_pred = model(xb)            # Forward propagation
        loss = loss_func(y_pred, yb)  # Loss computation
        optimizer.zero_grad()         # Clear gradients left over from the previous step
        loss.backward()               # Back propagation
        optimizer.step()              # Update the parameters
        # The loss is a per-batch mean, so weight it by the batch size; the
        # last batch of an epoch can be smaller than the others.
        n_in_batch = len(xb)
        loss_sum += loss.item() * n_in_batch
        samples_seen += n_in_batch
    # Record the epoch-wide mean rather than only the final mini-batch loss,
    # which is a noisy estimate taken from a handful of samples.
    train_loss.append(loss_sum / samples_seen)
print('Last iteration loss value: '+str(loss.item()))

Plotting the training loss shows that it stabilizes after about the 20th epoch.

In [None]:
# Plot the per-epoch loss values recorded during training (x-axis: epoch index).
plt.plot(train_loss)
plt.show()

### Test Dataset prediction on trained NN

In [None]:
import itertools

y_pred_list = []
model.eval()  # inference mode: dropout disabled, batchnorm uses running statistics
# No gradients are needed on the test set, so torch.no_grad() is used; it
# reduces memory usage and speeds up computation.
with torch.no_grad():
    for xb_test, yb_test in test_loader:
        y_test_pred = model(xb_test)
        # Round each sigmoid output to the nearest class label (0. or 1.).
        y_pred_tag = torch.round(y_test_pred)
        y_pred_list.append(y_pred_tag.detach().numpy())

# Turn each (batch, 1) array into a plain list of labels. Only the column axis
# is squeezed: a bare squeeze() would also collapse a one-sample batch to a
# scalar, and the flattening step below would then fail on a non-iterable.
y_pred_list = [a.squeeze(axis=1).tolist() for a in y_pred_list]
# Flatten the per-batch lists, in order, into one list of predictions.
ytest_pred = list(itertools.chain.from_iterable(y_pred_list))

In [None]:
# Compare the flat ground-truth labels with the predicted labels.
y_true_test = test_y.values.ravel()
conf_matrix = confusion_matrix(y_true_test, ytest_pred)
for report_line in ("Confusion Matrix of the Test Set", "-----------", conf_matrix):
    print(report_line)

# Each metric is computed and printed in turn, label first.
metric_rows = (
    ("Precision of the MLP :\t", precision_score),
    ("Recall of the MLP    :\t", recall_score),
    ("F1 Score of the Model :\t", f1_score),
)
for metric_label, metric_fn in metric_rows:
    print(metric_label + str(metric_fn(y_true_test, ytest_pred)))