# Setup & Import libraries

In [None]:
import os
import numpy as np

import torch
from sklearn.metrics import classification_report

In [None]:
os.chdir("/content/drive/MyDrive/datascience_projects/customer_churn_analysis")
%ls

[0m[01;34mdata[0m/  [01;34mnotebook[0m/


# Load Dataset

In [None]:
# Open the three pre-split archives (train / validation / test) in one pass.
train_dataset, validation_dataset, test_dataset = map(
    np.load,
    (
        "data/train_binary_data.npz",
        "data/validation_binary_data.npz",
        "data/test_binary_data.npz",
    ),
)

# Create PyTorch Dataset Class

In [None]:
class NumpyDataset(torch.utils.data.Dataset):
    """Map-style dataset backed by in-memory NumPy feature and label arrays.

    Args:
        X: array of features, one row per sample. Cast to float32.
        y: array of labels with the same number of rows as ``X``. Cast to
            float32 and reshaped to ``(n_samples, -1)`` so every target is at
            least a 1-element vector.
        transform: optional callable applied to each feature row on access.
        target_transform: optional callable applied to each target on access.

    Raises:
        ValueError: if ``X`` and ``y`` do not have the same number of rows.
    """

    def __init__(self, X, y, transform=None, target_transform=None):
        self.transform = transform
        self.target_transform = target_transform

        # Cast (not merely check) both arrays to float32, the dtype the
        # float-valued model and loss in this notebook operate on.
        self.X = X.astype("float32")
        self.y = y.astype("float32")
        self.y = np.reshape(self.y, (len(self.y), -1))

        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of rows, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    # number of rows in dataset
    def __len__(self):
        return len(self.X)

    # get a row at an index
    def __getitem__(self, index):
        sample, target = self.X[index], self.y[index]
        # Each transform is optional and independent of the other; keep the
        # value it returns instead of discarding it.
        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return sample, target

In [None]:
# Wrap the training arrays (keys 'x' and 'y' of the loaded archive) in the Dataset class.
train_dataset = NumpyDataset(X=train_dataset['x'], y=train_dataset['y'])

In [None]:
# Number of training rows
len(train_dataset)

6400

In [None]:
# Wrap the validation arrays (keys 'x' and 'y' of the loaded archive) in the Dataset class.
validation_dataset = NumpyDataset(X=validation_dataset['x'], y=validation_dataset['y'])

In [None]:
# Number of validation rows
len(validation_dataset)

1600

In [None]:
# Wrap the test arrays (keys 'x' and 'y' of the loaded archive) in the Dataset class.
test_dataset = NumpyDataset(X=test_dataset['x'], y=test_dataset['y'])

In [None]:
# Number of test rows
len(test_dataset)

2000

# Create Model Architecture

In [None]:
class AnnModel(torch.nn.Module):
    """Small fully connected binary classifier: n_features -> 6 -> 6 -> 1.

    The two hidden layers use ReLU. The output layer has no activation, so
    ``forward`` returns raw logits; apply ``torch.sigmoid`` to obtain
    probabilities, or train with a loss that takes logits.

    Args:
        n_features: number of input features per sample.
    """

    def __init__(self, n_features):
        # calling constructor of parents class
        super().__init__()

        # first hidden layer: He (Kaiming) init to match the ReLU that follows
        self.hid1 = torch.nn.Linear(in_features=n_features, out_features=6)
        torch.nn.init.kaiming_uniform_(self.hid1.weight, nonlinearity="relu")
        self.act1 = torch.nn.ReLU()

        # second hidden layer: initialise its OWN weights (hid2, not hid1 again)
        self.hid2 = torch.nn.Linear(in_features=6, out_features=6)
        torch.nn.init.kaiming_uniform_(self.hid2.weight, nonlinearity="relu")
        self.act2 = torch.nn.ReLU()

        # output layer: one logit per sample, Xavier init, deliberately no sigmoid
        self.hid3 = torch.nn.Linear(in_features=6, out_features=1)
        torch.nn.init.xavier_uniform_(self.hid3.weight)

    def forward(self, X):
        """Return one logit per input row (last dimension of size 1)."""
        # input and act for layer 1
        X = self.hid1(X)
        X = self.act1(X)

        # input and act for layer 2
        X = self.hid2(X)
        X = self.act2(X)

        # output layer, no activation: raw logits
        X = self.hid3(X)

        return X

# Preparing Data in PyTorch

In [None]:
# Mini-batch loaders: reshuffle the training rows every epoch, keep the
# validation rows in a fixed order.
BATCH_SIZE = 32
train_dl = torch.utils.data.DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True
)
validation_dl = torch.utils.data.DataLoader(
    validation_dataset, batch_size=BATCH_SIZE, shuffle=False
)

In [None]:
# The test loader yields one row at a time, in dataset order.
test_dl = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
)

# Train the Model

In [None]:
# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

cuda:0


In [None]:
# Size the input layer from the number of feature columns, then move the
# network to the selected device (the last expression displays its layout).
n_features = train_dataset.X.shape[1]
model = AnnModel(n_features)
model.to(device)

AnnModel(
  (hid1): Linear(in_features=12, out_features=6, bias=True)
  (act1): ReLU()
  (hid2): Linear(in_features=6, out_features=6, bias=True)
  (act2): ReLU()
  (hid3): Linear(in_features=6, out_features=1, bias=True)
)

In [None]:
# Training hyperparameters
epochs = 100
learning_rate = 0.01

# Adam over all model parameters; the loss takes raw logits, matching the
# model's activation-free output layer.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = torch.nn.BCEWithLogitsLoss()

In [None]:
def binary_acc(y_pred, y_test):
    """Percentage of correct binary predictions, rounded to a whole number.

    Args:
        y_pred: tensor of raw logits.
        y_test: tensor of 0/1 labels, compared element-wise with the
            thresholded predictions.

    Returns:
        Scalar tensor: matches divided by ``y_test.shape[0]``, times 100, rounded.
    """
    probabilities = torch.sigmoid(y_pred)
    predicted_labels = torch.round(probabilities)

    n_correct = (predicted_labels == y_test).sum().float()
    return torch.round(n_correct / y_test.shape[0] * 100)

# Train for `epochs` passes over the training data and, after each pass,
# report mean per-batch loss/accuracy on the training and validation sets.
for epoch in range(epochs):
    # ---- training pass ----
    # Re-enter training mode every epoch: the validation pass below leaves the
    # model in eval mode, which would otherwise persist into later epochs.
    model.train()
    epoch_loss = 0.0
    epoch_acc = 0
    # go through all the batches generated by dataloader
    for inputs, targets in train_dl:
        inputs, targets = inputs.to(device), targets.to(device)
        # clear the gradients
        optimizer.zero_grad()
        # compute the model output
        yhat = model(inputs)
        targets = torch.reshape(targets, (-1, 1))
        # calculate loss
        loss = criterion(yhat, targets)
        acc = binary_acc(yhat, targets)
        # credit assignment
        loss.backward()
        # update model weights
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()

    print(f'Epoch {epoch+1:03}: | Train Loss: {epoch_loss/len(train_dl):.5f} | Train Acc: {epoch_acc/len(train_dl):.3f}', end="\n\t")

    # ---- validation pass ----
    epoch_loss = 0.0
    epoch_acc = 0
    model.eval()
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for inputs, targets in validation_dl:
            # Same device handling as the training pass.
            inputs, targets = inputs.to(device), targets.to(device)

            yhat = model(inputs)
            loss = criterion(yhat, targets)
            epoch_loss += loss.item()
            acc = binary_acc(yhat, targets)
            epoch_acc += acc.item()

    print(f'Epoch {epoch+1} | Validation Loss: {epoch_loss / len(validation_dl)} | Validation Acc: {epoch_acc/len(validation_dl):.3f}')
    


Epoch 001: | Train Loss: 0.46811 | Train Acc: 79.380
	Epoch 1 | Validation Loss: 0.42561756938695905 | Validation Acc: 80.700
Epoch 002: | Train Loss: 0.42939 | Train Acc: 81.785
	Epoch 2 | Validation Loss: 0.3958232843875885 | Validation Acc: 82.140
Epoch 003: | Train Loss: 0.38294 | Train Acc: 84.225
	Epoch 3 | Validation Loss: 0.35729788631200793 | Validation Acc: 85.540
Epoch 004: | Train Loss: 0.36250 | Train Acc: 85.435
	Epoch 4 | Validation Loss: 0.3413339702785015 | Validation Acc: 86.440
Epoch 005: | Train Loss: 0.35703 | Train Acc: 85.525
	Epoch 5 | Validation Loss: 0.34571986079216005 | Validation Acc: 85.140
Epoch 006: | Train Loss: 0.35328 | Train Acc: 85.830
	Epoch 6 | Validation Loss: 0.34778309673070906 | Validation Acc: 86.380
Epoch 007: | Train Loss: 0.35220 | Train Acc: 85.775
	Epoch 7 | Validation Loss: 0.34064441978931426 | Validation Acc: 86.660
Epoch 008: | Train Loss: 0.34961 | Train Acc: 85.795
	Epoch 8 | Validation Loss: 0.3455420881509781 | Validation Acc: 86

# Evaluate the test set

In [None]:
# Collect one hard 0/1 prediction per test row, in loader order.
y_pred_list = []
model.eval()
with torch.no_grad():
    for X_batch, _ in test_dl:
        # Only the features are needed for inference; labels stay where they are.
        X_batch = X_batch.to(device)
        y_test_pred = torch.sigmoid(model(X_batch))
        y_pred_tag = torch.round(y_test_pred)
        # Flatten each batch before extending so the result is a flat list of
        # floats for any batch size, not only batch_size=1.
        y_pred_list.extend(y_pred_tag.flatten().cpu().tolist())

In [None]:
# Ground-truth labels as stored on the test dataset (float32, reshaped to
# (n_samples, -1) by NumpyDataset.__init__).
y_true = test_dataset.y

In [None]:
# Per-class precision / recall / F1 on the held-out test split.
report = classification_report(y_true, y_pred_list)
print(report)

              precision    recall  f1-score   support

         0.0       0.88      0.97      0.92      1607
         1.0       0.76      0.45      0.56       393

    accuracy                           0.86      2000
   macro avg       0.82      0.71      0.74      2000
weighted avg       0.85      0.86      0.85      2000

