In [32]:
%matplotlib inline
import pandas as pd
import numpy as np
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F 
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score

**Reading the Dataset**

In [33]:
data = pd.read_csv('data/default-credit-card-clients.csv')

In [34]:
data.head(5)

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default_payment
0,1,20000,2,2,1,24,2,2,-1,-1,...,0,0,0,0,689,0,0,0,0,1
1,2,120000,2,2,2,26,-1,2,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,3,90000,2,2,2,34,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,4,50000,2,2,1,37,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,5,50000,1,2,1,57,-1,0,-1,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0


In [35]:
data = data.drop('ID', axis = 1)

In [36]:
data.head(1)

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default_payment
0,20000,2,2,1,24,2,2,-1,-1,-2,...,0,0,0,0,689,0,0,0,0,1


In [37]:
data = pd.get_dummies(data, prefix=['EDUCATION', 'MARRIAGE'], columns=['EDUCATION', 'MARRIAGE'],drop_first=True).copy()

In [38]:
data.shape

(30000, 31)

**Split the data in train and validation set**

In [39]:
np.random.seed(3)
msk = np.random.rand(len(data)) < 0.8
train = data[msk].reset_index().drop('index', axis = 1)
valid = data[~msk].reset_index().drop('index', axis = 1)

In [40]:
valid.shape

(5982, 31)

**Normalizing the Dataset**

In [41]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [42]:
y_train = train.default_payment
train = scaler.fit_transform(train.loc[:,train.columns != 'default_payment'])

y_valid = valid.default_payment
valid = scaler.transform(valid.loc[:,valid.columns != 'default_payment'])

In [43]:
train.shape, valid.shape

((24018, 30), (5982, 30))

In [44]:
# data.loc[:,data.columns != 'default_payment'] = data.loc[:,data.columns != 'default_payment']\
#                         .apply(lambda x: (x-x.mean())/(x.max() - x.min()), axis=0)

**Creating the dataset for Data Loader**

In [45]:
class Credit_dataset():
    def __init__(self, x,y):
        self.x = torch.tensor(x).float()
        self.y = torch.tensor(y).float().unsqueeze(1)
    
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self,idx):
        return self.x[idx], self.y[idx]

In [46]:
train_ds = Credit_dataset(train,y_train)
valid_ds = Credit_dataset(valid,y_valid)

**Dataloader**

In [47]:
train_dl = DataLoader(train_ds, batch_size = 1000, shuffle = True)
valid_dl = DataLoader(valid_ds, batch_size = 1000, shuffle = False)

**One layer NN model**

In [48]:
model = torch.nn.Sequential(
    torch.nn.Linear(30,1)
)

**Loss on the first batch of the data**

In [49]:
x ,y = next(iter(train_dl))
model.train()
y_hat = model(x)
loss = F.binary_cross_entropy(torch.sigmoid(y_hat), y)
print(f"loss for the first batch is {loss}")

loss for the first batch is 0.6602380275726318


As we can see the loss is very high for a binary dataset as there is no update in the gradients yet. This is computed based on the randomly initialized model parameters 

In [50]:
torch.sigmoid(y_hat).shape

torch.Size([1000, 1])

In [51]:
y.shape

torch.Size([1000, 1])

**Writing the training loop for the data**

In [53]:
def train_loop(model, train_dl, valid_dl, optimizer,epochs):
    
    losses = []
    for i in range(epochs):
        model = model.train()
        for x, y in train_dl:
            y_hat = model(x)
            loss = F.binary_cross_entropy(torch.sigmoid(y_hat), y.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        train_loss = np.mean(losses)
        
        valid_loss, valid_auc = val_metrics(model, valid_dl)
        print("train loss %.3f valid loss %.3f auc-roc %.3f" % (train_loss, valid_loss, valid_auc))

**writing the loop to calculate validation loss after every epoch**

In [52]:
def val_metrics(model, valid_dl):
    model.eval()
    losses = []
    y_hats = []
    ys = []
    for x, y in valid_dl:
        y_hat = model(x)
        loss = F.binary_cross_entropy(torch.sigmoid(y_hat), y.float())
        y_hats.append(y_hat.detach().numpy())
        ys.append(y.numpy())
        losses.append(loss.item())
    ys = np.concatenate(ys)
    y_hats = np.concatenate(y_hats)
    #print(y_hats.shape,ys.shape )
    return np.mean(losses), roc_auc_score(ys, y_hats)

In [54]:
learning_rate = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

**9**

In [55]:
epochs =20
train_loop(model, train_dl, valid_dl, optimizer,epochs)

train loss 0.566 valid loss 0.537 auc-roc 0.482
train loss 0.550 valid loss 0.524 auc-roc 0.574
train loss 0.537 valid loss 0.514 auc-roc 0.625
train loss 0.529 valid loss 0.507 auc-roc 0.647
train loss 0.522 valid loss 0.502 auc-roc 0.657
train loss 0.518 valid loss 0.498 auc-roc 0.666
train loss 0.512 valid loss 0.495 auc-roc 0.671
train loss 0.509 valid loss 0.493 auc-roc 0.673
train loss 0.508 valid loss 0.491 auc-roc 0.676
train loss 0.505 valid loss 0.489 auc-roc 0.678
train loss 0.502 valid loss 0.488 auc-roc 0.678
train loss 0.500 valid loss 0.488 auc-roc 0.679
train loss 0.499 valid loss 0.487 auc-roc 0.680
train loss 0.497 valid loss 0.486 auc-roc 0.682
train loss 0.496 valid loss 0.485 auc-roc 0.683
train loss 0.494 valid loss 0.485 auc-roc 0.684
train loss 0.493 valid loss 0.484 auc-roc 0.684
train loss 0.492 valid loss 0.484 auc-roc 0.686
train loss 0.491 valid loss 0.483 auc-roc 0.687
train loss 0.489 valid loss 0.483 auc-roc 0.688


As we can see, after a few epochs, the loss is not improving a lot

# Two Layer NN

In [56]:
train_dl = DataLoader(train_ds, batch_size = 1000, shuffle = True)
valid_dl = DataLoader(valid_ds, batch_size = 1000, shuffle = False)

In [57]:
model = torch.nn.Sequential(
    nn.Linear(30,5),
    nn.ReLU(),
    nn.Linear(5,1)
)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [58]:
epochs = 20
learning_rate = 0.01
train_loop(model, train_dl, valid_dl, optimizer,epochs)

train loss 0.618 valid loss 0.545 auc-roc 0.464
train loss 0.576 valid loss 0.523 auc-roc 0.598
train loss 0.558 valid loss 0.510 auc-roc 0.651
train loss 0.545 valid loss 0.499 auc-roc 0.667
train loss 0.534 valid loss 0.492 auc-roc 0.676
train loss 0.524 valid loss 0.488 auc-roc 0.680
train loss 0.517 valid loss 0.486 auc-roc 0.683
train loss 0.511 valid loss 0.485 auc-roc 0.685
train loss 0.507 valid loss 0.484 auc-roc 0.689
train loss 0.503 valid loss 0.481 auc-roc 0.693
train loss 0.501 valid loss 0.482 auc-roc 0.692
train loss 0.499 valid loss 0.480 auc-roc 0.697
train loss 0.496 valid loss 0.478 auc-roc 0.698
train loss 0.493 valid loss 0.480 auc-roc 0.700
train loss 0.491 valid loss 0.479 auc-roc 0.702
train loss 0.490 valid loss 0.476 auc-roc 0.701
train loss 0.488 valid loss 0.476 auc-roc 0.703
train loss 0.486 valid loss 0.476 auc-roc 0.703
train loss 0.485 valid loss 0.475 auc-roc 0.704
train loss 0.484 valid loss 0.475 auc-roc 0.704


**As we introduce a new layer, the auc-roc of the model increased a bit**

This can be further improved if add more parameters to the NN model by increasing the depth or the number of nodes