# Assessment of a plain MLP with and without mixup on SCTP (Santander Customer Transaction Prediction)


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from scipy.stats import beta

import torch
from torch import nn
from torch.utils.data import DataLoader,TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau

import matplotlib.pyplot as plt
import seaborn as sns

import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger,CSVLogger
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

In [None]:
# Fix the global random seed so that repeated runs give the same output.
pl.seed_everything(2)

# Prefer CUDA when a GPU is visible, otherwise fall back to the CPU.
# NOTE(review): the Trainer cells in this notebook pass gpus=0, so this value
# is only displayed here and does not select the training device.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

***
## Data preparation

In [None]:
# Load the competition training table from the Kaggle input directory.
# The bare name on the last line renders the frame as the cell output.
dataset = pd.read_csv("../input/santander-customer-transaction-prediction/train.csv")
dataset

In [None]:
# Print a concise summary of the loaded frame (index, dtypes, memory usage).
dataset.info()

In [None]:
# Fraction of rows belonging to each target class in the full dataset.
dataset.groupby("target")["ID_code"].count() / len(dataset)

In [None]:
# dataset stratified split: train 10% - valid 10% - test 80%
#
# With 10 stratified folds, the held-out part of each fold is a disjoint 10%
# slice of the data. The held-out slice of the first fold is used for training
# and that of the second fold for validation; everything else becomes the test
# set.

skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=2)
split = skf.split(dataset, dataset.target)
_, train_index = next(split)
_, valid_index = next(split)

# skf.split yields *positional* row indices, so rows are selected with .iloc
# (label-based .loc/.drop would only be correct for a default RangeIndex).
train_dset = dataset.iloc[train_index].reset_index(drop=True)
valid_dset = dataset.iloc[valid_index].reset_index(drop=True)

# Boolean mask of the rows already assigned to train or valid; the test set is
# the complement, kept in its original row order.
held_out = np.zeros(len(dataset), dtype=bool)
held_out[train_index] = True
held_out[valid_index] = True
test_dset = dataset[~held_out].reset_index(drop=True)

In [None]:
# Show the class proportions of each split (train, valid, test, in that order)
# to check that stratification preserved the class balance.
for part in (train_dset, valid_dset, test_dset):
    display(part.groupby("target")["ID_code"].count() / len(part))

In [None]:
# Name of the label column.
target = "target"

# Every column from the third onward is used as a model input
# (presumably the first two are ID_code and target - verify against the CSV).
input_features = list(dataset.columns[2:])

In [None]:
# Wrap each split as a PyTorch TensorDataset of (features, label) pairs.

def _as_tensor_dataset(frame):
    """Return a TensorDataset built from the feature and target columns of *frame*.

    Both tensors are float32; the label is reshaped to a column vector so that
    it has one value per row, matching a single-output model.
    """
    features = torch.tensor(frame[input_features].values, dtype=torch.float)
    labels = torch.tensor(frame[target].values.reshape(-1, 1), dtype=torch.float)
    return TensorDataset(features, labels)


train_tensor_dset = _as_tensor_dataset(train_dset)
valid_tensor_dset = _as_tensor_dataset(valid_dset)
test_tensor_dset = _as_tensor_dataset(test_dset)

In [None]:
# Number of rows in the training split.
len(train_dset)

In [None]:
# Number of rows in the validation split.
len(valid_dset)

In [None]:
# Number of rows in the test split.
len(test_dset)

***
## 3-layer MLP without mixup

In [None]:
class DNN(pl.LightningModule):
    """Fully connected binary classifier trained with BCE-with-logits loss.

    Every linear layer is preceded by batch normalisation and dropout, and all
    linear layers except the last one are followed by ReLU, so ``forward``
    returns raw logits.

    Args:
        input_dim: number of input features.
        output_dim: number of output logits.
        nn_depth: number of ReLU-activated linear layers (the input projection
            plus ``nn_depth - 1`` hidden layers of equal width).
        nn_width: width of every hidden representation.
        dropout: dropout probability before the input and hidden linear
            layers; the output layer uses half of this value.
        momentum: momentum passed to every ``BatchNorm1d`` layer.
    """

    def __init__(self, input_dim, output_dim, nn_depth, nn_width, dropout, momentum):
        super().__init__()
        n_hidden = nn_depth - 1

        # input stage: normalise -> dropout -> project to nn_width
        self.bn_in = nn.BatchNorm1d(input_dim, momentum=momentum)
        self.dp_in = nn.Dropout(dropout)
        self.ln_in = nn.Linear(input_dim, nn_width, bias=False)

        # hidden stages, one (batch norm, dropout, linear) triple per layer
        self.bnorms = nn.ModuleList(
            [nn.BatchNorm1d(nn_width, momentum=momentum) for _ in range(n_hidden)]
        )
        self.dropouts = nn.ModuleList(
            [nn.Dropout(dropout) for _ in range(n_hidden)]
        )
        self.linears = nn.ModuleList(
            [nn.Linear(nn_width, nn_width, bias=False) for _ in range(n_hidden)]
        )

        # output stage: lighter dropout, no activation (logits)
        self.bn_out = nn.BatchNorm1d(nn_width, momentum=momentum)
        self.dp_out = nn.Dropout(dropout / 2)
        self.ln_out = nn.Linear(nn_width, output_dim, bias=False)

        self.loss = nn.BCEWithLogitsLoss()

    def forward(self, x):
        """Return the logits for the input batch ``x``."""
        hidden = nn.functional.relu(self.ln_in(self.dp_in(self.bn_in(x))))
        for norm, drop, linear in zip(self.bnorms, self.dropouts, self.linears):
            hidden = nn.functional.relu(linear(drop(norm(hidden))))
        return self.ln_out(self.dp_out(self.bn_out(hidden)))

    def _logits_and_loss(self, batch):
        """Run the network on an ``(inputs, labels)`` batch; return (logits, loss)."""
        inputs, labels = batch
        logits = self.forward(inputs)
        return logits, self.loss(logits, labels)

    def training_step(self, batch, batch_idx):
        """Compute, log (as ``train_loss``) and return the loss of one batch."""
        _, batch_loss = self._logits_and_loss(batch)
        self.log('train_loss', batch_loss)
        return batch_loss

    def validation_step(self, batch, batch_idx):
        """Log the loss of one validation batch as ``valid_loss``."""
        _, batch_loss = self._logits_and_loss(batch)
        self.log('valid_loss', batch_loss)

    def test_step(self, batch, batch_idx):
        """Log the loss and the ROC AUC of one evaluation batch.

        NOTE(review): the AUC is computed on each batch separately, so the
        logged ``test_metric`` is not the AUC over the whole split, and it is
        undefined for a batch that contains a single class.
        """
        _, labels = batch
        logits, batch_loss = self._logits_and_loss(batch)
        probabilities = torch.sigmoid(logits).detach().cpu().numpy()
        batch_auc = roc_auc_score(labels.cpu().numpy(), probabilities)
        self.log('test_loss', batch_loss)
        self.log('test_metric', batch_auc)

    def configure_optimizers(self):
        """Adam optimizer with a plateau scheduler driven by ``valid_loss``.

        The learning rate is halved (down to 1e-5) when ``valid_loss`` stops
        improving for longer than the scheduler patience.
        """
        adam = torch.optim.Adam(self.parameters(), lr=5e-3, weight_decay=1e-4)
        plateau = ReduceLROnPlateau(
            adam,
            mode="min",
            factor=0.5,
            patience=5,
            min_lr=1e-5,
        )
        scheduler_config = {
            'scheduler': plateau,
            'interval': 'epoch',
            'frequency': 1,
            'reduce_on_plateau': True,
            'monitor': 'valid_loss',
        }
        return [adam], [scheduler_config]

In [None]:
# Network without mixup: three ReLU layers of width 128 plus the output layer.
model = DNN(
    input_dim=len(input_features),
    output_dim=1,
    nn_depth=3,
    nn_width=128,
    dropout=0.2,
    momentum=0.1,
)

# Metrics are written as CSV under logs/mlp_wo_mixup/.
logger = CSVLogger("logs", name="mlp_wo_mixup")

# Stop training once valid_loss has not improved for `patience` checks.
early_stop_callback = EarlyStopping(
    monitor='valid_loss',
    min_delta=.0,
    patience=20,
    verbose=True,
    mode='min',
)

# NOTE(review): gpus=0 keeps training on the CPU even when CUDA is available.
trainer = pl.Trainer(
    callbacks=[early_stop_callback],
    min_epochs=10,
    max_epochs=200,
    gpus=0,
    logger=logger,
    deterministic=True,
)

In [None]:
# Show Lightning's layer-by-layer summary of the model.
# NOTE(review): LightningModule.summarize() is specific to older Lightning
# releases - confirm it exists in the pinned version.
model.summarize()

In [None]:
# Train on the 10% training split (reshuffled every epoch) and validate on the
# 10% validation split, in batches of 1024 rows.
train_loader = DataLoader(train_tensor_dset, batch_size=1024, shuffle=True, num_workers=4)
valid_loader = DataLoader(valid_tensor_dset, batch_size=1024, shuffle=False, num_workers=4)
trainer.fit(model, train_loader, valid_loader)

In [None]:
# Loss and AUC on the validation dataset.
# The metrics come from test_step, which evaluates them batch by batch.
valid_eval_loader = DataLoader(valid_tensor_dset, batch_size=1024, shuffle=False, num_workers=4)
trainer.test(model, valid_eval_loader)

In [None]:
# Loss and AUC on the held-out test dataset.
# The metrics come from test_step, which evaluates them batch by batch.
test_eval_loader = DataLoader(test_tensor_dset, batch_size=1024, shuffle=False, num_workers=4)
trainer.test(model, test_eval_loader)

In [None]:
# Read the metrics of the run that was just trained. The path is taken from
# the logger instead of a hard-coded "version_0", because CSVLogger creates a
# new version_<k> directory on every re-run and the plot would otherwise show
# stale curves from the first run.
metrics = pd.read_csv(f"{logger.log_dir}/metrics.csv")

# Each curve keeps only the rows where its own metric was logged.
df1 = metrics.loc[:, ["step", "train_loss"]].dropna()
df2 = metrics.loc[:, ["step", "valid_loss"]].dropna()

# Training and validation loss against the global step.
plt.figure(figsize=(12, 5))
plt.plot(df1.step, df1.train_loss, "o-", label="train_loss")
plt.plot(df2.step, df2.valid_loss, "o-", label="valid_loss")
plt.grid()
plt.legend(loc="best")
plt.show()

***
## 3-layer MLP with mixup

In [None]:
# Concentration parameter of the symmetric Beta(alpha, alpha) distribution.
alpha = 0.25

# Plot its density between the 1% and 99% quantiles.
# (The `alpha=0.6` keyword of plt.plot is the line opacity, unrelated to the
# distribution parameter.)
mix_dist = beta(alpha, alpha)
x = np.linspace(mix_dist.ppf(0.01), mix_dist.ppf(0.99), 100)
plt.plot(x, mix_dist.pdf(x), 'r-', lw=5, alpha=0.6, label='beta pdf')
plt.grid()
plt.show()

In [None]:
class DNN(pl.LightningModule):
    """Fully connected binary classifier trained with mixup.

    Every linear layer is preceded by batch normalisation and dropout, and all
    linear layers except the last one are followed by ReLU, so ``forward``
    returns raw logits. During training each batch is replaced by convex
    combinations of its first and second half (mixup); validation and test
    batches are used unchanged.

    Args:
        input_dim: number of input features.
        output_dim: number of output logits.
        nn_depth: number of ReLU-activated linear layers (the input projection
            plus ``nn_depth - 1`` hidden layers of equal width).
        nn_width: width of every hidden representation.
        dropout: dropout probability before the input and hidden linear
            layers; the output layer uses half of this value.
        momentum: momentum passed to every ``BatchNorm1d`` layer.
        alpha: concentration of the symmetric ``Beta(alpha, alpha)``
            distribution from which the mixing coefficient is drawn.
    """

    def __init__(self, input_dim, output_dim, nn_depth, nn_width, dropout, momentum, alpha=0.8):
        super().__init__()

        self.alpha = alpha

        # input stage: normalise -> dropout -> project to nn_width
        self.bn_in = nn.BatchNorm1d(input_dim, momentum=momentum)
        self.dp_in = nn.Dropout(dropout)
        self.ln_in = nn.Linear(input_dim, nn_width, bias=False)

        # hidden stages, one (batch norm, dropout, linear) triple per layer
        self.bnorms = nn.ModuleList([nn.BatchNorm1d(nn_width, momentum=momentum) for _ in range(nn_depth-1)])
        self.dropouts = nn.ModuleList([nn.Dropout(dropout) for _ in range(nn_depth-1)])
        self.linears = nn.ModuleList([nn.Linear(nn_width, nn_width, bias=False) for _ in range(nn_depth-1)])

        # output stage: lighter dropout, no activation (logits)
        self.bn_out = nn.BatchNorm1d(nn_width, momentum=momentum)
        self.dp_out = nn.Dropout(dropout/2)
        self.ln_out = nn.Linear(nn_width, output_dim, bias=False)

        self.loss = nn.BCEWithLogitsLoss()

    def forward(self, x):
        """Return the logits for the input batch ``x``."""
        x = self.bn_in(x)
        x = self.dp_in(x)
        x = nn.functional.relu(self.ln_in(x))

        for bn_layer, dp_layer, ln_layer in zip(self.bnorms, self.dropouts, self.linears):
            x = bn_layer(x)
            x = dp_layer(x)
            x = ln_layer(x)
            x = nn.functional.relu(x)

        x = self.bn_out(x)
        x = self.dp_out(x)
        x = self.ln_out(x)
        return x

    def _mixup(self, X, y):
        """Blend the first half of a batch with the second half.

        A single coefficient ``lam ~ Beta(alpha, alpha)`` is drawn per batch
        and applied to both inputs and labels. With an odd number of samples
        the last one is left out so that both halves have the same length.

        Returns:
            The mixed ``(X, y)`` pair, each with ``X.shape[0] // 2`` rows.
        """
        # Half of the number of *samples*. The batch object itself is the
        # (X, y) pair, so its length is always 2 and must not be used here.
        half = X.shape[0] // 2
        # Use the coefficient configured on this model, not a notebook global.
        lam = float(np.random.beta(self.alpha, self.alpha))
        X_mix = lam * X[:half] + (1 - lam) * X[half:2 * half]
        y_mix = lam * y[:half] + (1 - lam) * y[half:2 * half]
        return X_mix, y_mix

    def training_step(self, batch, batch_idx):
        """Apply mixup to the batch, then log (``train_loss``) and return its loss."""
        X, y = batch
        X, y = self._mixup(X, y)

        y_hat = self.forward(X)
        loss = self.loss(y_hat, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the loss of one unmixed validation batch as ``valid_loss``."""
        X, y = batch
        y_hat = self.forward(X)
        loss = self.loss(y_hat, y)
        self.log('valid_loss', loss)

    def test_step(self, batch, batch_idx):
        """Log the loss and the ROC AUC of one evaluation batch.

        NOTE(review): the AUC is computed on each batch separately, so the
        logged ``test_metric`` is not the AUC over the whole split, and it is
        undefined for a batch that contains a single class.
        """
        X, y = batch
        y_logit = self.forward(X)
        y_probs = torch.sigmoid(y_logit).detach().cpu().numpy()
        loss = self.loss(y_logit, y)
        metric = roc_auc_score(y.cpu().numpy(), y_probs)
        self.log('test_loss', loss)
        self.log('test_metric', metric)

    def configure_optimizers(self):
        """Adam optimizer with a plateau scheduler driven by ``valid_loss``.

        The learning rate is halved (down to 1e-5) when ``valid_loss`` stops
        improving for longer than the scheduler patience.
        """
        optimizer = torch.optim.Adam(self.parameters(), lr=5e-3, weight_decay=1e-4)
        scheduler = {
            'scheduler': ReduceLROnPlateau(
                optimizer,
                mode="min",
                factor=0.5,
                patience=5,
                min_lr=1e-5),
            'interval': 'epoch',
            'frequency': 1,
            'reduce_on_plateau': True,
            'monitor': 'valid_loss',
        }
        return [optimizer], [scheduler]

In [None]:
# Network with mixup: same architecture settings, mixing coefficient drawn
# from Beta(0.25, 0.25).
model = DNN(
    input_dim=len(input_features),
    output_dim=1,
    nn_depth=3,
    nn_width=128,
    dropout=0.2,
    momentum=0.1,
    alpha=0.25,
)

# Metrics are written as CSV under logs/mlp_w_mixup/.
logger = CSVLogger("logs", name="mlp_w_mixup")

# Stop training once valid_loss has not improved for `patience` checks.
early_stop_callback = EarlyStopping(
    monitor='valid_loss',
    min_delta=.0,
    patience=20,
    verbose=True,
    mode='min',
)

# NOTE(review): gpus=0 keeps training on the CPU even when CUDA is available.
trainer = pl.Trainer(
    callbacks=[early_stop_callback],
    min_epochs=10,
    max_epochs=200,
    gpus=0,
    logger=logger,
    deterministic=True,
)

In [None]:
# Show Lightning's layer-by-layer summary of the model.
# NOTE(review): LightningModule.summarize() is specific to older Lightning
# releases - confirm it exists in the pinned version.
model.summarize()

In [None]:
# Train on the 10% training split (reshuffled every epoch) and validate on the
# 10% validation split. drop_last=True discards the final short training
# batch, so every training batch has exactly 1024 rows.
train_loader = DataLoader(train_tensor_dset, batch_size=1024, shuffle=True, num_workers=4, drop_last=True)
valid_loader = DataLoader(valid_tensor_dset, batch_size=1024, shuffle=False, num_workers=4)
trainer.fit(model, train_loader, valid_loader)

In [None]:
# Loss and AUC on the validation dataset.
# The metrics come from test_step, which evaluates them batch by batch.
valid_eval_loader = DataLoader(valid_tensor_dset, batch_size=1024, shuffle=False, num_workers=4)
trainer.test(model, valid_eval_loader)

In [None]:
# Loss and AUC on the held-out test dataset.
# The metrics come from test_step, which evaluates them batch by batch.
test_eval_loader = DataLoader(test_tensor_dset, batch_size=1024, shuffle=False, num_workers=4)
trainer.test(model, test_eval_loader)

In [None]:
# Read the metrics of the run that was just trained. The path is taken from
# the logger instead of a hard-coded "version_0", because CSVLogger creates a
# new version_<k> directory on every re-run and the plot would otherwise show
# stale curves from the first run.
metrics = pd.read_csv(f"{logger.log_dir}/metrics.csv")

# Each curve keeps only the rows where its own metric was logged.
df1 = metrics.loc[:, ["step", "train_loss"]].dropna()
df2 = metrics.loc[:, ["step", "valid_loss"]].dropna()

# Training and validation loss against the global step.
plt.figure(figsize=(12, 5))
plt.plot(df1.step, df1.train_loss, "o-", label="train_loss")
plt.plot(df2.step, df2.valid_loss, "o-", label="valid_loss")
plt.grid()
plt.legend(loc="best")
plt.show()

***