Adapting [my PyTorch Notebook](https://www.kaggle.com/yusufmuhammedraji/pytorch-cv-earlystopping-lrscheduler) to PyTorch Lightning

Using Bizen's [notebook](https://www.kaggle.com/hiro5299834/tps-nov-2021-pytorch-lightning) as a source.

# Version

- V2: First successful run
- V1: Draft

# References

- https://www.kaggle.com/yusufmuhammedraji/pytorch-cv-earlystopping-lrscheduler
- https://www.kaggle.com/hiro5299834/tps-nov-2021-pytorch-lightning

In [None]:
! pip install monai

In [None]:
import gc
import glob
import math
import os
import random
import time
from pathlib import Path

import feather
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from monai.metrics import ROCAUCMetric
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.core.memory import ModelSummary
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn import model_selection
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import (GroupKFold, KFold, StratifiedKFold,
                                     train_test_split)
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import ExponentialLR
from torch.utils import data

In [None]:
class Config:
    """Notebook-wide settings, read directly as class attributes."""

    # Run identity and reproducibility
    competition = "TPS_202111"
    seed = 42

    # Cross-validation layout and mini-batch size
    n_folds = 10
    batch_size = 1024

    # Optimisation schedule
    epochs = 125
    lr = 0.01
    es_patience = 20  # passed to EarlyStopping as its patience
    lr_patience = 7  # NOTE(review): not referenced anywhere else in this notebook

    # Use the GPU when one is visible, otherwise fall back to the CPU
    device = (
        torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    )

In [None]:
# Root folder of the competition files (sample_submission.csv is read from here)
data_dir = Path("../input/tabular-playground-series-nov-2021")

In [None]:
%%time
# Load the train/test tables from their feather copies and the submission
# template from the competition folder.
# pandas' own reader is used instead of the deprecated `feather-format`
# wrapper package; both go through pyarrow's feather reader.
train_df = pd.read_feather('../input/tpsnov21/train.feather')
test_df = pd.read_feather('../input/tpsnov21/test.feather')
sample_submission = pd.read_csv(data_dir / "sample_submission.csv")

print(f"train data: Rows={train_df.shape[0]}, Columns={train_df.shape[1]}")
print(f"test data : Rows={test_df.shape[0]}, Columns={test_df.shape[1]}")

In [None]:
# Hand-picked subset of feature columns used for scaling and modelling.
# To train on every column instead, use:
#   features = [col for col in train_df.columns if col not in ('id', 'target')]
features = [
    "f1", "f10", "f11", "f14", "f15", "f16", "f17",
    "f2", "f20", "f21", "f22", "f24", "f25", "f26", "f27", "f28",
    "f3", "f30", "f31", "f32", "f33", "f34", "f36", "f37",
    "f4", "f40", "f41", "f42", "f43", "f44", "f45", "f46", "f47", "f48", "f49",
    "f5", "f50", "f51", "f53", "f54", "f55", "f57", "f58", "f59",
    "f60", "f61", "f62", "f64", "f66", "f67",
    "f70", "f71", "f76", "f77",
    "f8", "f80", "f81", "f82", "f83", "f87", "f89",
    "f9", "f90", "f91", "f93", "f94", "f95", "f96", "f97", "f98",
]

In [None]:
class TPSDataset(data.Dataset):
    """Map-style dataset over a feature table and an optional target column.

    Each item is a dict with key ``'X'`` (float tensor holding one row of
    features) and, when targets were supplied, key ``'y'`` (float scalar
    tensor holding that row's target).

    Args:
        X: Feature table exposing ``.values`` and ``len()`` (the notebook
            passes a pandas DataFrame).
        y: Optional targets exposing ``.values`` (the notebook passes a pandas
            Series). Leave as ``None`` for data without labels.
    """

    def __init__(self, X, y=None):
        # Zero-argument super() actually initialises the Dataset base class;
        # ``super(TPSDataset)`` only built an unbound super object.
        super().__init__()
        self.X = X
        self.y = y
        # Materialise the arrays once: evaluating ``.values`` on the whole
        # frame inside __getitem__ repeats that work for every single row.
        self._X_values = X.values
        self._y_values = None if y is None else y.values

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        item = {'X': torch.tensor(self._X_values[idx], dtype=torch.float)}
        if self._y_values is not None:
            item['y'] = torch.tensor(self._y_values[idx], dtype=torch.float)
        return item

In [None]:
# Standardise the selected feature columns: the statistics are learned from the
# training rows and then applied unchanged to both tables.
# NOTE(review): the scaler is fitted on all training rows before the CV split,
# so each validation fold contributes to its own scaling statistics.
scaler = StandardScaler()
scaler.fit(train_df[features])

train_df[features] = scaler.transform(train_df[features])
test_df[features] = scaler.transform(test_df[features])

In [None]:
# Target column used for training and for stratifying the folds.
y_train = train_df["target"]

# Feature frames: strip the identifier column (and the target for train).
X_test = test_df.drop(columns=["id"])
X_train = train_df.drop(columns=["id", "target"])

In [None]:
# Keep only the hand-picked columns listed in `features`; everything else is
# treated as unimportant and dropped.
X_train = X_train.loc[:, features]
X_test = X_test.loc[:, features]

In [None]:
# Labelled dataset for the CV folds; unlabelled dataset for test-time prediction.
train_dataset = TPSDataset(X=X_train, y=y_train)
test_dataset = TPSDataset(X=X_test)

In [None]:
# Unshuffled loader over the test rows, so predictions stay in row order.
# The batch size comes from Config (currently 1024) instead of a second
# hard-coded copy of the same number.
test_loader = data.DataLoader(test_dataset, batch_size=Config.batch_size)

# PyTorch Lightning Module

In [None]:
def initialize_weights(model):
    """Re-initialise one layer in place (usable with ``nn.Module.apply``).

    ``nn.Linear`` and ``nn.Conv2d`` layers receive Kaiming-uniform weights
    (ReLU gain) and a zeroed bias. Every other module type is left untouched.

    Args:
        model: The module to initialise.
    """
    if not isinstance(model, (nn.Linear, nn.Conv2d)):
        return

    nn.init.kaiming_uniform_(model.weight.data, nonlinearity="relu")
    # Layers created with ``bias=False`` have ``bias is None``; the Linear
    # branch used to dereference it unconditionally and crash.
    if model.bias is not None:
        nn.init.constant_(model.bias.data, 0)

In [None]:
class Model(pl.LightningModule):
    """MLP binary classifier (128 -> 64 -> 32 -> 1) as a LightningModule.

    Every hidden stage is Linear -> activation -> BatchNorm1d; the output
    stage is Linear -> sigmoid. Training logs the batch binary cross-entropy
    as ``"loss"``; validation accumulates predictions in a ROC AUC metric and
    logs the aggregated value as ``"roc_auc"`` once per validation epoch.

    Args:
        in_features: Number of input features per row.
        activation: Callable applied after each hidden linear layer.
        lr: Learning rate handed to Adam.
    """

    def __init__(self, in_features, activation=F.relu, lr=Config.lr):
        super().__init__()

        # Attribute names are the state_dict keys and the creation order
        # drives weight initialisation, so both are kept fixed.
        self.fc1 = nn.Linear(in_features, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)
        self.fc4 = nn.Linear(32, 1)
        self.flatten = nn.Flatten()
        self.activation = activation
        self.roc_auc_metric = ROCAUCMetric()
        self.lr = lr

    def forward(self, x):
        """Return one sigmoid output per row, with the size-1 dim 1 removed."""
        hidden = self.flatten(x)
        hidden_stages = (
            (self.fc1, self.bn1),
            (self.fc2, self.bn2),
            (self.fc3, self.bn3),
        )
        for dense, norm in hidden_stages:
            hidden = norm(self.activation(dense(hidden)))
        probabilities = torch.sigmoid(self.fc4(hidden))

        return probabilities.squeeze(dim=1)

    def training_step(self, batch, batch_idx):
        """Compute, log and return the binary cross-entropy of one batch."""
        inputs, targets = batch["X"], batch["y"]
        bce = F.binary_cross_entropy(self(inputs), targets)
        self.log("loss", bce)

        return {"loss": bce}

    def validation_step(self, batch, batch_idx):
        """Feed one batch of predictions and targets into the ROC AUC metric."""
        inputs, targets = batch["X"], batch["y"]
        self.roc_auc_metric(self(inputs), targets)

    def validation_epoch_end(self, training_step_outputs):
        """Aggregate the accumulated ROC AUC, reset the metric and log it.

        The argument is not used; the score comes from the metric's own buffer.
        """
        epoch_auc = self.roc_auc_metric.aggregate()
        self.roc_auc_metric.reset()
        self.log("roc_auc", epoch_auc)

    def predict_step(self, X, batch_idx, loader_idx=None):
        """Return the forward output for the ``"X"`` entry of the batch dict ``X``."""
        return self(X["X"])

    def configure_optimizers(self):
        """Return an Adam optimiser over all parameters with ``self.lr``."""
        return optim.Adam(self.parameters(), lr=self.lr)

# Training with Cross Validation

In [None]:
# Build one throwaway instance to print the layer/parameter summary;
# the CV loop creates a fresh model for every fold.
model = Model(activation=nn.SiLU(), in_features=len(features))
print(ModelSummary(model))

In [None]:
%%time

# Stratified K-fold training: one fresh model per fold, checkpointed on the
# best validation ROC AUC, then reloaded to predict the fold's validation rows
# and the test set.
pl.seed_everything(Config.seed)

final_test_predictions = []
final_valid_predictions = {}
scores = []
histories = []

# Config.device already falls back to the CPU; the Trainer has to follow the
# same rule, otherwise a hard-coded gpus=1 fails on a CPU-only machine.
n_gpus = 1 if Config.device.type == "cuda" else 0

kf = StratifiedKFold(n_splits=Config.n_folds, random_state=Config.seed, shuffle=True)

for fold, (train_idx, valid_idx) in enumerate(kf.split(X=X_train, y=y_train)):
    print(10*"=", f"Fold={fold+1}/{Config.n_folds}", 10*"=")
    start_time = time.time()

    train_subset = data.Subset(train_dataset, train_idx)
    valid_subset = data.Subset(train_dataset, valid_idx)
    train_loader = data.DataLoader(train_subset, batch_size=Config.batch_size, shuffle=True)
    valid_loader = data.DataLoader(valid_subset, batch_size=Config.batch_size)

    model = Model(in_features=len(features), activation=nn.SiLU()).to(Config.device)

    cp_callback = pl.callbacks.ModelCheckpoint(
        dirpath=Path.cwd() / "models",
        filename=f"model_{fold}",
        monitor="roc_auc",
        mode="max",
        save_weights_only=True,
    )
    logger = TensorBoardLogger(save_dir=Path.cwd(), version=fold, name="lightning_logs")
    # Stop on the validation metric the checkpoint also tracks. The logged
    # "loss" is the training loss of a single batch, which is noisy and says
    # nothing about generalisation.
    es_callback = EarlyStopping(
        monitor="roc_auc",
        min_delta=0.0,
        patience=Config.es_patience,
        verbose=True,
        mode="max",
    )

    trainer = pl.Trainer(
        fast_dev_run=False,
        max_epochs=Config.epochs,
        gpus=n_gpus,
        precision=32,
        limit_train_batches=1.0,
        num_sanity_val_steps=0,
        val_check_interval=1.0,
        callbacks=[cp_callback, es_callback],
        logger=logger,
    )

    trainer.fit(model, train_loader, valid_loader)

    # Ask the callback where the best checkpoint was written. Globbing for
    # "model_{fold}*.ckpt" can return a stale file from an earlier run, because
    # Lightning adds a "-vN" suffix when the target name already exists.
    best_ckpt_path = cp_callback.best_model_path

    del model, trainer
    gc.collect()
    torch.cuda.empty_cache()

    trainer = pl.Trainer(gpus=n_gpus)

    model = Model(in_features=len(features), activation=nn.SiLU()).to(Config.device)
    checkpoint = torch.load(best_ckpt_path, map_location=Config.device)
    model.load_state_dict(checkpoint["state_dict"])

    valid_preds = trainer.predict(model, valid_loader)
    valid_preds = torch.cat(valid_preds).cpu().numpy().flatten()
    test_preds = trainer.predict(model, test_loader)
    test_preds = torch.cat(test_preds).cpu().numpy().flatten()

    # valid_loader is unshuffled and the Subset follows valid_idx, so the
    # targets can be indexed directly instead of iterating the loader again.
    _valid_true = y_train.values[valid_idx]

    auc = roc_auc_score(_valid_true, valid_preds)
    scores.append(auc)

    final_valid_predictions.update(dict(zip(valid_idx, valid_preds)))
    final_test_predictions.append(test_preds)
    run_time = time.time() - start_time
    print(f"Fold={fold+1}, auc: {auc:.8f}, Run Time: {run_time:.2f}")

In [None]:
# glob.glob(f"./models/model_{fold}*.ckpt")[0]

In [None]:
# Summarise the per-fold AUCs; "corrected" is the mean minus one standard deviation.
score_mean = np.mean(scores)
score_std = np.std(scores)
print(f"Scores -> corrected: {score_mean - score_std:.8f}, mean: {score_mean:.8f}, std: {score_std:.8f}")

In [None]:
# Average the per-fold test predictions row-wise (one column per fold) and
# write the result out under both file names.
sample_submission['target'] = np.mean(np.column_stack(final_test_predictions), axis=1)
# `index` is documented as a bool; False states the intent explicitly instead
# of relying on None being falsy.
sample_submission.to_csv("test_pred_2.csv", index=False)
sample_submission.to_csv("submission.csv", index=False)
sample_submission