In [None]:
import numpy as np
import pandas as pd

import os
import gc
import glob
import random
import shutil

from matplotlib import pyplot as plt

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from torch.optim.lr_scheduler import ExponentialLR

from pytorch_lightning.loggers import TensorBoardLogger

import warnings
warnings.simplefilter('ignore')

In [None]:
# Cross-validation and reproducibility.
N_SPLITS = 10  # number of StratifiedKFold splits; one model is trained per split
SEED = 42  # passed to seed_everything() and to StratifiedKFold(random_state=...)

# DataLoader / Trainer settings.
BATCH_SIZE = 1024  # batch_size of the train, validation and test DataLoaders
WORKERS = 4  # num_workers of every DataLoader
EPOCHS = 100  # max_epochs of the per-fold pl.Trainer

LEARNING_RATE = 2e-3  # learning rate handed to Model (used by its AdamW optimizer)

MODEL_PATH = "models"  # dirpath of the ModelCheckpoint callback
TB_LOG_NAME = "lightning_logs"  # `name` argument of the TensorBoardLogger

In [None]:
def seed_everything(seed=42):
    """Seed every random number generator used by the notebook.

    Args:
        seed: Integer seed applied to Python's ``random`` module, NumPy's
            global generator and, through Lightning, the remaining
            generators Lightning manages.
    """
    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so setting it here only
    # affects processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    # Use the public top-level helper: the `pl.utilities.seed` module path is
    # deprecated/removed in newer Lightning releases, while `pl.seed_everything`
    # accepts the same arguments.
    pl.seed_everything(seed, workers=True)


seed_everything(SEED)

In [None]:
# Read the training table (train.csv of tabular-playground-series-nov-2021).
train_df = pd.read_csv('../input/tabular-playground-series-nov-2021/train.csv')

In [None]:
# Share of each target class (fractions rather than raw counts).
train_df['target'].value_counts(normalize=True)

In [None]:
# Drop the first column (presumably the row id -- confirm against train.csv).
# Guarded so that re-running this cell does not strip a further column: once
# 'fold' exists the frame has already been trimmed.
if 'fold' not in train_df.columns:
    # .copy() gives an independent frame, so the column assignments below never
    # act on a slice of the originally loaded data.
    train_df = train_df.iloc[:, 1:].copy()

# Tag every row with the index of the fold in which it is held out.
train_df['fold'] = -1
fold_col = train_df.columns.get_loc('fold')
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df['target'])):
    # split() yields positional indices, so assign by position (iloc) instead
    # of by label (loc); the two only coincide for a default RangeIndex.
    train_df.iloc[val_idx, fold_col] = fold

assert (train_df['fold'] >= 0).all(), "every row must be assigned to a validation fold"

In [None]:
# Show the training frame (it now carries the 'fold' column).
train_df

<h1> Network implementation </h1>

In [None]:
!pip install -q monai-weekly

In [None]:
from torch.utils.data import TensorDataset, DataLoader
import torch
from torch import nn
from torch.nn import functional as F
from monai.metrics import ROCAUCMetric
from pytorch_lightning.core.memory import ModelSummary


class Model(pl.LightningModule):
    """Fully connected classifier with a contracting and re-expanding trunk.

    Layer widths run ``hidden_size -> hidden_size//2 -> hidden_size//4`` and
    back up to ``hidden_size``. Each re-expanded activation is multiplied
    element-wise with the normalised activation of the same width from the
    contracting half. The narrowest activation and the final re-expanded one
    are concatenated, passed through a 128-unit layer and projected to
    ``num_targets`` outputs. ``forward`` returns these raw outputs; no
    sigmoid is applied.

    Args:
        in_size: Number of input features per sample.
        learning_rate: Learning rate given to the AdamW optimizer.
        num_targets: Width of the output layer.
        hidden_size: Width of the widest hidden layer.
    """

    def __init__(self, in_size, learning_rate, num_targets=1, hidden_size=384):
        super().__init__()
        self.in_size, self.lr = in_size, learning_rate
        self.num_targets, self.hidden_size = num_targets, hidden_size

        wide = self.hidden_size
        mid = wide // 2
        narrow = wide // 4
        head = 128

        # Linear layers, declared in numeric order.
        self.fc1 = nn.Linear(self.in_size, wide)
        self.fc2 = nn.Linear(wide, mid)
        self.fc3 = nn.Linear(mid, narrow)
        self.fc4 = nn.Linear(narrow, mid)
        self.fc5 = nn.Linear(mid, wide)
        self.fc6 = nn.Linear(wide + narrow, head)
        self.fc7 = nn.Linear(head, self.num_targets)

        # Plain functions kept as attributes.
        self.relu = F.relu
        self.swish = F.hardswish
        self.concat = torch.cat
        self.multiply = torch.mul

        self.flatten = nn.Flatten()
        self.dropout1 = nn.Dropout(0.45)
        self.dropout2 = nn.Dropout(0.35)
        self.dropout3 = nn.Dropout(0.25)

        # NOTE(review): batchnorm1 and batchnorm2 are each applied at two
        # different points of forward(), so those two points share one set of
        # normalisation parameters and statistics -- confirm this is intended.
        self.batchnorm1 = nn.BatchNorm1d(wide)
        self.batchnorm2 = nn.BatchNorm1d(mid)
        self.batchnorm3 = nn.BatchNorm1d(narrow)
        self.batchnorm4 = nn.BatchNorm1d(head)

        # Accumulates validation predictions/labels until aggregated.
        self.roc_auc_metric = ROCAUCMetric()

    def _dense(self, linear, norm, inputs):
        """Apply ``linear``, then the hard-swish activation, then ``norm``."""
        return norm(self.swish(linear(inputs)))

    def forward(self, x):
        """Return the raw output of the network for a batch ``x``."""
        # Contracting half.
        down_wide = self._dense(self.fc1, self.batchnorm1, self.flatten(x))
        down_mid = self._dense(self.fc2, self.batchnorm2, self.dropout1(down_wide))
        narrowest = self._dense(self.fc3, self.batchnorm3, self.dropout2(down_mid))
        narrowest = self.dropout3(narrowest)

        # Expanding half; each stage is gated by the pre-dropout activation of
        # equal width from the contracting half.
        up_mid = self._dense(self.fc4, self.batchnorm2, narrowest)
        up_mid = self.dropout2(self.multiply(down_mid, up_mid))

        up_wide = self._dense(self.fc5, self.batchnorm1, up_mid)
        up_wide = self.dropout1(self.multiply(down_wide, up_wide))

        # Head: join the narrowest and widest representations.
        joined = self.concat((narrowest, up_wide), dim=1)
        joined = self.dropout3(self._dense(self.fc6, self.batchnorm4, joined))

        return self.fc7(joined)

    def training_step(self, batch, batch_idx):
        """Compute and log the binary cross-entropy (with logits) of a batch."""
        features, labels = batch
        logits = self(features).squeeze(1)
        bce = F.binary_cross_entropy_with_logits(logits, labels)
        self.log('loss', bce)
        return {'loss': bce}

    def validation_step(self, batch, batch_idx):
        """Feed the batch outputs and labels into the ROC-AUC accumulator."""
        features, labels = batch
        logits = self(features).squeeze(1)
        self.roc_auc_metric(logits, labels)

    def validation_epoch_end(self, training_step_outputs):
        """Aggregate the accumulated ROC-AUC, reset the metric and log it."""
        epoch_auc = self.roc_auc_metric.aggregate()
        self.roc_auc_metric.reset()
        self.log('roc_auc', epoch_auc)

    def predict_step(self, X, batch_idx: int, dataloader_idx: int = None):
        """Run the network on the first element of the batch ``X``."""
        features = X[0]
        return self(features)

    def configure_optimizers(self):
        """Return an AdamW optimizer over all parameters of the model."""
        return torch.optim.AdamW(
            self.parameters(),
            lr=self.lr,
            eps=1e-8,
            weight_decay=1e-2,
            amsgrad=False,
        )

In [None]:
# Fitted preprocessing pipeline of every fold, indexed by fold number; the
# inference cell looks them up again via pipes[fold].
pipes = []

for fold in range(N_SPLITS):
    print('Fold:', fold)

    # Rows tagged with this fold number are held out; all other rows train.
    train_data = train_df[train_df['fold'] != fold]
    val_data = train_df[train_df['fold'] == fold]

    X_train = train_data.drop(['target', 'fold'], axis=1)
    y_train = train_data['target']

    X_val = val_data.drop(['target', 'fold'], axis=1)
    y_val = val_data['target']

    pipe = Pipeline([
        ("scaler", MinMaxScaler()),
    ])

    # Fit on the training rows only, then apply the fitted pipeline to both parts.
    pipe.fit(X_train)
    pipes.append(pipe)

    # transform() already returns arrays; feed them to torch directly instead
    # of wrapping them in DataFrames first.
    X_train_scaled = pipe.transform(X_train)
    X_val_scaled = pipe.transform(X_val)

    train_ds = TensorDataset(torch.FloatTensor(X_train_scaled), torch.FloatTensor(y_train.values))
    val_ds = TensorDataset(torch.FloatTensor(X_val_scaled), torch.FloatTensor(y_val.values))

    train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS)
    val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS)

    model = Model(
        X_train_scaled.shape[1],
        LEARNING_RATE,
        1,
        384,
    )

    # Writes the checkpoint with the highest validation 'roc_auc' of this fold;
    # the metric value becomes part of the file name.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        dirpath=MODEL_PATH,
        filename=f'model_{fold}_' + '{roc_auc:.3}',
        monitor='roc_auc',
        mode='max',
        save_weights_only=True)

    logger = TensorBoardLogger(
        save_dir=os.getcwd(),
        version=fold,
        name=TB_LOG_NAME
    )

    # Only the checkpoint callback is registered: no early stopping takes
    # place, so every fold trains for the full EPOCHS epochs.
    trainer = pl.Trainer(
        fast_dev_run=False,
        max_epochs=EPOCHS,
        # One GPU when available, otherwise CPU (gpus=0).
        gpus=int(torch.cuda.is_available()),
        precision=32,
        limit_train_batches=1.0,
        limit_val_batches=1.0,
        # No sanity validation pass before training starts.
        num_sanity_val_steps=0,
        val_check_interval=1.0,
        callbacks=[checkpoint_callback],
        logger=logger
    )

    trainer.fit(model, train_dl, val_dl)

    # Release the per-fold objects before the next fold starts.
    del model, trainer, val_data, train_data, X_train, X_val, X_train_scaled, X_val_scaled
    del y_train, y_val, train_ds, val_ds, train_dl, val_dl
    gc.collect()
    torch.cuda.empty_cache()

<h1> Inference </h1>

In [None]:
# One checkpoint path per fold, stored at the index of its fold.
trained_models = []
for i in range(N_SPLITS):
    # Build the pattern from MODEL_PATH (the directory the checkpoints were
    # written to) and avoid naming the result `list`, which would shadow the
    # builtin for the rest of the notebook.
    fold_checkpoints = sorted(glob.glob(os.path.join(MODEL_PATH, f"model_{i}_*.ckpt")))
    if not fold_checkpoints:
        raise FileNotFoundError(
            f"no checkpoint matching 'model_{i}_*.ckpt' found in {MODEL_PATH!r}"
        )
    # If several files match, the lexicographically last name is used.
    trained_models.append(fold_checkpoints[-1])

In [None]:
# Show the checkpoint path selected for each fold.
trained_models

In [None]:
# One 1-D array of test-set outputs per fold model.
all_preds = []
test_df = pd.read_csv('../input/tabular-playground-series-nov-2021/test.csv')
# Drop the first column, as was done for the training frame.
test_df = test_df[test_df.columns[1:]]

sample_df = pd.read_csv('../input/tabular-playground-series-nov-2021/sample_submission.csv')
# One GPU when available, otherwise CPU (gpus=0).
trainer = pl.Trainer(gpus=int(torch.cuda.is_available()))

# A single model instance is reused; each iteration overwrites its weights.
model = Model(test_df.shape[1], LEARNING_RATE)
for model_name in trained_models:
    # Parse the fold number from the file name only, so underscores in the
    # directory part of the path cannot shift the split positions.
    fold = int(os.path.basename(model_name).split('_')[1])
    # Scale the test features with the pipeline fitted for that fold.
    pipe = pipes[fold]
    test_data = pipe.transform(test_df)

    # map_location='cpu' makes loading independent of the device the weights
    # were saved from; load_state_dict copies them into the existing parameters.
    checkpoint = torch.load(model_name, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    test_ds = TensorDataset(torch.FloatTensor(test_data))
    test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS)

    # predict() returns one tensor per batch; predict_step applies no sigmoid,
    # so these are the raw model outputs.
    preds = trainer.predict(model, test_dl)
    preds = torch.cat(preds).cpu().numpy().flatten()

    all_preds.append(preds)

In [None]:
# Combine the per-fold prediction vectors into one array (one row per fold model).
np_all_preds = np.asarray(all_preds)

# First four and last four columns of every row, as a quick visual check.
(np_all_preds[:, :4], np_all_preds[:, -4:])

In [None]:
# Mean over the fold axis: one averaged value per test row.
avg_preds = np_all_preds.mean(axis=0)

# First four and last four averaged values.
(avg_preds[:4], avg_preds[-4:])

In [None]:
# Put the fold-averaged predictions into the submission template and save it
# without the index column.
# NOTE(review): avg_preds are averaged raw model outputs (predict_step applies
# no sigmoid), not probabilities -- confirm this is what the submission expects.
sample_df['target'] = avg_preds
sample_df.to_csv('submission.csv', index=False)
sample_df