# Library Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

import torch
from torch import nn, optim
from torch.nn.utils import weight_norm as WN
from torch.utils.data import Dataset
from torch.utils.data import DataLoader as DL
import torch.nn.functional as F

import os
import gc
from time import time

# Global random seed: used for the train/validation split, the training
# DataLoader's shuffle generator and model weight initialisation.
seed = 0


def breaker(num=50, char="*"):
    """Print a separator made of `char` repeated `num` times, padded by blank lines."""
    separator = char * num
    print("".join(("\n", separator, "\n")))


def head(x, no_of_ele=5):
    """Print the first `no_of_ele` items of the sliceable object `x`."""
    preview = x[:no_of_ele]
    print(preview)

    
# Module-level preprocessing objects, fitted inside clean_and_build_loaders:
# sc_ standardises the feature columns, le_ maps target labels to integer ids.
sc_ = StandardScaler()
le_ = LabelEncoder()

# Data Processing

In [None]:
def clean_and_build_loaders(batch_size):
    """Read the competition CSVs, preprocess them and wrap them in DataLoaders.

    The "id" column is dropped from both files; the last remaining training
    column is taken as the target. Features are standardised with the
    module-level ``sc_`` (fitted on the training features) and targets are
    integer-encoded with the module-level ``le_``. The training rows are then
    split 80/20 (stratified, seeded) into train and validation sets.

    Args:
        batch_size: Batch size used for all three DataLoaders.

    Returns:
        A tuple ``(dataloaders, IL, OL)`` where ``dataloaders`` is a dict with
        keys "train", "valid" and "test", ``IL`` is the number of input
        features and ``OL`` is the number of distinct encoded labels.
    """
    breaker()
    print("Loading Data ...")

    class DS(Dataset):
        """Array-backed dataset: (features, label) pairs when labelled, features only otherwise."""

        def __init__(self, X=None, y=None, mode="train"):
            self.mode = mode
            self.X = X
            # Labels are only stored for the labelled modes.
            if self.mode in ("train", "valid"):
                self.y = y

        def __len__(self):
            return self.X.shape[0]

        def __getitem__(self, idx):
            features = torch.FloatTensor(self.X[idx])
            if self.mode not in ("train", "valid"):
                return features
            return features, torch.LongTensor(self.y[idx])

    train_frame = pd.read_csv("../input/tabular-playground-series-jun-2021/train.csv", engine="python")
    test_frame = pd.read_csv("../input/tabular-playground-series-jun-2021/test.csv", engine="python")

    train_frame = train_frame.drop(labels="id", axis=1)
    test_frame = test_frame.drop(labels="id", axis=1)

    # Every column but the last is a feature; the last column is the target.
    raw_train_features = train_frame.iloc[:, :-1].copy().values
    raw_labels = train_frame.iloc[:, -1].copy().values
    raw_test_features = test_frame.copy().values

    # Scaler statistics come from the training features only.
    scaled_train = sc_.fit_transform(raw_train_features)
    scaled_test = sc_.transform(raw_test_features)
    encoded_labels = le_.fit_transform(raw_labels)

    IL = scaled_test.shape[1]
    OL = len(set(encoded_labels))

    X_train, X_valid, y_train, y_valid = train_test_split(
        scaled_train,
        encoded_labels,
        test_size=0.2,
        shuffle=True,
        random_state=seed,
        stratify=encoded_labels,
    )

    # Labels are reshaped to a column so that indexing one sample yields a
    # length-1 array, which is what torch.LongTensor is given in __getitem__.
    train_set = DS(X=X_train, y=y_train.reshape(-1, 1), mode="train")
    valid_set = DS(X=X_valid, y=y_valid.reshape(-1, 1), mode="valid")
    test_set = DS(X=scaled_test, y=None, mode="test")

    train_loader = DL(train_set, batch_size=batch_size, shuffle=True,
                      generator=torch.manual_seed(seed))
    valid_loader = DL(valid_set, batch_size=batch_size, shuffle=False)
    test_loader = DL(test_set, batch_size=batch_size, shuffle=False)

    dataloaders = {"train": train_loader, "valid": valid_loader, "test": test_loader}

    return dataloaders, IL, OL

# Model

In [None]:
def build_model(IL, HL, OL, use_DP=False, DP=0.5):
    """Build the fully connected classifier and select the compute device.

    The network is a stack of ``BatchNorm1d -> weight-normalised Linear``
    pairs. Each hidden pair is followed by (optional dropout and) ReLU; the
    final pair is followed by ``log_softmax`` so the output is log-probabilities.

    Args:
        IL: Number of input features.
        HL: Non-empty sequence of hidden layer widths, one entry per hidden layer.
        OL: Number of output classes.
        use_DP: When True, dropout is applied to each hidden layer's
            pre-activation output.
        DP: Dropout probability (only used when ``use_DP`` is True).

    Returns:
        A tuple ``(device, model)``. The model is NOT moved to ``device`` here.

    Raises:
        ValueError: If ``HL`` is None or empty.
    """
    breaker()
    print("Building Model ...")

    class Classifier(nn.Module):
        """Feed-forward classifier with layers registered as BN1/FC1, BN2/FC2, ..."""

        def __init__(self, IL=None, HL=None, OL=None, use_DP=False, DP=0.5):
            super(Classifier, self).__init__()

            # Fail early: without hidden sizes no layers would be created and
            # forward() would have nothing to compute.
            if HL is None or len(HL) == 0:
                raise ValueError("HL must contain at least one hidden layer size")

            self.use_DP = use_DP
            if self.use_DP:
                self.DP_ = nn.Dropout(p=DP)

            self.HL = HL

            # Layer i maps sizes[i-1] -> sizes[i]. Modules are created in the
            # order BN1, FC1, BN2, FC2, ... so attribute names, state-dict keys
            # and seeded initialisation stay stable.
            sizes = [IL] + list(HL) + [OL]
            for i, (n_in, n_out) in enumerate(zip(sizes[:-1], sizes[1:]), start=1):
                setattr(self, "BN{}".format(i),
                        nn.BatchNorm1d(num_features=n_in, eps=1e-5))
                setattr(self, "FC{}".format(i),
                        WN(nn.Linear(in_features=n_in, out_features=n_out)))

        def getOptimizer(self, lr=1e-3, wd=0):
            """Return an Adam optimizer over all model parameters."""
            return optim.Adam(self.parameters(), lr=lr, weight_decay=wd)

        def getPlateauLR(self, optimizer=None, patience=5, eps=1e-6):
            """Return a ReduceLROnPlateau scheduler bound to `optimizer`."""
            return optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, patience=patience, eps=eps, verbose=True)

        def forward(self, x):
            """Return per-class log-probabilities for the batch `x`."""
            n_hidden = len(self.HL)

            for i in range(1, n_hidden + 1):
                bn = getattr(self, "BN{}".format(i))
                fc = getattr(self, "FC{}".format(i))
                x = fc(bn(x))
                # Dropout (when enabled) is applied before the activation.
                if self.use_DP:
                    x = self.DP_(x)
                x = F.relu(x)

            # Output layer: no dropout, no ReLU.
            out_bn = getattr(self, "BN{}".format(n_hidden + 1))
            out_fc = getattr(self, "FC{}".format(n_hidden + 1))
            return F.log_softmax(out_fc(out_bn(x)), dim=1)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Seed right before construction so weight initialisation is reproducible.
    torch.manual_seed(seed)
    model = Classifier(IL=IL, HL=HL, OL=OL, use_DP=use_DP, DP=DP)

    return device, model

# Helpers

In [None]:
def train(model, tr_data, va_data, epochs, lr, wd, device):
    """Train `model`, checkpoint every epoch and plot the learning curves.

    A checkpoint of the model's state dict is written to
    ``./checkpoints/Epoch_<n>.pt`` after every epoch (1-based ``n``).

    Args:
        model: Model exposing ``getOptimizer(lr=..., wd=...)``; its forward
            output is fed to ``nn.NLLLoss``.
        tr_data: Iterable of ``(X, y)`` training batches.
        va_data: Iterable of ``(X, y)`` validation batches.
        epochs: Number of epochs to run.
        lr: Learning rate passed to the optimizer.
        wd: Weight decay passed to the optimizer.
        device: Device the model and batches are moved to.

    Returns:
        A tuple ``(L, A, BLE, BAE)``: ``L`` and ``A`` are lists with one
        ``{"train": ..., "valid": ...}`` dict per epoch (mean batch loss and
        mean batch accuracy respectively); ``BLE`` / ``BAE`` are the 1-based
        epochs with the lowest validation loss / highest validation accuracy,
        or ``None`` if no epoch ever improved on the initial values.
    """
    def getAccuracy(y_pred, y_true):
        """Return the fraction of rows whose arg-max prediction equals the label."""
        y_pred, y_true = torch.argmax(y_pred, dim=1).detach(), y_true.detach()

        return torch.count_nonzero(y_true == y_pred).item() / len(y_pred)

    def fit_(model=None, optimizer=None, scheduler=None, epochs=None,
             trainloader=None, validloader=None, criterion=None,
             device=None, path=None, verbose=None):
        """Run the train/validation loop and return per-epoch metrics and best epochs."""
        if criterion is None:
            criterion = nn.NLLLoss()

        breaker()
        print("Training ...")
        breaker()

        Losses, Accuracies = [], []

        bestLoss, bestAccs = {"train": np.inf, "valid" : np.inf}, {"train" : 0.0, "valid" : 0.0}
        # Initialised so they are always bound, even with zero epochs or when
        # no epoch improves (e.g. NaN losses).
        BLE, BAE = None, None
        DLS = {"train" : trainloader, "valid" : validloader}

        model.to(device)
        start_time = time()
        for e in range(epochs):
            e_st = time()
            epochLoss = {"train" : 0.0, "valid" : 0.0}
            epochAccs = {"train" : 0.0, "valid" : 0.0}

            for phase in ["train", "valid"]:
                if phase == "train":
                    model.train()
                else:
                    model.eval()

                lossPerPass, accsPerPass = [], []

                for X, y in DLS[phase]:
                    # Labels are flattened to 1-D before being given to the criterion.
                    X, y = X.to(device), y.to(device).view(-1)

                    optimizer.zero_grad()
                    # Gradients are only tracked during the training phase.
                    with torch.set_grad_enabled(phase == "train"):
                        output = model(X)
                        loss = criterion(output, y)
                        if phase == "train":
                            loss.backward()
                            optimizer.step()
                    lossPerPass.append(loss.item())
                    accsPerPass.append(getAccuracy(output, y))
                epochLoss[phase] = np.mean(np.array(lossPerPass))
                epochAccs[phase] = np.mean(np.array(accsPerPass))
            Losses.append(epochLoss)
            Accuracies.append(epochAccs)

            torch.save(model.state_dict(), os.path.join(path, "Epoch_{}.pt".format(e+1)))

            if epochLoss["valid"] < bestLoss["valid"]:
                bestLoss = epochLoss
                BLE = e + 1

            if epochAccs["valid"] > bestAccs["valid"]:
                # Track accuracy in its own dict; writing it into bestLoss
                # would corrupt the best-loss comparison above.
                bestAccs = epochAccs
                BAE = e + 1

            if scheduler:
                scheduler.step(epochLoss["valid"])

            if verbose:
                print("Epoch: {} | Train Loss: {:.5f} | Valid Loss: {:.5f} | Train Accs: {:.5f} | Valid Accs: {:.5f} | Time: {:.2f} seconds".format(e+1, epochLoss["train"], epochLoss["valid"], epochAccs["train"], epochAccs["valid"], time()-e_st))

        breaker()
        print("Best Validation Loss at Epoch {}".format(BLE))
        breaker()
        print("Time Taken [{} Epochs] : {:.2f} minutes".format(epochs, (time()-start_time)/60))
        breaker()
        print("Training Complete")
        breaker()

        return Losses, Accuracies, BLE, BAE

    optimizer = model.getOptimizer(lr=lr, wd=wd)
    path = "./checkpoints"
    os.makedirs(path, exist_ok=True)

    L, A, BLE, BAE = fit_(model=model, optimizer=optimizer, epochs=epochs,
                          trainloader=tr_data, validloader=va_data, device=device,
                          path=path, verbose=True)

    # Split the per-epoch dicts into one series per curve.
    TL = [epoch_loss["train"] for epoch_loss in L]
    VL = [epoch_loss["valid"] for epoch_loss in L]
    TA = [epoch_accs["train"] for epoch_accs in A]
    VA = [epoch_accs["valid"] for epoch_accs in A]

    x_Axis = np.arange(1, len(L)+1)
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.plot(x_Axis, TL, "r", label="Training Loss")
    plt.plot(x_Axis, VL, "b--", label="validation Loss")
    plt.legend()
    plt.grid()
    plt.subplot(1, 2, 2)
    plt.plot(x_Axis, TA, "r", label="Training Accuracy")
    plt.plot(x_Axis, VA, "b--", label="validation Accuracy")
    plt.legend()
    plt.grid()
    plt.show()

    return L, A, BLE, BAE

# Main

In [None]:
def main():
    """Run the pipeline end to end: load the data, build the model and train it.

    Returns:
        A tuple ``(model, device, OL, test_loader, BLE)`` where ``OL`` is the
        number of output classes and ``BLE`` is the third value returned by
        ``train`` (the epoch with the best validation loss).
    """
    # Config
    hidden_sizes = [512]
    batch_size, epochs = 256, 5
    lr = 1e-3
    wd = 0

    loaders, IL, OL = clean_and_build_loaders(batch_size)
    device, model = build_model(IL, hidden_sizes, OL)

    history = train(model, loaders["train"], loaders["valid"], epochs, lr, wd, device)
    BLE = history[2]

    return model, device, OL, loaders["test"], BLE

# Run the full pipeline at cell execution; the results are kept as
# module-level names and passed to make_submission below.
model, device, num_classes, ts_data, BLE = main()

# Submission

In [None]:
def make_submission(model, device, dataloader, num_classes, BLE):
    """Predict class probabilities for `dataloader` and write ./submission.csv.

    The weights saved at ``./checkpoints/Epoch_<BLE>.pt`` are loaded into
    `model` before predicting. The predictions overwrite every column but the
    first of the competition's sample submission file, which is then saved to
    ``./submission.csv``.

    Args:
        model: Model whose forward output is log-probabilities (they are
            exponentiated here).
        device: Device used for inference.
        dataloader: Iterable yielding feature batches (no labels).
        num_classes: Number of output classes; only needed to shape the
            result when `dataloader` yields no batches.
        BLE: Epoch number identifying the checkpoint file to load.
    """
    def predict_(model=None, dataloader=None, num_classes=None, device=None, path=None):
        """Return an array of shape (n_samples, num_classes) with class probabilities."""
        if path:
            # map_location lets a checkpoint saved on one device load on another.
            model.load_state_dict(torch.load(path, map_location=device))

        model.to(device)
        model.eval()

        # Collect batches and concatenate once instead of growing a tensor
        # inside the loop.
        batches = []
        with torch.no_grad():
            for X in dataloader:
                batches.append(torch.exp(model(X.to(device))))

        if not batches:
            return torch.zeros(0, num_classes).numpy()

        return torch.cat(batches, dim=0).detach().cpu().numpy()

    checkpoint = os.path.join("./checkpoints", "Epoch_{}.pt".format(BLE))
    # Use the loader passed in by the caller rather than a module-level global.
    y_pred = predict_(model, dataloader, num_classes, device, checkpoint)
    ss = pd.read_csv("../input/tabular-playground-series-jun-2021/sample_submission.csv", engine="python")
    ss.iloc[:, 1:] = y_pred
    ss.to_csv("./submission.csv", index=False)
    
# Write the submission file using the model and test loader produced by main().
make_submission(model, device, ts_data, num_classes, BLE)