In [None]:
import gc

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the competition CSVs. The `id` column is removed from the train and
# test frames; the sample submission is loaded unchanged.
train = pd.read_csv('../input/tabular-playground-series-nov-2021/train.csv').drop(columns='id')
test = pd.read_csv('../input/tabular-playground-series-nov-2021/test.csv').drop(columns='id')
ss = pd.read_csv('../input/tabular-playground-series-nov-2021/sample_submission.csv')

In [None]:
# Display the first rows of the training frame.
train.head()

In [None]:
# Pull the label column and the feature matrix out of `train`, and take an
# independent copy of the test frame.
y = train["target"].copy()
X = train.drop(columns="target").copy()
X_test = test.copy()

# The raw frames are no longer needed: unbind each one and run a collection
# pass right after it.
del train
gc.collect()
del test
gc.collect()

In [None]:
# Standardise the features: the scaler is fitted on the training matrix and
# the same fitted transform is then applied to the test matrix. The scaled
# arrays are wrapped back into DataFrames carrying the original column labels.
# NOTE(review): the fit uses every training row before the CV split further
# down, so validation folds contribute to the scaling statistics.
scaler = StandardScaler()

X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

In [None]:
class CustomDataset:
    """Map-style dataset that serves the rows of a feature table as float tensors.

    Parameters
    ----------
    data : pandas.DataFrame or array-like
        Feature table, one sample per row.
    target : pandas.Series or array-like, optional
        Per-row labels. When omitted (``None``), items contain features only.

    Indexing returns ``(sample, label)`` as two ``torch.float`` tensors when a
    target was supplied, otherwise just the ``sample`` tensor.
    """

    def __init__(self, data, target=None):
        self.data = data
        self.target = target
        # Resolve the backing NumPy arrays once here so that __getitem__ is a
        # plain array index instead of a per-item pandas conversion. This also
        # lets plain ndarrays / lists be passed in place of pandas objects.
        self._samples = np.asarray(data)
        self._labels = None if target is None else np.asarray(target)

    def __len__(self):
        """Number of rows in the feature table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the item at position ``idx`` (positional, not label-based)."""
        sample = torch.tensor(self._samples[idx], dtype=torch.float)
        if self._labels is None:
            return sample
        return sample, torch.tensor(self._labels[idx], dtype=torch.float)

In [None]:
class NNModel(nn.Module):
    """Fully connected binary classifier.

    Four hidden stages, each ``Linear -> activation -> BatchNorm1d`` with
    widths 264, 128, 64 and 32, followed by a single sigmoid output unit.

    Args:
        features: Width of the input layer, i.e. the number of input columns.
        activation: Callable applied to every hidden ``Linear`` output.
            Defaults to ``F.relu``.
    """

    def __init__(self, features, activation = F.relu):
        super().__init__()

        # Attribute names and creation order define the state_dict keys, so
        # they are kept stable for checkpoint compatibility.
        self.layer_1 = nn.Linear(features, 264)
        self.batchnorm1 = nn.BatchNorm1d(264)

        self.layer_2 = nn.Linear(264, 128)
        self.batchnorm2 = nn.BatchNorm1d(128)

        self.layer_3 = nn.Linear(128, 64)
        self.batchnorm3 = nn.BatchNorm1d(64)

        self.layer_4 = nn.Linear(64, 32)
        self.batchnorm4 = nn.BatchNorm1d(32)

        self.layer_out = nn.Linear(32, 1)
        self.flatten = nn.Flatten()
        self.activation = activation

    def forward(self, x):
        """Return one sigmoid output per input row, with dimension 1 squeezed away."""
        hidden = self.flatten(x)

        stages = (
            (self.layer_1, self.batchnorm1),
            (self.layer_2, self.batchnorm2),
            (self.layer_3, self.batchnorm3),
            (self.layer_4, self.batchnorm4),
        )
        # Normalisation is applied after the activation in every stage.
        for linear, norm in stages:
            hidden = norm(self.activation(linear(hidden)))

        probs = torch.sigmoid(self.layer_out(hidden))
        return probs.squeeze(dim=1)

In [None]:
def initialize_weights(self):
    """Re-initialise every ``nn.Linear`` and ``nn.BatchNorm1d`` found under ``self``.

    Walks ``self.modules()`` (which includes ``self``) and, in place:

    * ``nn.Linear``: Kaiming-uniform weights computed for a ReLU gain, and a
      zero bias when the layer has one;
    * ``nn.BatchNorm1d``: weight set to 1 and bias set to 0.

    Modules of any other type are left untouched. Returns ``None``.
    """
    for layer in self.modules():
        if isinstance(layer, nn.BatchNorm1d):
            nn.init.constant_(layer.weight.data, 1)
            nn.init.constant_(layer.bias.data, 0)
            continue

        if not isinstance(layer, nn.Linear):
            continue

        nn.init.kaiming_uniform_(layer.weight.data, nonlinearity="relu")
        if layer.bias is not None:
            nn.init.constant_(layer.bias.data, 0)

In [None]:
# Shared run settings: mini-batch size, compute device (CUDA when torch reports
# it as available, CPU otherwise) and the feature column index.
BATCH_SIZE = 1024
device = 'cpu' if not torch.cuda.is_available() else "cuda"
features = X.columns

In [None]:
def train_model(dataloader, model, criterion, optimizer):
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    Args:
        dataloader: Sized iterable yielding ``(inputs, targets)`` tensor pairs.
            Only ``len()`` and iteration are required.
        model: Network to optimise; switched to training mode.
        criterion: Callable ``criterion(scores, targets)`` returning a scalar loss.
        optimizer: Optimizer bound to ``model``'s parameters.

    Returns:
        float: Sum of the per-batch losses divided by the number of batches.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches.

    Batches are moved to the module-level ``device`` before the forward pass.
    """
    model.train()
    batches = len(dataloader)
    train_loss = 0.0

    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        scores = model(X)
        loss = criterion(scores, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() detaches the scalar so no autograd graph is kept across batches.
        train_loss += loss.item()

    train_loss_avg = train_loss/batches
    print(f"avg. train loss: {train_loss_avg}")
    return train_loss_avg

In [None]:
def val_model(dataloader, model, criterion):
    """Evaluate ``model`` on ``dataloader`` and return the mean batch loss.

    Args:
        dataloader: Sized iterable yielding ``(inputs, targets)`` tensor pairs.
            Only ``len()`` and iteration are required.
        model: Network to evaluate; switched to evaluation mode.
        criterion: Callable ``criterion(scores, targets)`` returning a scalar loss.

    Returns:
        float: Sum of the per-batch losses divided by the number of batches,
        as a plain Python float (the same type ``train_model`` returns).

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches.

    Runs under ``torch.no_grad()``; batches are moved to the module-level
    ``device`` before the forward pass.
    """
    batches = len(dataloader)
    model.eval()
    test_loss = 0.0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)

            scores = model(X)
            # .item() turns the 0-dim loss tensor into a host-side float, so
            # callers never end up storing or comparing device tensors.
            test_loss += criterion(scores, y).item()

    test_loss /= batches
    print(f"avg test loss : {test_loss}")
    return test_loss

In [None]:
def predict_model(dataloader, model):
    """Run ``model`` over every batch of ``dataloader`` and return all outputs.

    Args:
        dataloader: Iterable yielding input tensors (no targets).
        model: Network to run; switched to evaluation mode.

    Returns:
        numpy.ndarray: 1-D float64 array holding the flattened model outputs
        of all batches in iteration order; empty when there are no batches.

    Runs under ``torch.no_grad()``; batches are moved to the module-level
    ``device`` before the forward pass.
    """
    model.eval()
    # Seeding the list with an empty float64 array fixes the result dtype to
    # float64 and keeps the no-batch case returning an empty array.
    chunks = [np.array([])]

    with torch.no_grad():
        for X in dataloader:
            X = X.to(device)

            preds = model(X)
            chunks.append(preds.flatten().cpu().numpy())

    # One concatenation at the end instead of re-copying the growing result
    # array after every batch.
    return np.concatenate(chunks)

In [None]:
# Cross-validated training: fit one network per stratified fold, keep the
# checkpoint with the lowest validation loss, and average the test predictions
# of the per-fold best checkpoints.
N_SPLITS = 5
SEED = 2021
EPOCHS = 100

# Seed torch so weight initialisation and batch shuffling are repeatable.
torch.manual_seed(SEED)

KFold = StratifiedKFold(n_splits=N_SPLITS, random_state=SEED, shuffle=True)
cv_scores = []
predictions = np.zeros(X_test.shape[0])

# The test loader does not depend on the fold, so it is built once.
test_dataset = CustomDataset(data = X_test, target = None)
test_loader = data.DataLoader(test_dataset, batch_size = BATCH_SIZE)

for fold, (train_idx, val_idx) in enumerate(KFold.split(X, y)):
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    train_dataset = CustomDataset(data=X_train, target=y_train)
    val_dataset = CustomDataset(data=X_val, target=y_val)

    # Training rows are reshuffled every epoch so the mini-batches (and the
    # batch-norm statistics computed from them) differ between epochs.
    train_loader = data.DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = True)
    val_loader = data.DataLoader(val_dataset, batch_size = BATCH_SIZE)

    model = NNModel(features=len(features), activation=F.hardswish).to(device)
    # Called directly rather than through model.apply(): initialize_weights
    # already walks model.modules(), so apply() would initialise each layer twice.
    initialize_weights(model)

    criterion = nn.BCELoss()

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    # `verbose` is not passed because it is deprecated in recent PyTorch
    # releases; learning-rate drops are reported explicitly in the epoch loop.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     factor=0.5,
                                                     patience=10)
    best_valid_loss = float('inf')
    checkpoint_path = 'TPS%d.pth' % fold

    avg_train_losses = []
    avg_val_losses = []

    print(10*"::", f"Fold={fold+1}", 10*"::")

    for t in range(EPOCHS):
        print(f"Epoch: {t+1}")
        train_loss = train_model(train_loader, model, criterion, optimizer)
        # float() accepts both a Python number and a 0-dim tensor, so the
        # bookkeeping below only ever holds host-side floats.
        val_loss = float(val_model(val_loader, model, criterion))

        avg_train_losses.append(train_loss)
        avg_val_losses.append(val_loss)

        if val_loss < best_valid_loss:
            best_valid_loss = val_loss
            torch.save(model.state_dict(), checkpoint_path)

        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(val_loss)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after < lr_before:
            print(f"Epoch {t+1}: reducing learning rate to {lr_after:.4e}.")

    cv_scores.append(best_valid_loss)

    # Predict with the best checkpoint of this fold, not the last-epoch weights.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    # Dividing by the fold count makes `predictions` the across-fold mean.
    predictions += predict_model(test_loader, model) / N_SPLITS

print(f"Best validation loss per fold: {cv_scores}")
print(f"Mean best validation loss: {np.mean(cv_scores)}")

In [None]:
# Put the fold-averaged predictions into the `target` column of the sample
# submission frame and write it to disk without the index column.
ss = ss.assign(target=predictions)
ss.to_csv('submission.csv', index=False)

In [None]:
# Display the first rows of the submission frame.
ss.head()

# Reference

* https://www.kaggle.com/sagarikajadon/simple-lstm-pytorch