<div style="background-color:rgba(0, 167, 255, 0.6);border-radius:5px;display:fill">
    <h1><center>Tabular Playground Series - Apr 2022</center></h1>
</div>

<center><a><img src="https://i.ibb.co/PWvpT9F/header.png" alt="header" border="0" width=800 height=400 class="center"></a></center>

### Import Libraries

In [None]:
import pandas as pd
import numpy as np
import warnings
import gc
from IPython.display import HTML
warnings.filterwarnings("ignore")

from math import sin, cos, pi

from timeit import default_timer as timer
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

# Intended global random seed for reproducible runs (presumably); hand it to any
# RNG or estimator that accepts a seed.
random_state = 42

### Install Pytorch

In [None]:
!pip install torch==1.11.0  -q --progress-bar off

In [None]:
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
from transformers import get_linear_schedule_with_warmup

### Reading Data

In [None]:
# Every competition file lives in the same input directory; build the four
# paths from that single prefix.
_INPUT_DIR = '../input/tabular-playground-series-apr-2022'

PATH_TRAIN      = _INPUT_DIR + '/train.csv'
PATH_LABELS     = _INPUT_DIR + '/train_labels.csv'
PATH_TEST       = _INPUT_DIR + '/test.csv'
PATH_SUBMISSION = _INPUT_DIR + '/sample_submission.csv'

In [None]:
# Read the four CSV files in a fixed order: training rows, training labels,
# test rows and the sample submission.
data, data_labels, test_data, submission = (
    pd.read_csv(csv_path)
    for csv_path in (PATH_TRAIN, PATH_LABELS, PATH_TEST, PATH_SUBMISSION)
)

In [None]:
# Peek at the first five rows of the raw training table.
data.head()

#### Preprocessing

In [None]:
# Drop the bookkeeping columns, then standardise the remaining columns.
# `data` is rebound to the array returned by `fit_transform`, so this cell
# cannot be re-run without reloading the CSV first.
# NOTE(review): the scaler is fitted on every row, i.e. before the
# train/validation split performed further down.
data = data.drop(columns=['sequence', 'subject', 'step'])
scaler = StandardScaler()
data = scaler.fit_transform(data)

#### Train_test_split

In [None]:
# Fraction of the sequences kept for training; the tail is held out for validation.
test_q = 0.85
# Rows per sequence: every row of `data_labels` labels one block of this many rows.
steps_per_sequence = 60

# Rows and labels must describe the same sequences, otherwise windows would be
# paired with the wrong labels after the split.
assert len(data) == steps_per_sequence * len(data_labels), (
    "expected exactly 60 rows in `data` for every row in `data_labels`"
)

# Split on whole sequences first and derive the row boundary from that count,
# so the feature split and the label split can never disagree.
train_label_size = int(test_q * len(data_labels))
train_size = train_label_size * steps_per_sequence

X_train, y_train = data[:train_size], data_labels[:train_label_size]
X_test, y_test = data[train_size:], data_labels[train_label_size:]

In [None]:
# Sanity check: shapes of the training and held-out feature arrays.
X_train.shape, X_test.shape

### Dataset

#### TrainDataset

In [None]:
class TrainDataset(torch.utils.data.Dataset):
    """Labelled dataset that serves one window of rows per item.

    An item is requested with a collection of row indices (one window), not
    with a single integer. The label is looked up from the first row index of
    that window divided by ``seq_num``.
    """

    def __init__(self, X, y, seq_num):
        super().__init__()
        self.X, self.y, self.seq_num = X, y, seq_num

    def __len__(self):
        """Return the number of labels."""
        n_labels = len(self.y)
        return n_labels

    def __getitem__(self, idx):
        """Return ``(X[idx], y[idx[0] // seq_num])`` for the window ``idx``."""
        window = self.X[idx]
        label_pos = idx[0] // self.seq_num
        return window, self.y[label_pos]

#### TestDataset

In [None]:
class TestDataset(torch.utils.data.Dataset):
    """Unlabelled dataset that serves one window of rows per item.

    Args:
        X: indexable collection of feature rows.
        seq_num: number of consecutive rows that form one window.
    """
    def __init__(self, X, seq_num):
        super().__init__()
        self.X = X
        self.seq_num = seq_num
    def __len__(self):
        """Return the number of complete windows of ``seq_num`` rows in ``X``."""
        # Use the configured window length instead of a hard-coded 60.
        return len(self.X) // self.seq_num
    def __getitem__(self, idx):
        """Return ``X[idx]``; ``idx`` is expected to be the row indices of one window."""
        return self.X[idx]

In [None]:
def prepare_data(data, data_labels, seq_num, data_num, mode='train', batch_size=64):
    """Build a DataLoader that yields batches of fixed-length windows of rows.

    The loader's sampler is a 2-D index array with one row per window, so the
    dataset's ``__getitem__`` receives the ``seq_num`` row indices of a whole
    window rather than a single integer. Windows are served in file order.

    Args:
        data: feature rows, indexable with an array of row indices.
        data_labels: table with a ``'state'`` column holding one label per
            window, or ``None`` when no labels are available.
        seq_num: number of consecutive rows per window.
        data_num: number of rows in ``data``; trailing rows that do not fill a
            complete window are ignored.
        mode: ``'train'`` builds a labelled ``TrainDataset``; any other value
            builds an unlabelled ``TestDataset``.
        batch_size: number of windows per batch (defaults to 64).

    Returns:
        A ``DataLoader`` over the chosen dataset.

    Raises:
        ValueError: if ``mode == 'train'`` but ``data_labels`` is ``None``.
    """
    # Fail early instead of on the first batch, when the label lookup would break.
    if mode == 'train' and data_labels is None:
        raise ValueError("prepare_data: mode='train' requires data_labels")

    if data_labels is not None:
        data_labels = data_labels['state'].values

    # Row i of the sampler holds the row indices of window i.
    n_windows = data_num // seq_num
    sampler = np.arange(n_windows * seq_num).reshape(n_windows, seq_num)

    if mode == 'train':
        dataset = TrainDataset(data, data_labels, seq_num)
    else:
        dataset = TestDataset(data, seq_num)

    dataloader = DataLoader(dataset, batch_size=batch_size, sampler=sampler)
    return dataloader

In [None]:
# Both loaders use 60-row windows and the default labelled mode; the held-out
# split doubles as the validation set during training.
train_dataloader = prepare_data(X_train, y_train, seq_num=60, data_num=len(X_train))
test_dataloader = prepare_data(X_test, y_test, seq_num=60, data_num=len(X_test))

### Model

#### LSTM

Long Short-Term Memory networks – usually just called “LSTMs” – are a special kind of RNN, capable of learning long-term dependencies. They were introduced by Hochreiter & Schmidhuber (1997), and were refined and popularized by many people in subsequent work. They work tremendously well on a large variety of problems, and are now widely used.

LSTMs are explicitly designed to avoid the long-term dependency problem. Remembering information for long periods of time is practically their default behavior, not something they struggle to learn!

All recurrent neural networks have the form of a chain of repeating modules of neural network. In standard RNNs, this repeating module will have a very simple structure, such as a single tanh layer.

<center><a><img src="https://hsto.org/web/67b/04f/73b/67b04f73b4c34ba38edfa207e09de07c.png" alt="header" border="0" width=800 height=400 class="center"></a></center>

In this notebook, however, we use a bidirectional LSTM.

A Bidirectional LSTM, or biLSTM, is a sequence processing model that consists of two LSTMs: one taking the input in a forward direction, and the other in a backwards direction. BiLSTMs effectively increase the amount of information available to the network, improving the context available to the algorithm (e.g. knowing what words immediately follow and precede a word in a sentence).

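In the model we use:
* Three stacked bidirectional LSTM blocks (BiLSTM)
* A ReLU activation followed by a linear layer applied to the flattened sequence output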

In [None]:
class LSTM(nn.Module):
    """Three stacked bidirectional LSTM blocks followed by a linear read-out.

    The output of the last block is flattened over all time steps into one
    vector per sample, passed through ReLU and projected to ``num_classes``
    values. No output activation is applied.

    Args:
        seq_num: number of time steps per input sequence.
        input_dim: number of features per time step.
        lstm_dim: hidden size of each LSTM direction.
        num_layers: number of layers inside each of the three LSTM blocks.
        num_classes: size of the output vector.
    """

    def __init__(self,
         seq_num = 60,
         input_dim = 13,
         lstm_dim = 512,
         num_layers = 2,
         num_classes = 1
    ):
        super().__init__()

        # Settings shared by all three recurrent blocks.
        rnn_kwargs = dict(num_layers=num_layers, batch_first=True, bidirectional=True)
        # A bidirectional block emits forward and backward states side by side.
        bi_dim = 2 * lstm_dim

        self.lstm = nn.LSTM(input_dim, lstm_dim, **rnn_kwargs)
        self.lstm1 = nn.LSTM(bi_dim, lstm_dim, **rnn_kwargs)
        self.lstm2 = nn.LSTM(bi_dim, lstm_dim, **rnn_kwargs)

        self.logits = nn.Sequential(
            nn.ReLU(),
            nn.Linear(bi_dim * seq_num, num_classes),
        )

    def forward(self, x):
        """Run ``x`` through the three LSTM blocks and the read-out head."""
        hidden = x
        for block in (self.lstm, self.lstm1, self.lstm2):
            hidden, _ = block(hidden)
        # Collapse everything after the first axis into one vector per sample.
        flat = hidden.reshape(len(hidden), -1)
        return self.logits(flat)

### Train

Function to train model:

In [None]:
def train(
    epochs,
    model,
    optimizer,
    criterion,
    sheduler,
    train_iterator,
    valid_iterator
):
    """Train ``model`` for ``epochs`` epochs and report the losses periodically.

    Each batch is expected to be indexable as ``(inputs, targets)``. Inputs and
    targets are moved to the model's device and cast to float. The trailing
    axis of the model output is squeezed before it is handed to ``criterion``.

    Args:
        epochs: number of passes over ``train_iterator``.
        model: network to optimise; it is switched between train and eval mode.
        optimizer: optimiser stepped once per training batch.
        criterion: callable ``criterion(prediction, target)`` returning the loss.
        sheduler: learning-rate scheduler, stepped once per training batch.
        train_iterator: sized iterable of training batches.
        valid_iterator: sized iterable of validation batches.

    Returns:
        None. Mean training and validation losses are printed for epochs
        1, 11, 21, ...
    """
    # Run on whatever device the model lives on instead of relying on a global
    # `device` variable that is defined in another notebook cell.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(1, epochs + 1):
        model.train()
        training_loss = 0.0

        for batch in train_iterator:
            inputs = batch[0].to(device).float()
            targets = batch[1].to(device).float()

            optimizer.zero_grad()
            output = model(inputs).squeeze(-1)
            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()
            sheduler.step()
            training_loss += loss.item()
        training_loss /= len(train_iterator)

        model.eval()
        valid_loss = 0.0

        # Validation needs no gradients; skipping the autograd graph saves
        # memory and time.
        with torch.no_grad():
            for batch in valid_iterator:
                inputs = batch[0].to(device).float()
                targets = batch[1].to(device).float()
                output = model(inputs).squeeze(-1)
                valid_loss += criterion(output, targets).item()

        valid_loss /= len(valid_iterator)

        if (epoch % 10 == 1):
            print('Epoch: {}, Training Loss: {:.5f}, '
                 'Validation Loss: {:.5f}'.format(epoch, training_loss, valid_loss))

The function below runs inference with the trained model:

### Predict

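Inference runs in plain PyTorch with gradient tracking disabled (`torch.no_grad()`); note that the Intel Extension for PyTorch is not actually used in the code below: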

In [None]:
def predict(
    model,
    loader,
):
    """Run ``model`` over every batch of ``loader`` and stack the outputs.

    The model is put into eval mode and gradients are disabled. Batches are
    moved to the model's device and cast to float. The model outputs are
    returned as they are; no activation is applied here.

    Args:
        model: trained network.
        loader: iterable yielding input tensors (no labels).

    Returns:
        A NumPy array with the per-batch outputs concatenated along axis 0.

    Raises:
        ValueError: from ``np.concatenate`` if ``loader`` yields no batches.
    """
    # Use the device the model lives on instead of relying on a global `device`
    # variable that is defined in another notebook cell.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()

    preds = []
    with torch.no_grad():
        for batch in loader:
            output = model(batch.to(device).float())
            preds.append(output.cpu().numpy())

    return np.concatenate(preds, 0)

### Learning Cycle

In the training stage we use:
* Learning rate = 0.001, with a linear warmup/decay scheduler to update the learning rate
* Optimizer - Adam
* Loss - BCEWithLogitsLoss
* Epochs = 120

In [None]:
epochs = 120
# NOTE(review): `batch_size` is not read by any visible code; the loaders above
# were built with prepare_data's batch size of 64.
batch_size = 512
seq_num = 60

# Seed PyTorch before the model is created so that weight initialisation is
# repeatable from run to run.
torch.manual_seed(random_state)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# Pass the window length explicitly so the linear head matches the loaders.
model = LSTM(seq_num=seq_num)
model = model.to(device)

lr = 1e-3

optimizer = torch.optim.Adam(model.parameters(), lr = lr)

# The scheduler is stepped once per batch, so step counts are measured in
# batches: the first 10% of all steps are warmup.
num_warmup_steps = int(0.1 * epochs * len(train_dataloader))
num_training_steps = int(epochs * len(train_dataloader))

sheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps, num_training_steps
)

# The model emits raw logits, so the loss applies the sigmoid itself.
criterion = nn.BCEWithLogitsLoss()

train(epochs, model, optimizer, criterion, sheduler, train_dataloader, test_dataloader)

### Prediction

In [None]:
# Drop the same bookkeeping columns as for the training table and reuse the
# already fitted scaler (transform only, no re-fit).
test_data = scaler.transform(
    test_data.drop(columns=['sequence', 'subject', 'step'])
)

In [None]:
# Sanity check: shape of the scaled test array.
test_data.shape

In [None]:
# The test table has no labels, so build the unlabelled loader with 60-row windows.
loader = prepare_data(
    test_data,
    None,
    seq_num=60,
    data_num=len(test_data),
    mode='test',
)

In [None]:
# Score every test window with the trained model.
pred = predict(model=model, loader=loader)

In [None]:
# Assign the predictions as a flat array rather than as a DataFrame: a
# DataFrame is aligned on the index and would silently leave NaNs if the
# number of predictions differed from the number of submission rows, whereas
# an array of the wrong length raises immediately.
submission['state'] = np.asarray(pred).reshape(-1)

In [None]:
# Display the predicted `state` column as a quick visual check.
submission['state']

In [None]:
# Write the submission file without the DataFrame index column.
submission.to_csv("submit.csv", index = False)