# Signal Forecasting

### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import warnings
import os
import gc
from IPython.display import HTML
warnings.filterwarnings("ignore")

from math import sin, cos, pi

from timeit import default_timer as timer
import matplotlib.pyplot as plt

import torch
from torch.utils.data import DataLoader
import torch.nn as nn
from transformers import get_linear_schedule_with_warmup

from sklearn.preprocessing import StandardScaler

random_state = 42

### Read Data

In [3]:
# All competition CSV files live in one folder; build that path once.
data_dir = os.path.join('data', 'tabular_playground_series_april_2022')

data = pd.read_csv(os.path.join(data_dir, 'train.csv'))
data_labels = pd.read_csv(os.path.join(data_dir, 'train_labels.csv'))
test_data = pd.read_csv(os.path.join(data_dir, 'test.csv'))
submission = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))

In [4]:
# Peek at the first rows of the raw training table.
data.head()

Unnamed: 0,sequence,subject,step,sensor_00,sensor_01,sensor_02,sensor_03,sensor_04,sensor_05,sensor_06,sensor_07,sensor_08,sensor_09,sensor_10,sensor_11,sensor_12
0,0,47,0,-0.196291,0.112395,1.0,0.329204,-1.00466,-0.131638,-0.127505,0.368702,-0.1,-0.963873,-0.985069,0.531893,4.751492
1,0,47,1,-0.44745,0.134454,1.0,-0.658407,0.162495,0.340314,-0.209472,-0.867176,0.2,-0.301301,0.082733,-0.231481,0.45439
2,0,47,2,0.326893,-0.694328,1.0,0.330088,0.473678,1.280479,-0.094718,0.535878,1.4,1.002168,0.449221,-0.58642,-4.736147
3,0,47,3,0.523184,0.75105,1.0,0.976991,-0.563287,-0.720269,0.79326,0.951145,-0.3,-0.995665,-0.43429,1.34465,0.429241
4,0,47,4,0.272025,1.07458,1.0,-0.136283,0.398579,0.044877,0.560109,-0.541985,-0.9,1.055636,0.812631,0.123457,-0.223359


#### Preprocessing

In [5]:
# Only the sensor channels are fed to the model, so the sequence/subject/step
# columns are removed before standardising.
# NOTE(review): the scaler is fitted on every row, including the rows that are
# later held out for validation - confirm this is intended.
scaler = StandardScaler()
data = scaler.fit_transform(
    data.drop(columns=['sequence', 'subject', 'step'])
)

### Train_test_split
We need to split the training data and its labels into a training part and a held-out part. We will use 85% of the sequences for training and the remaining 15% for testing (validation during training).

In [6]:
test_q = 0.85  # fraction of the sequences used for training; the rest is held out

# The label table has one row per sequence while the feature array has one row
# per time step, so the sequence length follows from the two lengths. Splitting
# on a whole number of sequences with integer arithmetic keeps features and
# labels aligned without depending on floating-point rounding.
steps_per_sequence = len(data) // len(data_labels)
assert steps_per_sequence * len(data_labels) == len(data), \
    "feature rows must be an exact multiple of the number of labelled sequences"

train_label_size = int(test_q * len(data_labels))
train_size = train_label_size * steps_per_sequence

X_train, y_train = data[:train_size], data_labels[:train_label_size]
X_test, y_test = data[train_size:], data_labels[train_label_size:]

In [7]:
# Shapes of the training and held-out feature arrays produced by the split.
X_train.shape, X_test.shape

((1324320, 13), (233760, 13))

## Datasets
### TrainDataset
First we define the dataset class used for training; it returns the rows of a sequence together with that sequence's label.

In [8]:
class TrainDataset(torch.utils.data.Dataset):
    """Labelled dataset that serves one whole sequence per lookup.

    ``X`` stores the rows of all sequences back to back and ``y`` stores one
    label per sequence. An item is requested with an array of row indices
    (the rows of one sequence), not with a single integer.
    """

    def __init__(self, X, y, seq_num):
        super().__init__()
        self.X, self.y, self.seq_num = X, y, seq_num

    def __len__(self):
        # One label per sequence, so this is the number of sequences.
        return len(self.y)

    def __getitem__(self, idx):
        rows = self.X[idx]
        # The first requested row index identifies which sequence this is.
        sequence_id = idx[0] // self.seq_num
        label = self.y[sequence_id]
        return rows, label

#### TestDataset
Next we define the dataset class used for inference; it returns only the rows of a sequence, without a label.

In [9]:
class TestDataset(torch.utils.data.Dataset):
    """Unlabelled dataset that serves one whole sequence per lookup.

    ``X`` stores the rows of all sequences back to back; ``seq_num`` is the
    number of rows that make up one sequence. An item is requested with an
    array of row indices and the matching rows of ``X`` are returned.
    """

    def __init__(self, X, seq_num):
        super().__init__()
        self.X = X
        self.seq_num = seq_num

    def __len__(self):
        # Number of complete sequences. Uses the configured sequence length
        # instead of a literal 60 so other lengths are reported correctly.
        return len(self.X) // self.seq_num

    def __getitem__(self, idx):
        return self.X[idx]

In [10]:
def prepare_data(data, data_labels, seq_num, data_num, mode='train', batch_size=64):
    """
    Build a DataLoader that yields batches of whole sequences.

    :param data: indexable array of feature rows; every ``seq_num`` consecutive
        rows form one sequence
    :param data_labels: object whose ``['state']`` column holds one label per
        sequence, or None when no labels are available
    :param seq_num: number of rows per sequence
    :param data_num: number of rows of ``data`` to cover; trailing rows that do
        not fill a complete sequence are left out
    :param mode: 'train' builds a TrainDataset (rows and label); any other
        value builds a TestDataset (rows only)
    :param batch_size: number of sequences per batch; defaults to 64, the value
        that was previously hard-coded
    :return: DataLoader iterating over the sequences in their stored order
    :raises ValueError: if ``mode`` is 'train' but ``data_labels`` is None
    """
    if mode == 'train' and data_labels is None:
        raise ValueError("data_labels is required when mode == 'train'")

    if data_labels is not None:
        data_labels = data_labels['state'].values

    # Every "index" handed to the dataset is a row of ``seq_num`` consecutive
    # row positions, so a single dataset item is one complete sequence.
    num_sequences = data_num // seq_num
    sampler = np.arange(num_sequences * seq_num).reshape(num_sequences, seq_num)

    if mode == 'train':
        dataset = TrainDataset(data, data_labels, seq_num)
    else:
        dataset = TestDataset(data, seq_num)

    dataloader = DataLoader(dataset, batch_size=batch_size, sampler=sampler)
    return dataloader

In [11]:
# Both splits carry labels, so both use the default 'train' mode; the loader
# built from the held-out split is the one used for validation during training.
train_dataloader = prepare_data(
    data=X_train, data_labels=y_train, seq_num=60, data_num=len(X_train)
)
test_dataloader = prepare_data(
    data=X_test, data_labels=y_test, seq_num=60, data_num=len(X_test)
)

# Model: LSTM
Long Short Term Memory networks – usually just called “LSTMs” – are a special kind of RNN, capable of learning long-term dependencies. They were introduced by Hochreiter & Schmidhuber (1997), and were refined and popularized by many people in subsequent work. They work tremendously well on a large variety of problems, and are now widely used.

LSTMs are explicitly designed to avoid the long-term dependency problem. Remembering information for long periods of time is practically their default behavior, not something they struggle to learn!

All recurrent neural networks have the form of a chain of repeating modules of neural network. In standard RNNs, this repeating module will have a very simple structure, such as a single tanh layer.

<center><a><img src="https://hsto.org/web/67b/04f/73b/67b04f73b4c34ba38edfa207e09de07c.png" alt="header" border="0" width=800 height=400 class="center"></a></center>

In this notebook, however, we use a bidirectional LSTM. A Bidirectional LSTM, or biLSTM, is a sequence processing model that consists of two LSTMs: one taking the input in a forward direction, and the other in a backwards direction. BiLSTMs effectively increase the amount of information available to the network, improving the context available to the algorithm (e.g. knowing what words immediately follow and precede a word in a sentence).

In Model we use:
- BiLSTM
- Linear layer

## Model
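Define the model class: three stacked bidirectional LSTM blocks followed by a ReLU and a linear layer applied to the flattened sequence output.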

In [12]:
class LSTM(nn.Module):
    """Three stacked bidirectional LSTM blocks followed by a linear head.

    The per-step outputs of the last block are flattened over the time axis,
    passed through a ReLU and projected to ``num_classes`` values per sample.
    The linear head is sized for exactly ``seq_num`` time steps.
    """

    def __init__(self,
         seq_num = 60,
         input_dim = 13,
         lstm_dim = 512,
         num_layers = 2,
         num_classes = 1
    ):
        super().__init__()

        # A bidirectional LSTM emits forward and backward states side by side,
        # so each block outputs 2 * lstm_dim features per time step.
        bi_dim = 2 * lstm_dim
        shared = dict(num_layers=num_layers, batch_first=True, bidirectional=True)

        self.lstm = nn.LSTM(input_dim, lstm_dim, **shared)
        self.lstm1 = nn.LSTM(bi_dim, lstm_dim, **shared)
        self.lstm2 = nn.LSTM(bi_dim, lstm_dim, **shared)

        # The head sees all time steps at once: seq_num * bi_dim features.
        self.logits = nn.Sequential(
            nn.ReLU(),
            nn.Linear(seq_num * bi_dim, num_classes),
        )

    def forward(self, x):
        hidden = x
        for block in (self.lstm, self.lstm1, self.lstm2):
            hidden, _ = block(hidden)
        # Collapse (time, features) into one axis per sample for the head.
        flat = hidden.reshape(hidden.shape[0], -1)
        return self.logits(flat)

## Train
Function to train model:

In [13]:
def train(
    epochs,
    model,
    optimizer,
    criterion,
    sheduler,
    train_iterator,
    valid_iterator
):
    """
    Train ``model`` and report the average losses every tenth epoch.

    For every epoch the model is optimised over ``train_iterator`` (the
    scheduler is stepped after every batch) and then evaluated over
    ``valid_iterator`` with gradient tracking switched off.

    :param epochs: number of passes over ``train_iterator``
    :param model: module to optimise; batches are moved to the device of its
        first parameter
    :param optimizer: optimizer bound to the model's parameters
    :param criterion: callable ``criterion(prediction, target)`` returning a
        scalar loss tensor
    :param sheduler: learning-rate scheduler, stepped once per training batch
    :param train_iterator: sized iterable of batches where ``batch[0]`` holds
        the inputs and ``batch[1]`` the targets
    :param valid_iterator: sized iterable with the same batch layout, used
        only for evaluation
    :return: None; progress is printed for epochs 1, 11, 21, ...
    """
    # Follow the model's own device rather than relying on a notebook global.
    device = next(model.parameters()).device

    for epoch in range(1, epochs + 1):
        training_loss = 0.0
        valid_loss = 0.0

        model.train()
        for batch in train_iterator:
            optimizer.zero_grad()
            inputs = batch[0].to(device).float()
            targets = batch[1].to(device).float()
            # The trailing singleton dimension is dropped to match the targets.
            output = model(inputs).squeeze(-1)
            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()
            sheduler.step()
            training_loss += loss.item()
        training_loss /= len(train_iterator)

        model.eval()
        # Evaluation needs no autograd graph; skipping it saves memory and time.
        with torch.no_grad():
            for batch in valid_iterator:
                inputs = batch[0].to(device).float()
                targets = batch[1].to(device).float()
                output = model(inputs).squeeze(-1)
                valid_loss += criterion(output, targets).item()
        valid_loss /= len(valid_iterator)

        if epoch % 10 == 1:
            print('Epoch: {}, Training Loss: {:.5f}, '
                 'Validation Loss: {:.5f}'.format(epoch, training_loss, valid_loss))

## Predict
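Function to run inference with the trained model. (The original note mentioned using the Intel Extension for PyTorch to accelerate inference; the code below runs plain PyTorch under `torch.no_grad()` and does not use that extension.)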

In [14]:
def predict(
    model,
    loader,
):
    """
    Run ``model`` over every batch of ``loader`` and collect the outputs.

    :param model: trained module; it is switched to eval mode and batches are
        moved to the device of its first parameter
    :param loader: iterable yielding input tensors (no labels)
    :return: numpy array with the outputs of all batches concatenated along
        axis 0
    :raises ValueError: raised by ``np.concatenate`` when ``loader`` yields no
        batches
    """
    # Follow the model's own device rather than relying on a notebook global.
    device = next(model.parameters()).device

    model.eval()

    preds = []
    with torch.no_grad():
        for data in loader:
            pred = model(data.to(device).float())
            # Under no_grad the output carries no graph, so it converts directly.
            preds.append(pred.cpu().numpy())

    preds = np.concatenate(preds, 0)

    return preds

# Learning Cycle
In training stage we use:
* Learning rate = 0.0001, with a linear warmup scheduler that updates the learning rate after every batch
* Optimizer - Adam
* Loss - MSELoss
* Epochs = 200

In [15]:
epochs = 200
# NOTE(review): batch_size is not consumed anywhere below; the data loaders
# were already built earlier with prepare_data's own batch size.
batch_size = 512
seq_num = 60
lr = 1e-4

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# Apply the notebook-wide seed so that weight initialisation is repeatable
# from run to run.
torch.manual_seed(random_state)
np.random.seed(random_state)

model = LSTM(seq_num=seq_num)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = lr)

# The scheduler is stepped once per batch, so its horizon is counted in
# batches: 10 % of all steps for warmup, then linear decay over the rest.
num_training_steps = int(epochs * len(train_dataloader))
num_warmup_steps = int(0.1 * epochs * len(train_dataloader))

sheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps, num_training_steps
)

criterion = nn.MSELoss()

train(epochs, model, optimizer, criterion, sheduler, train_dataloader, test_dataloader)

Using device: cuda
Epoch: 1, Training Loss: 0.35960, Validation Loss: 0.24898
Epoch: 11, Training Loss: 0.13831, Validation Loss: 0.14007
Epoch: 21, Training Loss: 0.09980, Validation Loss: 0.10962
Epoch: 31, Training Loss: 0.05812, Validation Loss: 0.10305
Epoch: 41, Training Loss: 0.02934, Validation Loss: 0.10767
Epoch: 51, Training Loss: 0.01189, Validation Loss: 0.10492
Epoch: 61, Training Loss: 0.00679, Validation Loss: 0.10456
Epoch: 71, Training Loss: 0.00456, Validation Loss: 0.09518
Epoch: 81, Training Loss: 0.00350, Validation Loss: 0.09550
Epoch: 91, Training Loss: 0.00213, Validation Loss: 0.09487
Epoch: 101, Training Loss: 0.00164, Validation Loss: 0.09467
Epoch: 111, Training Loss: 0.00147, Validation Loss: 0.09566
Epoch: 121, Training Loss: 0.00180, Validation Loss: 0.09464
Epoch: 131, Training Loss: 0.00027, Validation Loss: 0.09379
Epoch: 141, Training Loss: 0.00009, Validation Loss: 0.09375
Epoch: 151, Training Loss: 0.00026, Validation Loss: 0.09382
Epoch: 161, Trai

### Prediction
Now we apply the same preprocessing to the test data, run the trained model on it, and write the predictions to the submission file.

In [16]:
# Drop the same non-sensor columns as for training and reuse the scaler that
# was fitted earlier (transform only, no refit).
test_data = scaler.transform(
    test_data.drop(columns=['sequence', 'subject', 'step'])
)

In [17]:
# Shape of the scaled test feature array.
test_data.shape

(733080, 13)

In [18]:
# No labels exist for the test set, so build the loader in 'test' mode.
loader = prepare_data(
    data=test_data,
    data_labels=None,
    seq_num=60,
    data_num=len(test_data),
    mode='test',
)

In [19]:
# Run the trained model over the test loader; predict returns the per-batch
# outputs concatenated into one numpy array.
pred = predict(model, loader)

In [20]:
# Store the model outputs in the submission's 'state' column.
# NOTE(review): assumes pred has exactly one column and one row per
# submission row - confirm against the loader length.
submission['state'] = pd.DataFrame(pred)

In [21]:
# Inspect the predicted values before writing them to disk.
submission['state']

0        0.996635
1        0.999785
2        0.000708
3        1.003762
4        0.831336
           ...   
12213    0.291392
12214    0.959934
12215    0.764666
12216   -0.002901
12217    0.001709
Name: state, Length: 12218, dtype: float32

In [22]:
# Write the submission table to submit.csv without the DataFrame index.
submission.to_csv("submit.csv", index = False)

In [23]:
# Force a garbage-collection pass; the displayed value is gc.collect()'s return value.
gc.collect()

72