In [None]:
import math
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import seaborn as sns
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, SequentialSampler
import pickle
import pandas as pd
import numpy as np
import math
import pickle

# Please check my notebook for the data preprocessing steps:
https://www.kaggle.com/ahmedmoabbas/pytorch-models-getting-data

In [None]:
# Directory that holds the preprocessed data used by this notebook.
data_path = "../input/pytorch-models-getting-data"

In [None]:
# Load the preprocessed splits. The file is opened in a `with` block so the
# handle is closed as soon as unpickling finishes (the bare `open(...)` call
# left it open until garbage collection).
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load files that come from a trusted source.
with open(data_path+'/preprocessed_data','rb') as data_file:
    data = pickle.load(data_file)
train = data['train']
test = data['test']

In [None]:
# Peek at the first five rows of the training split.
train.head(n=5)

In [None]:
class Data(Dataset):
    """Map-style dataset over the rows of a DataFrame.

    Every item is a dict with three entries:
      * 'tensors' - float tensor built from all columns except the last two
        (note that this slice also covers the column used for 'segment'),
      * 'segment' - long tensor built from the third-to-last column,
      * 'target'  - float tensor built from the second-to-last column.

    Args:
        df: DataFrame whose rows are converted to plain Python lists once, up front.
        flip: probability (compared against a uniform [0, 1) draw) of reversing
            'tensors' along its last axis when an item is fetched.
    """

    def __init__(self, df, flip=0):
        super().__init__()
        self.flip = flip
        self.df = df.values.tolist()

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        record = self.df[idx]
        sample = {
            'tensors': torch.as_tensor(record[:-2], dtype=torch.float),
            'segment': torch.as_tensor(record[-3], dtype=torch.long),
            'target': torch.as_tensor(record[-2], dtype=torch.float),
        }
        # Random augmentation: only 'tensors' is reversed; 'segment' and
        # 'target' are returned as-is.
        if np.random.rand() < self.flip:
            sample['tensors'] = sample['tensors'].flip(-1)
        return sample

# One way to give our models a useful notion of time is to transform the timesteps into a learnable vector that can capture the meaning of time.
# Please check this paper:
https://openreview.net/pdf?id=rklklCVYvB

In [None]:
# Either the sine or the cosine function can be used as the activation.
class TimeToVector(nn.Module):
    """Learnable time representation with one linear and several periodic channels.

    The forward pass produces ``n_outputs`` channels along the last axis:
    the first ``n_outputs - 1`` are ``act(inputs @ w_weiht + b_weight)`` and the
    final one is the plain affine term ``inputs @ w_bias + b_bias``.

    Args:
        n_inputs: size of the last axis of ``inputs``.
        seq_len: first dimension of the additive terms ``b_weight`` / ``b_bias``.
        n_outputs: number of channels returned by ``forward``.
        act: activation applied to the first ``n_outputs - 1`` channels
            (``torch.cos`` by default).
    """

    def __init__(self, n_inputs, seq_len, n_outputs, act=torch.cos):
        super().__init__()
        self.n_inputs, self.n_outputs, self.seq_len = n_inputs, n_outputs, seq_len
        n_periodic = n_outputs - 1

        # The attribute name `w_weiht` (sic) is kept as-is: it is a state_dict key.
        self.w_weiht = nn.Parameter(torch.randn(n_inputs, n_periodic))
        self.b_weight = nn.Parameter(torch.randn(seq_len, n_periodic))

        self.w_bias = nn.Parameter(torch.randn(n_inputs, 1))
        self.b_bias = nn.Parameter(torch.randn(seq_len, 1))
        self.act = act

    def forward(self, inputs):
        linear_part = inputs @ self.w_bias + self.b_bias
        periodic_part = self.act(inputs @ self.w_weiht + self.b_weight)
        return torch.cat((periodic_part, linear_part), dim=-1)

### We can concatenate this time vector with our features before feeding them to the model, or at any stage of the model

### Here is an example of encoding our features concatenated with the time vector

In [None]:
class Encoder(nn.Module):
    """Bidirectional LSTM encoder fed with the features plus a learned time vector.

    The input is passed through ``TimeToVector`` to obtain ``d_time`` extra
    channels, which are concatenated to the input along the last axis before
    the 3-layer bidirectional LSTM.

    Args:
        n_inputs: number of input features (size of the last axis of the input).
        d_emb: embedding size; each LSTM direction uses ``d_emb // 2`` hidden
            units, so the output's last axis has ``2 * (d_emb // 2)`` entries.
        d_time: number of time-vector channels appended to the features.
        seq_len: sequence length used for the time vector's additive terms.
            Defaults to 80, the value that used to be hard-coded.

    Returns (from ``forward``):
        The per-step LSTM outputs; the final hidden/cell states are discarded.
    """

    def __init__(self, n_inputs, d_emb, d_time, seq_len=80):
        super(Encoder, self).__init__()
        self.act = nn.ReLU()
        self.rnn = nn.LSTM(n_inputs + d_time , d_emb//2, 3,batch_first=True, bidirectional=True)
        # Build the time vector from `n_inputs` / `seq_len` instead of the
        # hard-coded (35, 80), so the encoder works for any feature count.
        self.time = TimeToVector(n_inputs, seq_len, d_time)

    def forward(self, input):
        time = self.time(input)
        # Append the time channels to the raw features along the last axis.
        input = torch.cat([input, time], -1)
        out, hidden = self.rnn(input)
        return out

In [None]:
# Here 10 extra features are added as the time representation.
encoder = Encoder(
    n_inputs=35,
    d_emb=512,
    d_time=10,
)

In [None]:
# Take the first batch of 64 samples and swap the last two axes of 'tensors'
# (presumably giving (batch, time steps, features) - confirm against the
# preprocessing notebook).
loader = DataLoader(Data(train), batch_size=64)
features = next(iter(loader))['tensors'].transpose(1, 2)
features.shape

In [None]:
# Shape of the encoder outputs that can be fed to the next stage of the model.
encoder(features).size()

## Another way to represent the time step is a positional embedding, like the one transformer models add to the word embeddings to give each word extra meaning that depends on its position in the sentence


In [None]:
class PositionalEncoder(nn.Module):
    """Adds fixed sinusoidal positional encodings to a batch-first input.

    A table ``pe`` of shape ``(1, max_len, d_model)`` is precomputed and stored
    as a buffer: even channels hold ``sin(position * div_term)`` and odd
    channels hold ``cos(position * div_term)``.

    Args:
        d_model: size of the last axis of the inputs (may be even or odd).
        max_len: longest sequence length the table covers.

    ``forward`` expects ``x`` of shape ``(batch, seq_len, d_model)`` with
    ``seq_len <= max_len`` and returns a tensor of the same shape.
    """

    def __init__(self, d_model: int, max_len: int = 80):
        super().__init__()
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer odd channel than even channels,
        # so drop the surplus frequency (no-op when d_model is even).
        pe[0, :, 1::2] = torch.cos(position * div_term[: d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x):
        # `pe` is batch-first, so slice the sequence axis (dim 1), not dim 0;
        # slicing dim 0 only worked when seq_len happened to equal max_len.
        return x + self.pe[:, :x.size(1)]

In [None]:
class Encoder2(nn.Module):
    """Bidirectional LSTM encoder followed by positional encoding and LayerNorm.

    Args:
        n_inputs: number of input features (size of the last axis of the input).
        d_emb: embedding size; each LSTM direction uses ``d_emb // 2`` hidden
            units, and ``d_emb`` is also the size used for the positional
            encoder and the layer normalisation.
    """

    def __init__(self, n_inputs, d_emb):
        super().__init__()
        self.d_emb = d_emb
        self.act = nn.ReLU(inplace=True)
        self.position = PositionalEncoder(d_emb)
        self.rnn = nn.LSTM(
            input_size=n_inputs,
            hidden_size=d_emb // 2,
            num_layers=3,
            batch_first=True,
            bidirectional=True,
        )
        self.norm = nn.LayerNorm(d_emb)

    def forward(self, input):
        rnn_out, _ = self.rnn(input)
        # Scale the main features (the main embedding) by sqrt(d_emb) to give
        # them extra importance before adding the position embedding.
        scaled = rnn_out * math.sqrt(self.d_emb)
        return self.norm(self.position(scaled))

In [None]:
# Shape of the second encoder's output before it is sent to the next stage.
encoder2 = Encoder2(n_inputs=35, d_emb=512)
encoder2(features).size()