<h2>Stock Market Transformer Model</h2>


In [10]:
from abc import ABC

import pandas as pd
import numpy as np
import matplotlib.pyplot
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import TorchNormalizer
import torch.nn as nn
import torch.optim as optim
import torch
from torch import Tensor
from torch.utils.data import Dataset, DataLoader
from pytorch_forecasting.models.base_model import BaseModel, BaseModelWithCovariates


<h3>Data</h3>

In [91]:
import yfinance as yf
data = yf.download(tickers="AAPL", period='max', interval='1d', groupby='ticker', auto_adjust='True')
data.head()

#create random data


[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1980-12-12,0.100323,0.100759,0.100323,0.100323,469033600
1980-12-15,0.095525,0.095525,0.095089,0.095089,175884800
1980-12-16,0.088546,0.088546,0.08811,0.08811,105728000
1980-12-17,0.090291,0.090727,0.090291,0.090291,86441600
1980-12-18,0.092908,0.093345,0.092908,0.092908,73449600


In [92]:
data.reset_index(inplace=True)
data.index = data.index.set_names(["order"])
data.reset_index(inplace=True)#to keep up with order
data['Target'] = data["Close"].shift(-1)
data["Date"] = data["Date"].apply(lambda x: x.value/10**9)
data.head()

Unnamed: 0,order,Date,Open,High,Low,Close,Volume,Target
0,0,345427200.0,0.100323,0.100759,0.100323,0.100323,469033600,0.095089
1,1,345686400.0,0.095525,0.095525,0.095089,0.095089,175884800,0.08811
2,2,345772800.0,0.088546,0.088546,0.08811,0.08811,105728000,0.090291
3,3,345859200.0,0.090291,0.090727,0.090291,0.090291,86441600,0.092908
4,4,345945600.0,0.092908,0.093345,0.092908,0.092908,73449600,0.098578


In [93]:
from sklearn.model_selection import train_test_split
X = data.drop(["Target"], axis=1)
y = data["Target"]
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.1, random_state=42, shuffle=False)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42, shuffle=False)

In [94]:
Tensor(X_train.values)

tensor([[0.0000e+00, 3.4543e+08, 1.0032e-01,  ..., 1.0032e-01, 1.0032e-01,
         4.6903e+08],
        [1.0000e+00, 3.4569e+08, 9.5525e-02,  ..., 9.5089e-02, 9.5089e-02,
         1.7588e+08],
        [2.0000e+00, 3.4577e+08, 8.8546e-02,  ..., 8.8110e-02, 8.8110e-02,
         1.0573e+08],
        ...,
        [8.4250e+03, 1.3996e+09, 1.8702e+01,  ..., 1.8568e+01, 1.8734e+01,
         2.9160e+08],
        [8.4260e+03, 1.3999e+09, 1.8797e+01,  ..., 1.8794e+01, 1.8968e+01,
         2.1321e+08],
        [8.4270e+03, 1.3999e+09, 1.8941e+01,  ..., 1.8899e+01, 1.8997e+01,
         1.5974e+08]])

In [95]:
X_train.shape

(8428, 7)

In [96]:
class StockDataset(Dataset):
    #must overwrite __getitem__, __len__, __
    def __init__(self, x, y, sequence_length):
        self.x = x
        self.y = y #shifted close price
        self.seq_length = sequence_length #can be thought of as the window size


    def __len__(self):
        #length means how many sequences
        return self.x.shape[0] - self.seq_length

    #assuming __getsize__ will be used by data loader to figure out when to stop

    def __getitem__(self, idx):
        #return sequences
        #return

        return Tensor(self.x.iloc[[idx]].values), Tensor(self.y.iloc[[idx]].values)

In [109]:
dataset_train = StockDataset(X_train, y_train, 30)
dataset_val = StockDataset(X_val, y_val, 30)

train_dataloader = DataLoader(dataset_train, batch_size=16, shuffle=True)
val_dataloader = DataLoader(dataset_val, batch_size=16, shuffle=True)

In [108]:
X_train.tail()

Unnamed: 0,order,Date,Open,High,Low,Close,Volume
8423,8423,1399421000.0,18.939178,19.004085,18.699913,18.846272,282864400
8424,8424,1399507000.0,18.820993,19.018083,18.761804,18.812675,230297200
8425,8425,1399594000.0,18.702297,18.757009,18.567598,18.734293,291597600
8426,8426,1399853000.0,18.796681,18.994088,18.7938,18.967533,213208800
8427,8427,1399939000.0,18.940975,19.022242,18.899384,18.997286,159737200


In [99]:
X_train.iloc[[i for i in range(0,17)]]

Unnamed: 0,order,Date,Open,High,Low,Close,Volume
0,0,345427200.0,0.100323,0.100759,0.100323,0.100323,469033600
1,1,345686400.0,0.095525,0.095525,0.095089,0.095089,175884800
2,2,345772800.0,0.088546,0.088546,0.08811,0.08811,105728000
3,3,345859200.0,0.090291,0.090727,0.090291,0.090291,86441600
4,4,345945600.0,0.092908,0.093345,0.092908,0.092908,73449600
5,5,346032000.0,0.098578,0.099015,0.098578,0.098578,48630400
6,6,346291200.0,0.103376,0.103813,0.103376,0.103376,37363200
7,7,346377600.0,0.107739,0.108175,0.107739,0.107739,46950400
8,8,346464000.0,0.113409,0.113845,0.113409,0.113409,48003200
9,9,346636800.0,0.123877,0.124314,0.123877,0.123877,55574400


In [110]:
iterable = iter(train_dataloader)
*_, last = iterable
print(last)

[tensor([[[3.8600e+03, 8.2728e+08, 1.9687e-01, 1.9687e-01, 1.9209e-01,
          1.9304e-01, 1.1599e+08]],

        [[2.7500e+03, 6.8869e+08, 3.7502e-01, 3.7866e-01, 3.6956e-01,
          3.7684e-01, 1.0124e+08]],

        [[6.9270e+03, 1.2119e+09, 5.7312e+00, 5.7477e+00, 5.6184e+00,
          5.7190e+00, 7.4398e+08]],

        [[3.3400e+02, 3.8742e+08, 6.1066e-02, 6.1503e-02, 6.0630e-02,
          6.0630e-02, 4.4307e+07]],

        [[4.8610e+03, 9.5247e+08, 9.3942e-01, 9.4754e-01, 9.0645e-01,
          9.3273e-01, 2.7123e+08]],

        [[1.9960e+03, 5.9452e+08, 2.6280e-01, 2.6456e-01, 2.5927e-01,
          2.6191e-01, 2.4246e+08]],

        [[2.4960e+03, 6.5690e+08, 2.1443e-01, 2.2524e-01, 2.1443e-01,
          2.1623e-01, 1.3420e+08]],

        [[1.0460e+03, 4.7598e+08, 1.0425e-01, 1.0469e-01, 1.0120e-01,
          1.0120e-01, 2.7624e+08]],

        [[2.1160e+03, 6.0964e+08, 2.8018e-01, 2.8372e-01, 2.7663e-01,
          2.7929e-01, 1.3938e+08]],

        [[7.3770e+03, 1.2683e+09, 6.

In [106]:
second = next(iterable)
second

[tensor([[[1.6000e+01, 3.4767e+08, 1.0817e-01, 1.0817e-01, 1.0774e-01,
           1.0774e-01, 5.5686e+07]],
 
         [[1.7000e+01, 3.4776e+08, 1.0599e-01, 1.0599e-01, 1.0556e-01,
           1.0556e-01, 3.9827e+07]],
 
         [[1.8000e+01, 3.4785e+08, 1.1123e-01, 1.1166e-01, 1.1123e-01,
           1.1123e-01, 2.1504e+07]],
 
         [[1.9000e+01, 3.4811e+08, 1.1123e-01, 1.1123e-01, 1.1036e-01,
           1.1036e-01, 2.3699e+07]],
 
         [[2.0000e+01, 3.4819e+08, 1.0687e-01, 1.0687e-01, 1.0643e-01,
           1.0643e-01, 2.3050e+07]],
 
         [[2.1000e+01, 3.4828e+08, 1.0687e-01, 1.0730e-01, 1.0687e-01,
           1.0687e-01, 1.4291e+07]],
 
         [[2.2000e+01, 3.4836e+08, 1.0905e-01, 1.0992e-01, 1.0905e-01,
           1.0905e-01, 1.4067e+07]],
 
         [[2.3000e+01, 3.4845e+08, 1.0861e-01, 1.0861e-01, 1.0817e-01,
           1.0817e-01, 1.3395e+07]],
 
         [[2.4000e+01, 3.4871e+08, 1.1472e-01, 1.1515e-01, 1.1472e-01,
           1.1472e-01, 4.1574e+07]],
 
         [

In [5]:
random_timeseries = pd.DataFrame(
    dict(
    date=[x for x in range(1,2001)],
    open=np.random.rand(2000) + 0.2,
    close=np.random.rand(2000) + 1000,
        high=np.random.rand(2000) + 18,
        low = np.random.rand(2000) + 2,
        volume=np.random.rand(2000) + 2
    ))
random_timeseries["target"] = random_timeseries.close.shift(-1)
random_timeseries.fillna(3, inplace=True)
random_timeseries["constant"] = np.ones(2000)
random_timeseries.head()

Unnamed: 0,date,open,close,high,low,volume,target,constant
0,1,0.375879,1000.192535,18.738922,2.274733,2.790435,1000.404939,1.0
1,2,0.434617,1000.404939,18.355642,2.369738,2.848,1000.732609,1.0
2,3,1.16794,1000.732609,18.30561,2.467036,2.082916,1000.470478,1.0
3,4,0.675451,1000.470478,18.195614,2.061493,2.797018,1000.834811,1.0
4,5,1.002993,1000.834811,18.729299,2.860682,2.823988,1000.111026,1.0


In [6]:
#what features constitute a single timeseries sample
group_ids = ['constant']
target='target'
time_idx = 'date'

max_prediction_length=1 #one day
#list of continous variables that change over time and are not known in the future
time_varying_unknown_reals = ['open', 'close', 'high', 'low', 'volume', 'target']
#normalizer, add later
#target_normalizer= TorchNormalizer()
max_encoder_length = 5
min_encoder_length = 0

#add no
pytorch_random_dataset = TimeSeriesDataSet(random_timeseries,
                                           group_ids=group_ids,
                                           time_idx=time_idx,
                                           target=target,
                                           min_prediction_length=1,
                                           max_prediction_length=max_prediction_length,
                                           max_encoder_length=max_encoder_length,
                                           min_encoder_length=min_encoder_length,
                                           allow_missing_timesteps=True,
                                           time_varying_unknown_reals=time_varying_unknown_reals,
                                           #target_normalizer=target_normalizer
                                           )

In [7]:
#check out dataloader which we will use to feed data to the model's forward method
data_loader = pytorch_random_dataset.to_dataloader(batch_size=16, shuffle=False)

In [8]:
x,y = next(iter(data_loader))

  target_scale = torch.tensor([batch[0]["target_scale"] for batch in batches], dtype=torch.float)


In [9]:
print("x =", x)
print("\ny =", y)
print("\nsizes of x =")
for key, value in x.items():
    print(f"\t{key} = {value.size()}")

x = {'encoder_cat': tensor([], size=(16, 5, 0), dtype=torch.int64), 'encoder_cont': tensor([[[ 1.0000e+00, -1.1349e+00, -1.0375e+00,  8.5500e-01, -7.7623e-01,
           1.0069e+00,  1.8451e-02],
         [ 1.0000e+00, -9.3160e-01, -3.0177e-01, -4.6928e-01, -4.4722e-01,
           1.2050e+00,  3.3144e-02],
         [ 1.0000e+00,  1.6065e+00,  8.3318e-01, -6.4214e-01, -1.1026e-01,
          -1.4283e+00,  2.1390e-02],
         [ 1.0000e+00, -9.8043e-02, -7.4761e-02, -1.0222e+00, -1.5147e+00,
           1.0295e+00,  3.7727e-02],
         [ 1.0000e+00,  1.0356e+00,  1.1872e+00,  8.2175e-01,  1.2530e+00,
           1.1223e+00,  5.2715e-03]],

        [[ 1.0000e+00, -9.3160e-01, -3.0177e-01, -4.6928e-01, -4.4722e-01,
           1.2050e+00,  3.3144e-02],
         [ 1.0000e+00,  1.6065e+00,  8.3318e-01, -6.4214e-01, -1.1026e-01,
          -1.4283e+00,  2.1390e-02],
         [ 1.0000e+00, -9.8043e-02, -7.4761e-02, -1.0222e+00, -1.5147e+00,
           1.0295e+00,  3.7727e-02],
         [ 1.0000e

<h3>Transformer Architecture</h3>

Note that the custom encoder contains:
- a self attention layer
- a feed forward layer (normal stuffnot ethat for the attention layer, the multihead attention needs the number of diemnsions to be divisible by the number of multi attention heads.

<h3>Model 1 (used for language)</h3> (theirs)

In [10]:
from torch.nn import TransformerEncoderLayer, TransformerEncoder
from code_repo.transformer import PositionalEncoding
import math

class TransformerModel(nn.Module):

    def __init__(self, ntoken: int, d_model: int, nhead: int, d_hid: int,
                 nlayers: int, dropout: float = 0.5):
        super().__init__()
        self.model_type = 'Transformer'
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = TransformerEncoderLayer(d_model, nhead, d_hid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Embedding(ntoken, d_model)
        self.d_model = d_model
        self.decoder = nn.Linear(d_model, ntoken)

        self.init_weights()

    def init_weights(self) -> None:
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src: torch.Tensor, src_mask: torch.Tensor) -> torch.Tensor:
        """
        Args:
            src: Tensor, shape [seq_len, batch_size]
            src_mask: Tensor, shape [seq_len, seq_len]

        Returns:
            output Tensor of shape [seq_len, batch_size, ntoken]
        """
        src = self.encoder(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, src_mask)
        output = self.decoder(output)
        return output


def generate_square_subsequent_mask(sz: int) -> torch.Tensor:
    """Generates an upper-triangular matrix of -inf, with zeros on diag."""
    return torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1)

class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """
        Args:
            x: Tensor, shape [seq_len, batch_size, embedding_dim]
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

<h3>Pytorch Adapter</h3>
<p>Currently this contains a premade transformer model from the torch fellows</p>

In [14]:
#note that this has to extract 'encoder_cont' from the input, then pass it to the saved transformer model

class StockTransformerModel(BaseModel):

    def __init__(self, d_model = 5, nhead = 5,
                 num_encoder_layers = 6,
                 num_decoder_layers = 6,
                 dim_feedforward = 100,
                 dropout = 0.1,
                 activation = "relu"):

        super().__init__()
        self.save_hyperparameters(d_model, num_encoder_layers,\
                                  num_decoder_layers, dim_feedforward,
                                  dropout)
        self.d_model = d_model
        self.nhead = nhead
        self.num_encoder_layers = num_encoder_layers
        self.num_decoder_layers = num_decoder_layers
        self.dim_feedforward = dim_feedforward
        self.dropout = dropout

        self.network = nn.Transformer(d_model = self.d_model,
                                      nhead = self.nhead,
                                      num_encoder_layers = self.num_encoder_layers,
                                      dim_feedforward= self.dim_feedforward,
                                      dropout= self.dropout,
                                      batch_first=True
                                      #activation= self.activation
                                      )

        #how many features are we passing through

        #custom_encoder = some class
        #custom_decoder = some class

        #feed forward already implemented
    def forward(self, x):
        #need to extract data from whats returned in batch
        new_x = x["encoder_cont"].squeeze(-1)
        new_y = x['decoder_cont'].squeeze(-1)

        pred = self.network(new_x, new_y)


        prediction = self.transform_output(pred, target_scale=x["target_scale"])
        return self.to_netowrk_output(prediction=pred)




In [15]:
model = StockTransformerModel()
model(x)

RuntimeError: the feature number of src and tgt must be equal to d_model

In [13]:
x

{'encoder_cat': tensor([], size=(16, 5, 0), dtype=torch.int64),
 'encoder_cont': tensor([[[ 1.0000e+00, -1.1349e+00, -1.0375e+00,  8.5500e-01, -7.7623e-01,
            1.0069e+00,  1.8451e-02],
          [ 1.0000e+00, -9.3160e-01, -3.0177e-01, -4.6928e-01, -4.4722e-01,
            1.2050e+00,  3.3144e-02],
          [ 1.0000e+00,  1.6065e+00,  8.3318e-01, -6.4214e-01, -1.1026e-01,
           -1.4283e+00,  2.1390e-02],
          [ 1.0000e+00, -9.8043e-02, -7.4761e-02, -1.0222e+00, -1.5147e+00,
            1.0295e+00,  3.7727e-02],
          [ 1.0000e+00,  1.0356e+00,  1.1872e+00,  8.2175e-01,  1.2530e+00,
            1.1223e+00,  5.2715e-03]],
 
         [[ 1.0000e+00, -9.3160e-01, -3.0177e-01, -4.6928e-01, -4.4722e-01,
            1.2050e+00,  3.3144e-02],
          [ 1.0000e+00,  1.6065e+00,  8.3318e-01, -6.4214e-01, -1.1026e-01,
           -1.4283e+00,  2.1390e-02],
          [ 1.0000e+00, -9.8043e-02, -7.4761e-02, -1.0222e+00, -1.5147e+00,
            1.0295e+00,  3.7727e-02],
     

In [122]:
x['encoder_cont'].size() + y[0].size()

torch.Size([16, 5, 7, 16, 1])

In [107]:
x['encoder_cont'].size()

NameError: name 'yx' is not defined

In [None]:
#train model
def train(model, data, batch_size=32,  learning_rate=0.1, momentum=0.9, total_epochs=10, weight_decay=0):
    criterion = nn.CrossEntropyLoss()
    optimizer= optim.Adam(model.parameters(), learning_rate=learning_rate, weight_decay=weight_decay)
    total_loss = 0
    losses = []
    iterations = []
    training_accuract = []
    validation_accuract = []

    num_iterations = 0
    for epoch in range(0, total_epochs):








In [87]:
model = StockTransformer()
model.parameters()

<generator object Module.parameters at 0x7f96c84cfc80>

In [40]:
src = torch.rand((10, 32, 6))
tgt = torch.rand((20,32,6))

In [41]:
out = model(src, tgt)

In [None]:
torch.rand((1,2,3))

In [31]:
out

tensor([[[ 1.8609e+00,  3.5001e-01,  5.2951e-01,  ..., -5.6568e-01,
          -3.1149e-01, -8.8702e-01],
         [ 1.3641e+00,  3.6736e-02,  6.4878e-01,  ...,  4.2819e-01,
          -7.7870e-01,  5.3546e-01],
         [ 1.5085e+00,  6.7698e-01,  5.3151e-01,  ...,  4.6038e-01,
           2.2322e-01, -1.2730e+00],
         ...,
         [ 1.8146e+00,  7.0981e-01,  4.2588e-01,  ...,  9.0969e-01,
           4.8604e-02,  1.7219e-01],
         [ 1.8847e+00,  6.4794e-02,  1.6133e-01,  ...,  1.7033e-01,
          -6.5030e-01,  1.1771e+00],
         [ 1.1817e+00,  6.8779e-01,  5.2459e-01,  ..., -3.5132e-01,
          -3.7258e-01, -5.6009e-02]],

        [[ 1.0215e+00,  7.4425e-01,  1.1591e+00,  ...,  2.7391e-01,
          -2.9277e-01, -1.4802e-01],
         [ 1.9489e+00,  1.6937e-01,  2.9322e-01,  ...,  6.9300e-01,
          -1.0268e+00,  3.5865e-01],
         [ 1.4858e+00,  6.8375e-01,  1.0703e+00,  ...,  7.4776e-01,
           2.1364e-01,  4.6723e-02],
         ...,
         [ 1.9812e+00,  7

In [71]:
from pytorch_forecasting.data.examples import generate_ar_data

timesteps = 1000

data = generate_ar_data(seasonality=10.0, timesteps=timesteps, n_series=100, seed=42)
data["static"] = 2
data["date"] = pd.Timestamp("2020-01-01") + pd.to_timedelta(data.time_idx, "D")

data.series = data.series.astype(str).astype("category")

max_encoder_length = 30
max_prediction_length = 15

cutoff = timesteps * 0.70
train_data = data[data["time_idx"] <= cutoff]
test_data = data[data["time_idx"] > cutoff]

training = TimeSeriesDataSet(
    train_data,
    time_idx="time_idx",
    target="value",
#      categorical_encoders={"series": NaNLabelEncoder().fit(train_data.series)},
    group_ids=["series"],
    time_varying_unknown_reals=["value"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    # allow_missing_timesteps=True,
)

In [None]:
    def __init__(self, d_model: int, nhead: int, d_hid: int,
                 nlayers: int, dropout: float = 0.5):
        super().__init__()
        self.model_type = 'Transformer'
        self.embedding = nn.Linear(5, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = TransformerEncoderLayer(d_model, nhead, d_hid, dropout, batch_first=True)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.d_model = d_model
        self.decoder = nn.Linear(d_model * 2, 1)

        self.init_weights()

In [None]:
#sloan stuff

In [None]:
def generate_square_subsequent_mask(sz: int) -> Tensor:
    """Generates an upper-triangular matrix of -inf, with zeros on diag."""
    return torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1)

class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(position * div_term)
        pe[0, :, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """
        Args:
            x: Tensor, shape [batch_size, seq_len, embedding_dim]
        """
        x = x + self.pe[:,:x.size(1)]
        return self.dropout(x)

class TransformerModel(nn.Module):
    def __init__(self, d_model: int, nhead: int, d_hid: int,
                 nlayers: int, dropout: float = 0.5):
        super().__init__()
        self.model_type = 'Transformer'
        self.embedding = nn.Linear(5, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = TransformerEncoderLayer(d_model, nhead, d_hid, dropout, batch_first=True)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.d_model = d_model
        self.decoder = nn.Linear(d_model * 2, 1)

        self.init_weights()

    def init_weights(self) -> None:
        initrange = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src: Tensor, src_mask: Tensor) -> Tensor:
        """
        Args:
            src: Tensor, shape [batch_size, seq_len]
            src_mask: Tensor, shape [seq_len, seq_len]

        Returns:
            output Tensor of shape [batch_size, seq_len, ntoken]
        """
        src = self.embedding(src)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, src_mask)
        output = torch.concat([torch.max(output, dim=1)[0], torch.mean(output, dim=1)], dim=1)
        output = self.decoder(output)
        return output

In [46]:
test_data = pd.DataFrame(
    dict(
        value=np.random.rand(30) - 0.5,
        group=np.repeat(np.arange(3), 10),
        time_idx=np.tile(np.arange(10), 3),
    )
)
test_data

Unnamed: 0,value,group,time_idx
0,0.104145,0,0
1,-0.408156,0,1
2,-0.02765,0,2
3,0.468365,0,3
4,-0.188278,0,4
5,-0.173244,0,5
6,-0.285444,0,6
7,-0.124239,0,7
8,-0.262205,0,8
9,0.174174,0,9


In [48]:
# create the dataset from the pandas dataframe
dataset = TimeSeriesDataSet(
    test_data,
    group_ids=["group"],
    target="value",
    time_idx="time_idx",
    min_encoder_length=5,
    max_encoder_length=5,
    min_prediction_length=2,
    max_prediction_length=2,
    time_varying_unknown_reals=["value"])

In [49]:
dataloader = dataset.to_dataloader(batch_size=4)

# and load the first batch
x, y = next(iter(dataloader))
print("x =", x)
print("\ny =", y)
print("\nsizes of x =")
for key, value in x.items():
    print(f"\t{key} = {value.size()}")

x = {'encoder_cat': tensor([], size=(4, 5, 0), dtype=torch.int64), 'encoder_cont': tensor([[[ 0.1013],
         [ 1.8939],
         [-0.4792],
         [-0.4248],
         [-0.8303]],

        [[-1.3947],
         [ 0.9272],
         [-0.2640],
         [-0.0974],
         [-1.3527]],

        [[-1.2738],
         [ 0.1013],
         [ 1.8939],
         [-0.4792],
         [-0.4248]],

        [[ 0.3936],
         [ 1.3880],
         [-0.9743],
         [-1.3576],
         [ 0.6902]]]), 'encoder_target': tensor([[-0.0276,  0.4684, -0.1883, -0.1732, -0.2854],
        [-0.4416,  0.2009, -0.1288, -0.0826, -0.4300],
        [-0.4082, -0.0276,  0.4684, -0.1883, -0.1732],
        [ 0.0532,  0.3284, -0.3253, -0.4314,  0.1353]]), 'encoder_lengths': tensor([5, 5, 5, 5]), 'decoder_cat': tensor([], size=(4, 2, 0), dtype=torch.int64), 'decoder_cont': tensor([[[-0.2477],
         [-0.7463]],

        [[-1.6040],
         [ 1.6029]],

        [[-0.8303],
         [-0.2477]],

        [[-0.2954],
   

  target_scale = torch.tensor([batch[0]["target_scale"] for batch in batches], dtype=torch.float)


In [None]:
class Time2Vector(Layer):
  def __init__(self, seq_len, **kwargs):
    super(Time2Vector, self).__init__()
    self.seq_len = seq_len

  def build(self, input_shape):
    self.weights_linear = self.add_weight(name='weight_linear',
                                shape=(int(self.seq_len),),
                                initializer='uniform',
                                trainable=True)

    self.bias_linear = self.add_weight(name='bias_linear',
                                shape=(int(self.seq_len),),
                                initializer='uniform',
                                trainable=True)

    self.weights_periodic = self.add_weight(name='weight_periodic',
                                shape=(int(self.seq_len),),
                                initializer='uniform',
                                trainable=True)

    self.bias_periodic = self.add_weight(name='bias_periodic',
                                shape=(int(self.seq_len),),
                                initializer='uniform',
                                trainable=True)

  def call(self, x):
    x = tf.math.reduce_mean(x[:,:,:4], axis=-1) # Convert (batch, seq_len, 5) to (batch, seq_len)
    time_linear = self.weights_linear * x + self.bias_linear
    time_linear = tf.expand_dims(time_linear, axis=-1) # (batch, seq_len, 1)

    time_periodic = tf.math.sin(tf.multiply(x, self.weights_periodic) + self.bias_periodic)
    time_periodic = tf.expand_dims(time_periodic, axis=-1) # (batch, seq_len, 1)
    return tf.concat([time_linear, time_periodic], axis=-1) # (batch, seq_len, 2)