# train models

## 1. Train deep learning model

### 1.1 Load & process data

In [1]:
from deepLearning.dataloader import MacroData
from torch.utils.data import DataLoader
import torch
import pandas as pd
import utils
import numpy as np


na_thresh = 500
df = pd.read_pickle("../../data/macro/consolidated_macro_data.pkl")
df = df.dropna(thresh=na_thresh, axis=1).dropna(axis=0)
df = df.T.drop_duplicates().T
processor = utils.PreProcessor(df)

md = MacroData(48, processor.train, pad_val=-1e10)
DEVICE = torch.device("cpu")
DTYPE = torch.float32
def collate_fn(input, device=DEVICE, dtype=DTYPE):
    """collate inputs batch first"""
    idxs, data_ls, paddings = [], [], []
    for idx, data, padding_mask in input:
        idxs.append(idx)
        data_ls.append(data)
        paddings.append(padding_mask)
    return (
        idxs,
        torch.tensor(np.array(data_ls), dtype=dtype).to(device),
        torch.tensor(paddings, dtype=torch.bool).to(device)
    )
dataloader = DataLoader(md, batch_size=32, collate_fn=collate_fn)

  res[offset:] = np.log(input[offset:] / input[:-offset])
  res[offset:] = np.log(input[offset:] / input[:-offset])
  res[offset:] = np.log(input[offset:] / input[:-offset])
  res[offset:] = np.log(input[offset:] / input[:-offset])
  res[offset:] = np.log(input[offset:] / input[:-offset])
  res[offset:] = np.log(input[offset:] / input[:-offset])
  res[offset:] = np.log(input[offset:] / input[:-offset])
  res[offset:] = np.log(input[offset:] / input[:-offset])
  res[offset:] = np.log(input[offset:] / input[:-offset])
  res[offset:] = np.log(input[offset:] / input[:-offset])
  res[offset:] = np.log(input[offset:] / input[:-offset])


In [2]:
import torch.nn as nn
from typing import Tuple


class AutoEncoder(nn.Module):
    def __init__(self, 
                 window_size: int=60,
                 num_transformer_layers: int=3,
                 nhead: int=10,
                 dim: int=60,
                 encoding_dims: int=5):
        super().__init__()
        self.window_size = window_size
        self.dim = dim
        encoder_layer = nn.TransformerEncoderLayer(d_model=dim, nhead=nhead, batch_first=True)
        decoder_layer = nn.TransformerDecoderLayer(d_model=dim, nhead=nhead, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer=encoder_layer, num_layers=num_transformer_layers)
        self.linear_encoder = nn.Sequential(
            nn.Flatten(1, -1),
            nn.Linear(dim * window_size, dim * window_size // 4),
            nn.Linear(dim * window_size // 4, dim * window_size // 4 ** 2),
            nn.Linear(dim * window_size // 4 ** 2, dim * window_size // 4 ** 3),
            nn.Linear(dim * window_size // 4 ** 3, encoding_dims)
        )
        self.linear_decoder = nn.Sequential(
            nn.Linear(encoding_dims, dim * window_size // 4 ** 3),
            nn.Linear(dim * window_size // 4 ** 3, dim * window_size // 4 ** 2),
            nn.Linear(dim * window_size // 4 ** 2, dim * window_size // 4),
            nn.Linear(dim * window_size // 4, dim * window_size),
            nn.Unflatten(-1, (self.window_size, self.dim)),
        )
        self.transformer_decoder = nn.TransformerDecoder(decoder_layer=decoder_layer, num_layers=num_transformer_layers)
        self.tanh = nn.Tanh()

    def encode(self, x: torch.tensor, padding_mask: torch.tensor) -> Tuple[torch.tensor]:
        x_ = self.transformer_encoder(x, src_key_padding_mask=padding_mask)
        # x_ = torch.flatten(x_, 1, 2)
        z = self.linear_encoder(x_)
        z = self.tanh(z)
        return x_, z
    
    def decode(self, z: torch.tensor, memory: torch.tensor) -> torch.tensor:
        y_ = self.linear_decoder(z)
        y = self.transformer_decoder(y_, memory=memory)
        return y

    def forward(self, x, padding_mask: torch.tensor) -> torch.tensor:
        x_, z = self.encode(x, padding_mask=padding_mask)
        y_ = self.decode(z, x_)
        return y_
    
    def __call__(self, x, padding_mask) -> torch.tensor:
        return self.forward(x, padding_mask)


In [57]:
ae = AutoEncoder(window_size=48, num_transformer_layers=6, encoding_dims=10)

In [58]:
import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader, TensorDataset
from torch.nn import CrossEntropyLoss
from torch.utils.tensorboard import SummaryWriter


def get_grads(model: nn.Module):
    grads = {n:p.grad.cpu() for n, p in model.named_parameters()}
    return grads

lr = 1e-4
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

num_epochs = 100
criterion = nn.MSELoss()
optimizer = AdamW(ae.parameters(), lr=lr, betas=(0.9, 0.99), eps=1e-4)

Logger = SummaryWriter(log_dir=f"./deepLearning/logs/lr={lr}")

for epoch in range(num_epochs):
    for inputs in dataloader:
        idx, x, padding_mask = inputs
        x_ = x.to(device)
        inverted_mask = (torch.tensor(1) - padding_mask.unsqueeze(-1).to(torch.int)) 
        x_ = inverted_mask * x_ # mask padding tokens to 0 for loss calculation
        y = ae(x, padding_mask)
        y_ = inverted_mask * y

        loss = criterion(x_, y_)
        optimizer.zero_grad()
        loss.backward()

        optimizer.step()

    Logger.add_scalar("train/loss", loss, global_step=epoch)
    grads = get_grads(ae)
    for grad_name, grad in grads.items():
        Logger.add_histogram(f"train/gradient/{grad_name}", grad, global_step=epoch)

    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")

  torch.tensor(paddings, dtype=torch.bool).to(device)


Epoch [1/100], Loss: 4.6749
Epoch [2/100], Loss: 4.4685
Epoch [3/100], Loss: 4.2353
Epoch [4/100], Loss: 4.1520
Epoch [5/100], Loss: 4.0272
Epoch [6/100], Loss: 3.9770
Epoch [7/100], Loss: 3.9386
Epoch [8/100], Loss: 4.0272
Epoch [9/100], Loss: 3.8498
Epoch [10/100], Loss: 3.8264
Epoch [11/100], Loss: 3.7798
Epoch [12/100], Loss: 3.7513
Epoch [13/100], Loss: 3.7654
Epoch [14/100], Loss: 3.7576
Epoch [15/100], Loss: 4.0858
Epoch [16/100], Loss: 3.7839
Epoch [17/100], Loss: 3.6610
Epoch [18/100], Loss: 3.5280
Epoch [19/100], Loss: 3.5304
Epoch [20/100], Loss: 3.4441
Epoch [21/100], Loss: 3.4517
Epoch [22/100], Loss: 3.3966
Epoch [23/100], Loss: 3.3562
Epoch [24/100], Loss: 3.4214
Epoch [25/100], Loss: 3.4004
Epoch [26/100], Loss: 3.4584
Epoch [27/100], Loss: 3.3241
Epoch [28/100], Loss: 3.2886
Epoch [29/100], Loss: 3.2829
Epoch [30/100], Loss: 3.3207
Epoch [31/100], Loss: 3.3287
Epoch [32/100], Loss: 3.3578
Epoch [33/100], Loss: 3.3046
Epoch [34/100], Loss: 3.3268
Epoch [35/100], Loss: 3

In [13]:
_, z = ae.encode(x, padding_mask)

In [52]:
def predict(indx: int):
    _, x, mask = md[indx]
    x_ = torch.tensor(x, dtype=DTYPE).unsqueeze(-1).permute((2, 0, 1))
    mask_ = torch.tensor(mask, dtype=torch.bool).unsqueeze(-1).permute(1, 0)
                                            
    _, z = ae.encode(x_, mask_
                  )

    return z

In [56]:
predict(50)

tensor([[-1.0000,  0.9999, -1.0000, -1.0000,  1.0000]],
       grad_fn=<TanhBackward0>)