In [1]:
import os
import pandas as pd
import torch
from lib.datasets import get_stock_price,sample_indices,train_test_split
from lib.aug import apply_augmentations,parse_augmentations
from typing import List
from torch import nn
from typing import List
import torch.nn.functional as F

In [2]:
data_config = {
    "ticker" : "^GSPC",
    "interval" : "1d",
    "column" : 1,  
    "window_size" : 20,
    "dir" : "datasets",
    "subdir" : "stock"
}
sig_config = {
    "augmentations": [
        {"name": "AddTime"},
        {"name": "LeadLag"},
    ],
    "device" : "cuda:0",
    "depth" : 4,
}

In [3]:
tensor_data = get_stock_price(data_config)
x_real_train, x_real_test = train_test_split(tensor_data, train_test_ratio=0.8, device=sig_config["device"])
if sig_config["augmentations"] is not None:
    sig_config["augmentations"] = parse_augmentations(sig_config.get('augmentations'))
print("Before augmentation shape:",x_real_train.shape)
if sig_config["augmentations"] is not None:
    # Print the tensor shape after each augmentation
    x_aug_sig = apply_augmentations(x_real_train,sig_config["augmentations"])
    input_dim = x_aug_sig.shape[1]*x_aug_sig.shape[2]
print("After augmentation shape:",x_aug_sig.shape)
x_aug_sig = x_aug_sig.to(sig_config["device"])

Rolled data for training, shape torch.Size([1232, 20, 1])
Before augmentation shape: torch.Size([985, 20, 1])
torch.Size([985, 20, 2])
torch.Size([985, 39, 4])
After augmentation shape: torch.Size([985, 39, 4])


In [None]:
class VAE(nn.Module):
    def __init__(self, x_aug_sig, epoch, batch_size, hidden_dims: List, device) -> None:
        super(VAE, self).__init__()

        self.x_aug_sig = x_aug_sig
        print("Inpust tensor shape: {}".format(x_aug_sig.shape))
        self.epoch = epoch
        self.batch_size = batch_size
        self.device = device

        modules = []
        for i in range(len(hidden_dims)-1):
            modules.append(
                nn.Sequential(
                    nn.Linear(hidden_dims[i],hidden_dims[i+1]),
                    nn.LeakyReLU(),
                )
            )
        self.encoder_mu = nn.Sequential(*modules)
        modules = []
        for i in range(len(hidden_dims)-1):
            modules.append(
                nn.Sequential(
                    nn.Linear(hidden_dims[i],hidden_dims[i+1]),
                    nn.LeakyReLU(),
                )
            )
        self.encoder_sigma = nn.Sequential(*modules)

        modules = []
        for i in range(len(hidden_dims)-1,0,-1):
            modules.append(
                nn.Sequential(
                    nn.Linear(hidden_dims[i],hidden_dims[i-1]),
                    nn.LeakyReLU(),
                )
            )
        self.decoder = nn.Sequential(*modules)

        # To device
        self.encoder_mu.to(device)
        self.encoder_sigma.to(device)
        self.decoder.to(device)
    
    def encode(self, x):
        x_flatten = x.view(self.batch_size,-1)
        mean = self.encoder_mu(x_flatten)
        log_var = self.encoder_sigma(x_flatten)
        noise = torch.randn(self.batch_size,mean.shape[1]).to(self.device)
        z = mean + torch.exp(0.5*log_var).mul(noise)
        return mean, log_var, z
        
    def decode(self,z):
        reconstructed_data = self.decoder(z)
        return reconstructed_data

    def loss(self,mean,log_var,sample_data,reconstructed_data):
        # Reconstruction loss 
        recon_loss = F.mse_loss(sample_data, reconstructed_data, reduction='sum')
        # KL divergence
        kl_loss = 0.5 * torch.sum(mean.pow(2) + log_var.exp() - 1 - log_var)
        # Total VAE loss
        loss = recon_loss + kl_loss
        return loss
    
def train(model,optimizer):
    for i in range(model.epoch):
        # Sample time indices of size equal to the batch size.
        # From sefl.x_aug_sig
        time_indics = sample_indices(model.x_aug_sig.shape[0],model.batch_size,"cuda")
        sample_data = model.x_aug_sig[time_indics]
        # Encode 
        mean, log_var, z = model.encode(sample_data)
        # Decode
        reconstructed_data = model.decode(z)
        # Calculate loss
        loss = model.loss(mean,log_var,sample_data.view(model.batch_size,-1),reconstructed_data)
        # Backpropogation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Print loss
        if i%10==0:
            print("Epcho {} loss {}".format(i,loss.item()))


In [5]:
lr = 1e-4
batch_size = 128
epoch = 100
hidden_dims = [input_dim,60,20]
VAE = VAE(x_aug_sig=x_aug_sig,epoch=epoch,batch_size=batch_size,hidden_dims=hidden_dims,device='cuda')
print(VAE)
optimizer = torch.optim.Adam(VAE.parameters(),lr=lr)

Inpust tensor shape: torch.Size([985, 39, 4])
VAE(
  (encoder_mu): Sequential(
    (0): Sequential(
      (0): Linear(in_features=156, out_features=60, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
    )
    (1): Sequential(
      (0): Linear(in_features=60, out_features=20, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
    )
  )
  (encoder_sigma): Sequential(
    (0): Sequential(
      (0): Linear(in_features=156, out_features=60, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
    )
    (1): Sequential(
      (0): Linear(in_features=60, out_features=20, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
    )
  )
  (decoder): Sequential(
    (0): Sequential(
      (0): Linear(in_features=20, out_features=60, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
    )
    (1): Sequential(
      (0): Linear(in_features=60, out_features=156, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
    )
  )
)


In [6]:
train(VAE,optimizer=optimizer)

Epcho 0 loss nan
Epcho 1 loss nan
Epcho 2 loss nan
Epcho 3 loss nan
Epcho 4 loss nan
Epcho 5 loss nan
Epcho 6 loss nan
Epcho 7 loss nan
Epcho 8 loss nan
Epcho 9 loss nan
Epcho 10 loss nan
Epcho 11 loss nan
Epcho 12 loss nan
Epcho 13 loss nan
Epcho 14 loss nan
Epcho 15 loss nan
Epcho 16 loss nan
Epcho 17 loss nan
Epcho 18 loss nan
Epcho 19 loss nan
Epcho 20 loss nan
Epcho 21 loss nan
Epcho 22 loss nan
Epcho 23 loss nan
Epcho 24 loss nan
Epcho 25 loss nan
Epcho 26 loss nan
Epcho 27 loss nan
Epcho 28 loss nan
Epcho 29 loss nan
Epcho 30 loss nan
Epcho 31 loss nan
Epcho 32 loss nan
Epcho 33 loss nan
Epcho 34 loss nan
Epcho 35 loss nan
Epcho 36 loss nan
Epcho 37 loss nan
Epcho 38 loss nan
Epcho 39 loss nan
Epcho 40 loss nan
Epcho 41 loss nan
Epcho 42 loss nan
Epcho 43 loss nan
Epcho 44 loss nan
Epcho 45 loss nan
Epcho 46 loss nan
Epcho 47 loss nan
Epcho 48 loss nan
Epcho 49 loss nan
Epcho 50 loss nan
Epcho 51 loss nan
Epcho 52 loss nan
Epcho 53 loss nan
Epcho 54 loss nan
Epcho 55 loss nan
Ep