In [1]:
import os
import pandas as pd
import torch
from lib.datasets import get_stock_price,sample_indices,train_test_split
from lib.aug import apply_augmentations,parse_augmentations
from typing import List
from torch import nn
from typing import List
import torch.nn.functional as F

In [2]:
data_config = {
    "ticker" : "^GSPC",
    "interval" : "1d",
    "column" : 1,  
    "window_size" : 20,
    "dir" : "datasets",
    "subdir" : "stock"
}
sig_config = {
    "augmentations": [
        {"name": "AddTime"},
        {"name": "LeadLag"},
    ],
    "device" : "cuda:0",
    "depth" : 4,
}

In [3]:
tensor_data = get_stock_price(data_config)
x_real_train, x_real_test = train_test_split(tensor_data, train_test_ratio=0.8, device=sig_config["device"])
if sig_config["augmentations"] is not None:
    sig_config["augmentations"] = parse_augmentations(sig_config.get('augmentations'))
print("Before augmentation shape:",x_real_train.shape)
if sig_config["augmentations"] is not None:
    # Print the tensor shape after each augmentation
    x_aug_sig = apply_augmentations(x_real_train,sig_config["augmentations"])
    input_dim = x_aug_sig.shape[1]*x_aug_sig.shape[2]
print("After augmentation shape:",x_aug_sig.shape)
x_aug_sig = x_aug_sig.to(sig_config["device"])

Rolled data for training, shape torch.Size([1232, 20, 1])
Before augmentation shape: torch.Size([985, 20, 1])
torch.Size([985, 20, 2])
torch.Size([985, 39, 4])
After augmentation shape: torch.Size([985, 39, 4])


In [4]:
class VAE(nn.Module):
    def __init__(self, x_aug_sig, epoch, batch_size, hidden_dims: List, device) -> None:
        super(VAE, self).__init__()

        self.x_aug_sig = x_aug_sig
        print("Input tensor shape: {}".format(x_aug_sig.shape))
        self.epoch = epoch
        self.batch_size = batch_size
        self.device = device

        # Assume len(hidden_dims)=3.
        self.encoder = nn.Sequential(
            nn.Linear(hidden_dims[0],hidden_dims[1]),
            nn.Linear(hidden_dims[1],hidden_dims[2]),
            nn.LeakyReLU(),
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dims[2],hidden_dims[1]),
            nn.Linear(hidden_dims[1],hidden_dims[0]),
            nn.LeakyReLU(),
        )

        # To device
        self.encoder.to(device)
        self.decoder.to(device)
    
    def encode(self, x):
        x_flatten = x.view(self.batch_size,-1)
        mean = self.encoder(x_flatten)
        log_var = self.encoder(x_flatten)
        noise = torch.randn(self.batch_size,mean.shape[1]).to(self.device)
        z = mean + torch.exp(0.5*log_var).mul(noise)
        return mean, log_var, z
        
    def decode(self,z):
        reconstructed_data = self.decoder(z)
        return reconstructed_data

    def loss(self,mean,log_var,sample_data,reconstructed_data):
        # Reconstruction loss 
        recon_loss = F.mse_loss(sample_data, reconstructed_data, reduction='sum')
        # print(recon_loss.item())
        # KL divergence
        kl_loss = 0.5 * torch.sum(mean.pow(2) + log_var.exp() - 1 - log_var)
        # Total VAE loss
        loss = recon_loss + kl_loss
        return loss
    
def train(model,optimizer):
    for i in range(model.epoch):
        # Sample time indices of size equal to the batch size.
        # From sefl.x_aug_sig
        time_indics = sample_indices(model.x_aug_sig.shape[0],model.batch_size,"cuda")
        sample_data = model.x_aug_sig[time_indics]
        # Encode 
        mean, log_var, z = model.encode(sample_data)
        # Decode
        reconstructed_data = model.decode(z)
        # Calculate loss
        loss = model.loss(mean,log_var,sample_data.view(model.batch_size,-1),reconstructed_data)
        # Backpropogation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Print loss
        if i%10==0:
            print("Epoch {} loss {}".format(i,loss.item()))


In [5]:
lr = 1e-1
batch_size = 128
epoch = 101
hidden_dims = [input_dim,60,20]
VAE = VAE(x_aug_sig=x_aug_sig,epoch=epoch,batch_size=batch_size,hidden_dims=hidden_dims,device='cuda')
print(VAE)
optimizer = torch.optim.Adam(VAE.parameters(),lr=lr)

Input tensor shape: torch.Size([985, 39, 4])
VAE(
  (encoder): Sequential(
    (0): Linear(in_features=156, out_features=60, bias=True)
    (1): Linear(in_features=60, out_features=20, bias=True)
    (2): LeakyReLU(negative_slope=0.01)
  )
  (decoder): Sequential(
    (0): Linear(in_features=20, out_features=60, bias=True)
    (1): Linear(in_features=60, out_features=156, bias=True)
    (2): LeakyReLU(negative_slope=0.01)
  )
)


In [6]:
train(VAE,optimizer=optimizer)

Epoch 0 loss nan
Epoch 10 loss nan
Epoch 20 loss nan
Epoch 30 loss nan
Epoch 40 loss nan
Epoch 50 loss nan
Epoch 60 loss nan
Epoch 70 loss nan
Epoch 80 loss nan
Epoch 90 loss nan
Epoch 100 loss nan
