In [1]:
import sys
sys.path.append('..')

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import lightning as L

from sklearn.preprocessing import MinMaxScaler
import pandas as pd

from utils.timescale_connector import TimescaleConnector


In [2]:
df = TimescaleConnector.query_ohlcv_daily("ACB")

In [10]:
len(df)

1680

In [3]:
# Prepare for a time series dataset
# First test only with "close"
data = df[["open", "high", "low", "close", "volume"]].values

In [11]:
len(data)

120

In [6]:
# Scale the data to between -1 and 1, this matches the range of the tanh activation function
scaler = MinMaxScaler(feature_range=(-1, 1))
data = scaler.fit_transform(data.reshape(-1, 1))

In [4]:
# Lookback period
lookback = 10

In [6]:
# Convert the data to Torch tensor
data = torch.FloatTensor(data).reshape(-1, lookback, len(df.columns))


In [7]:
# Prepare the dataset
def create_inout_sequences(input_data: pd.DataFrame, tw: int) -> list:
    inout_seq = []
    length = len(input_data)
    for i in range(length-tw):
        train_seq = input_data[i:i+tw]
        train_label = input_data[i+tw:i+tw+1]
        inout_seq.append((train_seq, train_label))
    return inout_seq

In [8]:
# Create the sequence
inout_seq = create_inout_sequences(data, lookback)

In [9]:
len(inout_seq)

110

In [12]:
# Define the LSTM model
from typing import Any
from lightning.pytorch.utilities.types import STEP_OUTPUT, OptimizerLRScheduler

class LSTM(L.LightningModule):
    def __init__(self, input_size=5, hidden_layer_size=5, output_size=5):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size),
                            torch.zeros(1,1,self.hidden_layer_size))

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq) ,1, len(df.columns)), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]  
    
    def configure_optimizers(self) -> OptimizerLRScheduler:
        return torch.optim.Adam(self.parameters(), lr=0.1)
    
    def training_step(self, *args: Any, **kwargs: Any) -> STEP_OUTPUT:
        return super().training_step(*args, **kwargs)

In [14]:
# Init the model, define loss function and optimzier
model = LSTM()
loss_function = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

In [17]:
trainer = L.Trainer(max_epochs=1000, log_every_n_steps=2, accelerator="gpu")
trainer.fit(model=model, train_dataloaders=inout_seq, training_ste)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


MisconfigurationException: No `training_step()` method defined. Lightning `Trainer` expects as minimum a `training_step()`, `train_dataloader()` and `configure_optimizers()` to be defined.

In [None]:
# Train
epochs = 1000

for i in range(epochs):
    for seq, labels in inout_seq:
        optimizer.zero_grad()
        model.hidden_cell = (
            torch.zeros(1, 1, model.hidden_layer_size),
            torch.zeros(1, 1, model.hidden_layer_size),
        )
        
        y_pred = model(seq)
        
        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()
        
    if i%25 == 1:
        print(f"Epoch {i:3} loss: {single_loss.item():10.8f}")
        
print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

In [22]:
batch_size = 100  # Set your desired batch size
dataloader = DataLoader(inout_seq, batch_size=batch_size)


def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


In [24]:
train_loop(dataloader, model, loss_function, optimizer)

RuntimeError: input.size(-1) must be equal to input_size. Expected 1, got 10