In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from tqdm.notebook import trange, tqdm

In [2]:
# DATASET
class LSTMDataset(Dataset):
    """Sliding-window dataset for next-step prediction.

    Sample ``i`` pairs the window ``data[i : i + sequence_length]`` with
    ``labels[i + sequence_length]``, i.e. the label of the step immediately
    after the window.

    Args:
        data: Sliceable sequence of per-step features (first axis is the step).
        labels: Indexable sequence of per-step labels, aligned with ``data``.
        sequence_length: Number of consecutive steps in each window.
        transform: Optional callable applied to every window before it is
            returned. ``None`` (the default) returns the raw slice.
    """

    def __init__(self, data, labels, sequence_length, transform=None):
        self.data = data
        self.labels = labels
        self.sequence_length = sequence_length
        self.transform = transform

    def __len__(self):
        # A window starting at idx also needs labels[idx + sequence_length], so
        # the last `sequence_length` start positions are not valid samples.
        # Reporting len(data) here would make samplers request indices that
        # either raise IndexError or produce truncated windows.
        usable = min(len(self.data), len(self.labels)) - self.sequence_length
        return max(0, usable)

    def __getitem__(self, idx):
        """Return ``(X, y)`` for the window starting at ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(f'index {idx} out of range for {n_samples} samples')

        stop = idx + self.sequence_length
        # X: the `sequence_length` steps starting at idx
        X = self.data[idx:stop]
        # y: label at the step right after the window
        y = self.labels[stop]

        if self.transform is not None:
            X = self.transform(X)
        return X, y

In [5]:
path = 'inputs/A'

# TODO: Min-max/normalize data
# TODO: Generalize to other stocks

NUM_FILES = 15         # CSVs are named 1.csv .. 15.csv under `path`
SEQUENCE_LENGTH = 20   # steps per input window
BATCH_SIZE = 64
TRAIN_FRACTION = 0.8   # leading share of each file's rows used for training

train_loaders = []  # one DataLoader per CSV file
test_loaders = []   # held-out tail of the same files, same order

for i in range(1, NUM_FILES + 1):
    # Non-mutating drop instead of inplace=True; the result is the same frame.
    data_df = pd.read_csv(f'{path}/{i}.csv', index_col=0).drop(columns=['timestamp'])

    X_df = data_df.drop(columns='label')
    y_df = data_df['label']

    # pandas yields float64 by default while torch layers default to float32,
    # so cast the features up front to avoid a dtype mismatch in the model.
    X = torch.from_numpy(X_df.to_numpy(dtype=np.float32))
    y = torch.from_numpy(y_df.values)

    # Split by row position so the test loader never sees training rows.
    # NOTE(review): assumes rows are already in time order — confirm.
    split = int(len(X) * TRAIN_FRACTION)
    if min(split, len(X) - split) <= SEQUENCE_LENGTH:
        raise ValueError(
            f'{path}/{i}.csv has {len(X)} rows, too few to build train/test '
            f'windows of length {SEQUENCE_LENGTH}'
        )

    train_loader = DataLoader(
        LSTMDataset(X[:split], y[:split], sequence_length=SEQUENCE_LENGTH),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )
    # Evaluation order does not matter, so keep it deterministic.
    test_loader = DataLoader(
        LSTMDataset(X[split:], y[split:], sequence_length=SEQUENCE_LENGTH),
        batch_size=BATCH_SIZE,
        shuffle=False,
    )

    train_loaders.append(train_loader)
    test_loaders.append(test_loader)

## Training

In [None]:
from LSTM import LSTM
from LSTM  import LSTMTrainer

In [None]:
# MODEL
# Prefer CUDA, then Apple-silicon MPS, then CPU.
# torch.backends.mps.is_available() is the documented MPS probe; the
# torch.mps.is_available shortcut is missing from older PyTorch releases and
# would raise AttributeError on machines without CUDA.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

# NOTE(review): input_size=10 presumably equals the number of feature columns
# left after dropping 'timestamp' and 'label' — confirm against the CSVs.
model = LSTM(input_size=10, hidden_size=64, num_layers=1, dropout=0.2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# The trainer receives the per-file loader lists built earlier in the notebook.
trainer = LSTMTrainer(model, train_loaders, test_loaders, optimizer, criterion, device, num_epochs=10)

In [None]:
# TRAINING
# Start training with the trainer built in the previous cell. What this call
# logs or returns is decided by LSTMTrainer in the local LSTM module, which is
# not shown in this notebook.
trainer.train()