In [33]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
import ast
import matplotlib.pyplot as plt

In [55]:
# Location of the CSV whose columns PriceForecastingDataset reads below.
DATA_PATH = 'path/to/data'
df = pd.read_csv(DATA_PATH)

In [40]:
class PriceForecastingDataset(Dataset):
    """Feature/target tensors built from a DataFrame that stores its own lags.

    Every row of ``dataframe`` becomes one sample. The history of a sample is
    read from the columns ``scaledFeatures_t-<k>`` for ``k = lookback, ..., 1``
    followed by ``scaledFeatures``; the steps are kept in exactly that column
    order. Each cell is expected to hold a list of numbers, either as a
    Python-literal string (parsed with ``ast.literal_eval``) or as an
    already-parsed sequence (used as-is).

    Args:
        dataframe: Table containing the lagged feature columns and the target
            columns.
        lookback: Number of ``scaledFeatures_t-<k>`` columns to read. Defaults
            to 24, i.e. ``lookback + 1`` steps per sample.
        target_columns: Names of the columns used as regression targets.
            Defaults to the three ``price_t+<h>_scaled`` columns.

    Attributes:
        features: float32 tensor of shape
            ``(n_samples, lookback + 1, n_features)``.
        targets: float32 tensor of shape ``(n_samples, len(target_columns))``.

    Raises:
        ValueError: If ``lookback`` is negative.
        KeyError: If a required column is missing from ``dataframe``.
    """

    DEFAULT_TARGET_COLUMNS = ('price_t+1_scaled', 'price_t+2_scaled', 'price_t+3_scaled')

    def __init__(self, dataframe, lookback=24, target_columns=DEFAULT_TARGET_COLUMNS):
        if lookback < 0:
            raise ValueError(f'lookback must be non-negative, got {lookback}')

        self.targets = torch.tensor(
            dataframe[list(target_columns)].values, dtype=torch.float32
        )

        # Lagged columns first (largest lag suffix first), current step last.
        step_columns = [f'scaledFeatures_t-{lag}' for lag in range(lookback, 0, -1)]
        step_columns.append('scaledFeatures')

        per_step = [self._column_to_array(dataframe[name]) for name in step_columns]

        # Each per-step array is (n_samples, n_features); stacking on axis 1
        # yields (n_samples, n_steps, n_features) without a separate transpose.
        self.features = torch.tensor(np.stack(per_step, axis=1), dtype=torch.float32)

    @staticmethod
    def _column_to_array(column):
        """Turn one feature column into an ``(n_samples, n_features)`` array."""
        # literal_eval only accepts strings; cells that are already lists
        # (e.g. a frame built in memory rather than read from CSV) pass through.
        return np.array([
            ast.literal_eval(cell) if isinstance(cell, str) else cell
            for cell in column
        ])

    def __len__(self):
        """Number of samples (rows of the source frame)."""
        return len(self.targets)

    def __getitem__(self, idx):
        """Return the ``(features, targets)`` pair for sample ``idx``."""
        return self.features[idx], self.targets[idx]


In [41]:
# Parse the loaded frame into feature/target tensors once, up front.
dataset = PriceForecastingDataset(dataframe=df)

In [None]:
# Sanity check: sample count and the tensor shapes of a single sample.
dataset_length = len(dataset)
print("Total number of entries in the dataset:", dataset_length)

features, targets = dataset[0]
print("Shape of features:", features.shape)
print("Shape of targets:", targets.shape)

# 80/20 train/validation split. The generator is seeded so that
# Restart Kernel -> Run All reproduces the same partition.
# NOTE(review): if consecutive rows are consecutive time steps, the lag
# windows of neighbouring rows overlap and a random split places
# near-duplicate windows on both sides; a chronological split may be more
# appropriate -- confirm how the rows are ordered.
SPLIT_SEED = 42
train_size = int(0.8 * dataset_length)
val_size = dataset_length - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
print("Number of entries in the training split:", len(train_dataset))
print("Number of entries in the validation split:", len(val_dataset))

In [43]:
# Mini-batch size shared by both loaders.
BATCH_SIZE = 64

# Training batches are shuffled; validation is read in a fixed order.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [44]:
class BidirectionalLSTM(nn.Module):
    """Stacked bidirectional LSTM regressor that rescales its own outputs.

    The network runs a batch-first bidirectional ``nn.LSTM`` over the input
    sequence, applies dropout to the LSTM output at the last time step, maps
    it through a linear layer and finally applies
    ``out * df_stddev + df_mean``.

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_dim: Hidden size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        output_dim: Number of values predicted per sample.
        df_mean: Offset added to the linear layer's output. A scalar, or
            anything ``torch.as_tensor`` accepts that broadcasts against
            ``(batch, output_dim)``.
        df_stddev: Scale multiplied into the linear layer's output; same
            accepted forms as ``df_mean``.
        dropout_rate: Dropout probability used between LSTM layers and on the
            last-step features before the linear layer.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, df_mean, df_stddev, dropout_rate):
        super().__init__()
        # Kept as attributes for callers that inspect the configuration.
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True,
                            dropout=dropout_rate, bidirectional=True)
        self.dropout = nn.Dropout(dropout_rate)
        # Forward and reverse hidden states are concatenated, hence the * 2.
        self.fc = nn.Linear(hidden_dim * 2, output_dim)

        # Store the rescaling statistics on the module so forward() does not
        # depend on same-named globals existing in the kernel. Buffers follow
        # .to()/.cuda()/.double(); persistent=False keeps the state_dict keys
        # identical to a model that has no such buffers.
        self.register_buffer(
            'df_mean', torch.as_tensor(df_mean, dtype=torch.float32), persistent=False
        )
        self.register_buffer(
            'df_stddev', torch.as_tensor(df_stddev, dtype=torch.float32), persistent=False
        )

    def forward(self, x):
        """Predict rescaled targets for a batch.

        Args:
            x: Input of shape ``(batch, seq_len, input_dim)`` (the LSTM is
                constructed with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, output_dim)``.
        """
        # With no initial state given, nn.LSTM starts from zeros on x's
        # device and dtype, so no manual h0/c0 is needed.
        out, _ = self.lstm(x)

        # NOTE(review): at the last time step the reverse direction has only
        # processed that single step; concatenating the forward state at
        # t=-1 with the reverse state at t=0 is a common alternative. Left
        # unchanged to preserve the model's behaviour.
        out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        return out * self.df_stddev + self.df_mean

In [45]:
def count_trainable_parameters(model):
    """Return the total element count of ``model``'s parameters that require gradients."""
    total = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad=False) are not counted.
        if param.requires_grad:
            total += param.numel()
    return total

In [46]:
# Network configuration. df_mean / df_stddev are the values the model uses to
# rescale its outputs (out * df_stddev + df_mean).
MODEL_CONFIG = dict(
    input_dim=6,
    hidden_dim=50,
    num_layers=2,
    output_dim=3,
    df_mean=0.7984,
    df_stddev=0.0148,
    dropout_rate=0.2,
)
model = BidirectionalLSTM(**MODEL_CONFIG)

total_params = count_trainable_parameters(model)
print(f"Total Trainable Parameters: {total_params}")

# Mean-squared-error objective, optimised with Adam.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

Total Trainable Parameters: 84303


In [None]:
def _mean(values):
    """Arithmetic mean of a list of per-batch loss values."""
    return sum(values) / len(values)


# Per-epoch loss history (one entry appended per epoch).
training_losses = []
validation_losses = []

num_epochs = 50
for epoch in range(num_epochs):
    # ---- optimisation pass over the training batches ----
    model.train()
    train_loss = []
    for inputs, labels in train_dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())
    avg_train_loss = _mean(train_loss)
    training_losses.append(avg_train_loss)

    # ---- evaluation pass: eval mode, no gradient tracking ----
    model.eval()
    val_loss = []
    with torch.no_grad():
        for inputs, labels in val_dataloader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss.append(loss.item())
    avg_val_loss = _mean(val_loss)
    validation_losses.append(avg_val_loss)

    print(f'Epoch {epoch+1}, Training Loss: {avg_train_loss}, Validation Loss: {avg_val_loss}')



In [None]:
# Training vs. validation loss, one point per epoch.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(training_losses, label='Training Loss')
ax.plot(validation_losses, label='Validation Loss')
ax.set_title('Training and Validation Loss per Epoch')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Spot-check: print the targets and the model's predictions for the first
# validation batch only.
# Set eval mode explicitly (dropout off) instead of relying on whatever mode
# an earlier cell left the model in, and skip autograd tracking since nothing
# is back-propagated here.
model.eval()
with torch.no_grad():
    for inputs, labels in val_dataloader:
        outputs = model(inputs)
        print(labels)
        print(outputs)
        break