# Multi-asset forecasting with an LSTM

This notebook trains an LSTM model on 22 stocks from the OMXS30 index
and uses it to forecast their returns.

Import the data and create the data loaders (train, validation and test).

In [None]:
import pandas as pd
from data.MultiAssetDataset import MultiAssetDataset
from torch.utils.data import DataLoader

# Load the feature table; the "Date" column becomes a DatetimeIndex.
df = pd.read_csv("data/OMXS22_model_features_raw.csv", index_col="Date", parse_dates=True)

tickers = df["Ticker"].unique().tolist()
# Only "Return" is used as input; candidate extras, currently disabled:
# "Volume", "SMA20", "EMA20", "RSI14".
features = ["Return"]
target_col = "Return"
window = 60
horizon = 1

# Chronological split boundaries: train < train_stop <= val < val_stop <= test.
train_stop = pd.Timestamp("2020-01-01")
val_stop = pd.Timestamp("2023-12-31")

# Fit the standardization statistics on the training period ONLY. Computing
# them over the full frame would leak information from the validation and
# test periods into the inputs the model is trained on.
train_mask = df.index < train_stop
scaler_means = df.loc[train_mask, features].mean()
scaler_stds = df.loc[train_mask, features].std()

# Apply the train-fitted scaling to every split.
df[features] = (df[features] - scaler_means) / scaler_stds

df_train = df[train_mask].copy()
df_val = df[(df.index >= train_stop) & (df.index < val_stop)].copy()
df_test = df[df.index >= val_stop].copy()


ds_train = MultiAssetDataset(df_train, tickers, features, target_col, window, horizon)
ds_val = MultiAssetDataset(df_val, tickers, features, target_col, window, horizon)
ds_test = MultiAssetDataset(df_test, tickers, features, target_col, window, horizon)

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(ds_train, batch_size=32, shuffle=True)
val_loader = DataLoader(ds_val, batch_size=32)
test_loader = DataLoader(ds_test, batch_size=32)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from multi_asset_models.MultiAssetLSTM import MultiAssetLSTM
from train.Trainer import Trainer

# Hyperparameters forwarded to MultiAssetLSTM; sizes follow the data setup.
lstm_config = {
    "n_assets": len(tickers),
    "n_features": len(features),
    "hidden_size": 32,
    "num_layers": 3,
    "dropout": 0.3,
    "horizon": horizon,
}
model = MultiAssetLSTM(**lstm_config)

# Adam optimizer with an L1 (mean absolute error) objective.
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.L1Loss()

num_epochs = 50
# NOTE(review): patience equals the epoch budget, which presumably means
# early stopping never triggers -- confirm against Trainer.fit.
patience = num_epochs
save_path = "best_multiasset_lstm.pt"

trainer = Trainer(model, optimizer, criterion)
history = trainer.fit(
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=num_epochs,
    patience=patience,
    save_path=save_path,
)

# Reload the weights stored at save_path before scoring the test split.
best_state = torch.load(save_path)
model.load_state_dict(best_state)
test_loss = trainer.eval_epoch(test_loader)
print(f"Test loss: {test_loss:.4f}")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Plot the per-epoch losses recorded in `history` (1-based epoch axis).
epochs = np.arange(1, len(history['train_loss']) + 1)
plt.figure()
plt.plot(epochs, history['train_loss'], label='Train loss')
# The validation curve is drawn only if it was recorded.
if 'val_loss' in history:
    plt.plot(epochs, history['val_loss'], label='Val loss')
plt.xlabel('Epoch')
# The training criterion is nn.L1Loss, so these curves are mean absolute
# error, not MSE.
plt.ylabel('MAE (L1) loss')
plt.title('Training history')
plt.legend()
plt.show()

In [None]:
# Collect predictions and targets over the test loader, then plot them for
# one asset.
model.eval()
all_preds = []
all_trues = []

with torch.no_grad():
    for Xb, yb in test_loader:
        preds = model(Xb)  # (B, A, H)
        # Pick asset 0 and horizon step 0 for plotting.
        all_preds.append(preds[:, 0, 0].cpu().numpy())
        all_trues.append(yb[:, 0, 0].cpu().numpy())

# Concatenate the per-batch arrays into one series each.
all_preds = np.concatenate(all_preds)
all_trues = np.concatenate(all_trues)

# Values are plotted as produced by the model/dataset, i.e. presumably in the
# standardized units of the target column. To view them in raw units, apply
# `x * scaler_stds[target_col] + scaler_means[target_col]` to both arrays.

n_plot = 2000  # cap on the number of samples drawn
plt.figure()
plt.plot(all_trues[:n_plot], label='Actual')
plt.plot(all_preds[:n_plot], label='Predicted')
plt.xlabel('Sample index')
# No accumulation is performed above, so this is not a cumulative return.
plt.ylabel('Standardized return (asset 0)')
# The data comes from test_loader, not the training set.
plt.title('Predicted vs Actual on Test Set (first asset)')
plt.legend()
plt.show()