In [55]:
import pandas as pd

# Load the price table and key every row by its "Date" column in one chained step.
data = pd.read_csv("./sp500.csv").set_index("Date")

# Bare last expression so the notebook renders the (truncated) frame.
data

Date,Open,High,Low,Close,5d,10d,20d
2014-07-14,0.037816,0.036900,0.044390,0.041064,0.033454,0.027488,0.022811
2014-07-15,0.039894,0.037642,0.043134,0.040004,0.033993,0.027862,0.023326
2014-07-16,0.039614,0.038036,0.046004,0.042304,0.034485,0.028098,0.023901
2014-07-17,0.040557,0.037442,0.040425,0.035798,0.034115,0.027625,0.023918
2014-07-18,0.035510,0.036917,0.041878,0.041374,0.034715,0.027419,0.024190
...,...,...,...,...,...,...,...
2024-06-10,0.972091,0.977374,0.978436,0.979762,0.982198,0.983501,0.991455
2024-06-11,0.975356,0.980196,0.977249,0.983793,0.986923,0.985486,0.993690
2024-06-12,0.990910,1.000000,1.000000,0.996474,0.990693,0.989902,0.996223
2024-06-13,1.000000,0.998522,0.998161,1.000000,0.995239,0.995584,0.998047


In [56]:
import numpy as np

In [57]:
WINDOW_SIZE = 10

# Number of (window, target) pairs: each window needs one following row for its target.
n_windows = len(data) - WINDOW_SIZE

# X[k] holds rows k .. k+WINDOW_SIZE-1 of every column (stride-1 sliding windows).
X = np.array(
    [data.iloc[start : start + WINDOW_SIZE].values for start in range(n_windows)]
).astype(np.float32)

# y[k] is the "Close" value of the row right after window k (next day close price),
# i.e. every Close from position WINDOW_SIZE onwards.
y = data["Close"].iloc[WINDOW_SIZE:].to_numpy().astype(np.float32)

In [58]:
from sklearn.model_selection import train_test_split

# Chronological hold-out: the samples are stride-1 overlapping windows, so a
# shuffled split would place near-identical windows (and future prices) in both
# train and test, making the test loss optimistic. shuffle=False keeps the first
# 80% of windows for training and the last 20% for testing, and also makes the
# split reproducible without a seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Number of training targets (split sizes are the same as with a shuffled split).
y_train.shape

(1991,)

In [59]:
import torch
import torch.nn as nn

In [60]:
def _select_device():
    """Return the preferred torch device: CUDA first, then Apple MPS, else CPU.

    Prints which accelerator was chosen; nothing is printed for the CPU fallback.
    """
    if torch.cuda.is_available():
        print("Using CUDA")
        return torch.device("cuda")
    if torch.backends.mps.is_available():
        print("Using MPS")
        return torch.device("mps")
    return torch.device("cpu")


device = _select_device()

Using MPS


In [61]:
class OtwayModel(nn.Module):
    """Stacked LSTM followed by a linear head applied to the last time step.

    Args:
        input_size: number of features per time step.
        output_size: number of values predicted per sequence.
        hidden_size: width of each LSTM layer (default 64, the previous fixed value).
        num_layers: number of stacked LSTM layers (default 2, the previous fixed value).
    """

    def __init__(self, input_size, output_size, hidden_size=64, num_layers=2):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, seq_len, input_size).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # The head must accept exactly what the LSTM emits per step.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a (batch, seq_len, input_size) tensor to (batch, output_size)."""
        seq_out, _ = self.lstm(x)
        # Only the final time step's hidden output feeds the prediction head.
        return self.fc(seq_out[:, -1, :])

In [62]:
# Axis 2 of X_train is the per-time-step feature count; the model predicts one value.
n_features = X_train.shape[2]
model = OtwayModel(n_features, 1)
model = model.to(device)

# Mean-squared error objective, optimised with Adam over all model parameters.
loss_fn = nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.001)

In [63]:
# Copy the training arrays onto the selected device; targets become a column
# vector so they line up with the model's one-value-per-sample output.
X_train_tensor, y_train_tensor = (
    torch.tensor(X_train, device=device),
    torch.tensor(y_train, device=device).reshape(-1, 1),
)

In [64]:
# Sanity check: targets should be a column vector, one value per training sample.
y_train_tensor.shape

torch.Size([1991, 1])

In [65]:
# eval() returns the module itself, so this cell's output is the architecture summary.
# The training cell calls model.train(), so eval mode does not carry into training.
model.eval()

OtwayModel(
  (lstm): LSTM(7, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)

In [66]:
# Full-batch training: every epoch is a single optimiser step over the whole
# training set. Training mode is set once up front because nothing inside the
# loop switches the model out of it.
model.train()
for epoch in range(100):
    # Clear gradients left over from the previous step before the new backward pass.
    optimiser.zero_grad()

    output = model(X_train_tensor)
    loss = loss_fn(output, y_train_tensor)

    loss.backward()
    optimiser.step()

    print(f"Epoch {epoch}: Loss {loss.item()}")

Epoch 0: Loss 0.130100280046463
Epoch 1: Loss 0.11420092731714249
Epoch 2: Loss 0.09965336322784424
Epoch 3: Loss 0.08629310131072998
Epoch 4: Loss 0.07406680285930634
Epoch 5: Loss 0.06300035119056702
Epoch 6: Loss 0.05323681980371475
Epoch 7: Loss 0.04507584869861603
Epoch 8: Loss 0.039000559598207474
Epoch 9: Loss 0.03559408709406853
Epoch 10: Loss 0.03510995954275131
Epoch 11: Loss 0.036618273705244064
Epoch 12: Loss 0.03789425641298294
Epoch 13: Loss 0.03700738400220871
Epoch 14: Loss 0.0336187519133091
Epoch 15: Loss 0.028675828129053116
Epoch 16: Loss 0.023469531908631325
Epoch 17: Loss 0.018948135897517204
Epoch 18: Loss 0.015489425510168076
Epoch 19: Loss 0.01299824845045805
Epoch 20: Loss 0.01112702488899231
Epoch 21: Loss 0.00948273204267025
Epoch 22: Loss 0.007772872690111399
Epoch 23: Loss 0.005896132439374924
Epoch 24: Loss 0.0039994860999286175
Epoch 25: Loss 0.002492025727406144
Epoch 26: Loss 0.0019146300619468093
Epoch 27: Loss 0.0024428204633295536
Epoch 28: Loss 0.0

In [67]:
# Write only the model's state_dict (its parameters and buffers) to
# "otway-model.pt" in the current working directory.
torch.save(model.state_dict(), "otway-model.pt")

In [None]:
# Evaluate model on test set
X_test_tensor = torch.tensor(X_test, device=device)
y_test_tensor = torch.tensor(y_test, device=device).reshape(-1, 1)  # column vector, like the model output

model.eval()
# No backward pass happens here, so disable autograd: the forward pass then
# skips building a computation graph and keeping activations for it.
with torch.no_grad():
    output = model(X_test_tensor)
    loss = loss_fn(output, y_test_tensor)

print("Test set loss:", loss.item())