In [2]:
import pandas as pd
from torch import nn
import torch
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import numpy as np

In [3]:
root_folder = "data/processed"

In [4]:
# Data for inference should be loaded from online feature store
inference_df = pd.read_csv(f"../{root_folder}/inference.csv")
inference_df.head()

Unnamed: 0,Close,Hour,Minute,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2,DayOfWeek_3,DayOfWeek_4
0,0.928917,15,29,0,0,0,0,1
1,0.921224,15,30,0,0,0,0,1
2,0.924498,15,31,0,0,0,0,1
3,0.925564,15,32,0,0,0,0,1
4,0.925523,15,33,0,0,0,0,1


In [5]:
# Helper function
def create_lag_features(df, lag=1):
    for i in range(1, lag + 1):
        df[f"lag_{i}"] = df["Close"].shift(i)
    df.dropna(inplace=True)
    return df

In [6]:
class LSTMModel(nn.Module):
    def __init__(self):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(1, 64, batch_first=True)
        self.linear = nn.Linear(64, 1)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out[:, -1, :]  # Get the output of the last timestep
        out = self.linear(lstm_out)
        return out

In [9]:
lag = 30

inference_df = create_lag_features(inference_df, lag)

model = LSTMModel()
# Model should be loaded from model registry
model.load_state_dict(torch.load("lstm_model.pth"))
model.eval()

LSTMModel(
  (lstm): LSTM(1, 64, batch_first=True)
  (linear): Linear(in_features=64, out_features=1, bias=True)
)

In [8]:
# Helper function
def get_next_minute(hour: int, minute: int, day_of_week: int):
    """Gets the next minute where S&P500 is open."""
    minute += 1

    if minute == 60:
        minute = 0
        hour += 1

        if hour == 16:
            hour = 9
            minute = 30
            day_of_week = (day_of_week + 1) % 5

    return hour, minute, day_of_week

In [10]:
# Helper function
def get_next_minute_row(df_row):
    """Move Close to lag_1, lag_1 to lag_2, etc. set the time (DayOfWeek, Hour and Minute) to
    the next minute where S&P500 is open."""

    for i in range(lag, 1, -1):
        df_row[f"lag_{i}"] = df_row[f"lag_{i - 1}"]

    df_row["lag_1"] = df_row["Close"]

    day_of_week = -1
    for i in range(0, 5):
        if df_row[f"DayOfWeek_{i}"] == 1:
            day_of_week = i
            df_row[f"DayOfWeek_{i}"] = 0
            break

    next_min, next_hour, next_day_of_week = get_next_minute(
        df_row["Hour"], df_row["Minute"], day_of_week
    )

    df_row["Hour"] = next_hour
    df_row["Minute"] = next_min
    df_row[f"DayOfWeek_{next_day_of_week}"] = 1

    return df_row

In [11]:
# scaler params should be loaded from feature store
import json

with open("../data/processed/scaler_params.json", "r") as f:
    scaler_params = json.load(f)

scaler = MinMaxScaler()
scaler.min_, scaler.scale_ = scaler_params["min_"], scaler_params["scale_"]
scaler.data_min_, scaler.data_max_ = (
    scaler_params["data_min"],
    scaler_params["data_max"],
)
scaler.data_range_ = scaler_params["data_range"]

FileNotFoundError: [Errno 2] No such file or directory: 'data/processed/scaler_params.json'

In [58]:
forecast_length = 10

last_minute_row = inference_df.iloc[-1].copy()

with torch.no_grad():
    for i in range(forecast_length):
        next_minute_row = get_next_minute_row(last_minute_row.copy())
        next_minute_row = next_minute_row.drop("Close")
        input_tensor = torch.tensor(
            next_minute_row.values.reshape(1, lag + 7, 1), dtype=torch.float32
        )
        forecast = model(input_tensor).item()
        print(f"Day {i + 1}: {scaler.inverse_transform([[forecast]])[0][0]}")
        next_minute_row["Close"] = forecast
        last_minute_row = next_minute_row

Day 1: 5428.233522772789
Day 2: 5428.180880308151
Day 3: 5428.444394826889
Day 4: 5428.239989757538
Day 5: 5428.505559206009
Day 6: 5428.232827723026
Day 7: 5428.382595837116
Day 8: 5427.917637765408
Day 9: 5428.105784714222
Day 10: 5427.675216495991
