In [None]:
from datetime import timedelta

import torch
import polars as pl
import seaborn as sns
import matplotlib.pyplot as plt
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS, NHITS, LSTM

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Global figure defaults: 12x8 inch canvas at 100 dpi, applied before the
# "ggplot" style sheet is activated (same order as before).
plt.rcParams.update(
    {
        "figure.figsize": [12, 8],
        "figure.dpi": 100,
    }
)
plt.style.use("ggplot")

In [None]:
# Show whether PyTorch reports CUDA as available (value is the cell output).
torch.cuda.is_available()

In [None]:
# Forecast horizon: passed as `h` to every model, used to size the NHITS
# input window, and used (in days) to derive the train/test cutoff date.
HORIZON = 12

### Data

In [None]:
# Load every parquet file matching the glob pattern into one polars DataFrame.
stockprices_df = pl.read_parquet("../data/stock_prices/**/*.parquet")

In [None]:
# Preview the first rows of the loaded frame.
stockprices_df.head()

In [None]:
# Keep only the three tickers of interest and hand the result to pandas,
# since seaborn consumes pandas frames.
filtered_pdf = (
    stockprices_df
    .filter(pl.col("Ticker").is_in(["AAPL", "AMZN", "NFLX"]))
    .to_pandas()
)

# One closing-price line per ticker over time.
sns.lineplot(data=filtered_pdf, x="Date", y="Close", hue="Ticker")

### Preprocessing

In [None]:
# Reshape the raw prices into the long format used for forecasting below:
#   ds        <- Date (cast to a polars Date)
#   unique_id <- Ticker
#   y         <- Close
preprocessed_df = stockprices_df.select(
    ds=pl.col("Date").cast(pl.Date),
    unique_id=pl.col("Ticker"),
    y=pl.col("Close"),
)

preprocessed_df.head()

In [None]:
# Most recent date present in the data.
max_ds = preprocessed_df["ds"].max()

# Everything strictly before the cutoff is training data; the cutoff day and
# everything after it is held out.
# NOTE(review): the hold-out window spans HORIZON + 2 calendar days
# (cutoff_ds through max_ds inclusive), which is longer than the HORIZON-step
# forecast -- confirm this is intended.
cutoff_ds = max_ds - timedelta(days=HORIZON + 1)

train_df = preprocessed_df.filter(pl.col("ds").lt(cutoff_ds))
test_df = preprocessed_df.filter(pl.col("ds").ge(cutoff_ds))

### Model training

In [None]:
# Candidate forecasters; each one predicts HORIZON steps ahead and is trained
# for at most 1000 steps.
models = [
    # LSTM encoder/decoder with 64 hidden units each, using the "standard"
    # scaler option.
    LSTM(
        h=HORIZON,
        max_steps=1000,
        scaler_type="standard",
        encoder_hidden_size=64,
        decoder_hidden_size=64,
    ),
    # NHITS with a lookback window twice as long as the forecast horizon.
    NHITS(
        h=HORIZON,
        input_size=2 * HORIZON,
        max_steps=1000,
        n_freq_downsample=[2, 1, 1],
    ),
]

# "D" is the canonical pandas alias for calendar-day frequency; the lowercase
# "d" spelling is deprecated in recent pandas releases.
# NOTE(review): the series are stock prices, which presumably have no weekend
# rows -- confirm whether a business-day frequency would be more appropriate.
nf = NeuralForecast(models=models, freq="D")

# NeuralForecast is fitted on a pandas frame here, hence the conversion.
nf.fit(df=train_df.to_pandas())

### Predictions

In [None]:
# Forecast HORIZON steps past the end of the training data for every series.
predict_df = nf.predict()

# Older NeuralForecast releases return `unique_id` as the index, newer ones as
# a regular column. Only reset the index when the id is not already a column;
# an unconditional reset would add a spurious "index" column that later gets
# mistaken for a model output.
if "unique_id" not in predict_df.columns:
    predict_df = predict_df.reset_index()

predict_df.head()

In [None]:
# Bring the forecasts back into polars, align the `ds` dtype with the
# hold-out frame (Date), and keep only the (ds, unique_id) pairs that exist in
# both frames so each forecast sits next to its actual value `y`.
test_validation_df = (
    pl.from_pandas(predict_df)
    .with_columns(pl.col("ds").cast(pl.Date))
    .join(test_df, on=["ds", "unique_id"], how="inner")
)
test_validation_df.head()

In [None]:
# Stack the training history and the forecast-annotated hold-out rows into a
# single frame; "align" combines the frames on their shared columns, so the
# training rows get nulls in the model columns.
timeseries_with_forecasts_df = pl.concat(
    [
        train_df,
        test_validation_df,
    ],
    how="align",
)

# A bare expression in the middle of a cell is never rendered, so display the
# preview explicitly.
display(timeseries_with_forecasts_df.head())

# Every column that is not part of the (ds, unique_id, y) schema is treated as
# a model output. Iterating over the columns (instead of using set arithmetic)
# keeps the order deterministic, so plot legends and colours are stable
# between runs.
model_names = [
    column
    for column in timeseries_with_forecasts_df.columns
    if column not in {"ds", "unique_id", "y"}
]
model_names

In [None]:
# One figure per ticker: actual closing price (`y`) and every model forecast
# over the last 30 days of training data plus the hold-out window.
for ticker in ["AAPL", "AMZN", "NFLX"]:
    ticker_pdf = (
        timeseries_with_forecasts_df.filter(
            (pl.col("unique_id") == ticker)
            & (pl.col("ds") >= cutoff_ds - timedelta(days=30))
        )
        .sort("ds")
        .to_pandas()
        # Index by date so the wide-form plot uses dates on the x-axis rather
        # than the positional row index.
        .set_index("ds")
    )

    # A fresh figure per ticker; without it every iteration draws onto the
    # same implicit axes and the three tickers are overlaid in one plot.
    fig, ax = plt.subplots()
    sns.lineplot(data=ticker_pdf[["y", *model_names]], ax=ax)
    ax.set(
        title=f"{ticker}: actual close vs. forecasts",
        xlabel="Date",
        ylabel="Close",
    )
    plt.show()