In [None]:
# Reload imported modules before each cell executes, so edits to local project
# code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from datetime import datetime
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

from statsmodels.tsa.seasonal import MSTL

from bikes.preprocess.preprocess import Scaler, get_tensor_train_dataset

import torch
import torch.nn as nn

In [None]:
# Load the cycle-count table from the working directory; the "date" column is
# parsed into datetimes at read time.
cycle_counts = pd.read_csv(
    "cycle_counts.csv",
    parse_dates=["date"],
)

In [None]:
LOCATION = "Nelson Street"

# Select the rows recorded at LOCATION, then index them by date in
# chronological order.
at_location = cycle_counts["location"].eq(LOCATION)
location_df = cycle_counts[at_location].copy().set_index("date").sort_index()

In [None]:
# Half-open test window: [start, end).
TEST_PERIOD = datetime(2024, 10, 1), datetime(2025, 1, 1)


def train_test_split(df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Split a date-indexed frame into training rows and ``TEST_PERIOD`` rows.

    Rows dated before ``TEST_PERIOD[0]`` form the training set. Rows dated in
    the half-open interval ``[TEST_PERIOD[0], TEST_PERIOD[1])`` form the test
    set. Rows on or after ``TEST_PERIOD[1]`` are in neither.

    Boundaries are located by sorted insertion position rather than by exact
    match. An exact-match lookup yields -1 for a boundary date that is absent
    from the index, and ``iloc`` would then silently interpret that -1 as
    "everything but the last row".

    Args:
        df: Frame whose index holds dates sorted in ascending order.

    Returns:
        ``(train_df, test_df)``, both independent copies.

    Raises:
        ValueError: If the index is not sorted in ascending order.
    """
    if not df.index.is_monotonic_increasing:
        raise ValueError("train_test_split requires an index sorted in ascending order")

    period_start, period_end = (pd.Timestamp(bound) for bound in TEST_PERIOD)

    # side="left" gives the position of the first row dated >= the boundary,
    # which equals the exact-match position whenever the date is present.
    test_start_idx = df.index.searchsorted(period_start, side="left")
    test_end_idx = df.index.searchsorted(period_end, side="left")

    train_df = df.iloc[:test_start_idx].copy()
    test_df = df.iloc[test_start_idx:test_end_idx].copy()
    return train_df, test_df

In [None]:
def plot_timeseries(actual: pd.Series, predicted: pd.Series):
    """Plot an observed series against a predicted one on a fresh figure.

    Args:
        actual: Observed values, drawn with the legend label "Observed".
        predicted: Model output, drawn with the legend label "Predicted".

    Returns:
        The matplotlib axes holding both lines.
    """
    figure, axes = plt.subplots()

    for series, legend_label in ((actual, "Observed"), (predicted, "Predicted")):
        axes.plot(series, label=legend_label)

    axes.set(ylabel="Count")
    # Slant the x tick labels by 45 degrees.
    for tick_label in axes.get_xticklabels():
        tick_label.set_rotation(45)
    axes.legend()

    figure.tight_layout()
    return axes

## Sequence Learner Models

In [None]:
# Container for forecast series; presumably keyed by model name (confirm
# against the code that consumes it).
FORECASTS: dict[str, pd.Series] = {}

TRAIN_DF, TEST_DF = train_test_split(location_df)

# Fail here with a clear message rather than later inside a model cell with an
# opaque IndexError on `TRAIN_DF.index[0]` / `TEST_DF.index[0]`.
assert not TRAIN_DF.empty, "train_test_split produced an empty training set"
assert not TEST_DF.empty, "train_test_split produced an empty test set"

### Exponential Smoothing

In [None]:
from statsmodels.tsa.seasonal import MSTL
from statsmodels.tsa.api import ExponentialSmoothing

In [None]:
# Holt-Winters exponential smoothing with no trend component and a
# multiplicative seasonal component that repeats every 7 observations.
ets_model = ExponentialSmoothing(
    TRAIN_DF["count"],
    trend=None,
    damped_trend=False,  # documented as a bool; damping is meaningless without a trend
    seasonal="mul",
    seasonal_periods=7,
)
ets = ets_model.fit()

# In-sample predictions over the training dates, then out-of-sample
# predictions over the test dates.
fitted_values = ets.predict(start=TRAIN_DF.index[0], end=TRAIN_DF.index[-1])
forecasts = ets.predict(start=TEST_DF.index[0], end=TEST_DF.index[-1])

# `forecasts` is reassigned by every later model section, so keep this model's
# forecast under its own key.
FORECASTS["ets"] = forecasts

In [None]:
# Titles tell the training-fit figure apart from the forecast figure; the
# trailing semicolon keeps the returned object's repr out of the cell output.
plot_timeseries(TRAIN_DF["count"], fitted_values).set_title("Exponential Smoothing: training fit")
plot_timeseries(TEST_DF["count"], forecasts).set_title("Exponential Smoothing: test forecast");

### SARIMAX

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:
# Lag-7 difference followed by a lag-1 difference of the training counts.
# The figure is labelled so it stands alone, and the trailing semicolon
# suppresses the Axes repr.
TRAIN_DF["count"].diff(7).diff(1).dropna().plot(
    title="Training counts after lag-7 and lag-1 differencing",
    ylabel="Differenced count",
);

In [None]:
# Lag-7 then lag-1 differenced training counts, computed once and shared by
# both panels.
differenced = TRAIN_DF["count"].diff(7).diff(1).dropna()

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 3.5), sharey=True)

# Left panel: autocorrelation. Right panel: partial autocorrelation.
for panel, draw_correlogram in zip(ax, (plot_acf, plot_pacf)):
    draw_correlogram(differenced, ax=panel)
    panel.set(xlabel="Lag")

fig.tight_layout();

In [None]:
# Seasonal ARIMA: non-seasonal order (p, d, q) = (1, 1, 1) and seasonal order
# (P, D, Q, s) = (2, 1, 1, 7), fitted on the training counts at daily frequency.
sarimax_model = SARIMAX(
    endog=TRAIN_DF["count"],
    order=(1, 1, 1),
    seasonal_order=(2, 1, 1, 7),
    freq="D",
)
# disp=False keeps the optimizer's iteration log out of the cell output.
sarimax = sarimax_model.fit(disp=False)

# In-sample predictions over the training dates, then out-of-sample
# predictions over the test dates.
fitted_values = sarimax.predict(start=TRAIN_DF.index[0], end=TRAIN_DF.index[-1])
forecasts = sarimax.predict(start=TEST_DF.index[0], end=TEST_DF.index[-1])

# `forecasts` is reassigned by every later model section, so keep this model's
# forecast under its own key.
FORECASTS["sarimax"] = forecasts

In [None]:
# Titles tell the training-fit figure apart from the forecast figure; the
# trailing semicolon keeps the returned object's repr out of the cell output.
plot_timeseries(TRAIN_DF["count"], fitted_values).set_title("SARIMAX: training fit")
plot_timeseries(TEST_DF["count"], forecasts).set_title("SARIMAX: test forecast");

### Theta Model

In [None]:
from statsmodels.tsa.forecasting.theta import ThetaModel

In [None]:
# Theta model fitted on the training counts with a seasonal period of 7.
theta_model = ThetaModel(endog=TRAIN_DF["count"], period=7)
theta = theta_model.fit()

# Forecast one step per test row and label the steps with the test dates.
forecasts = theta.forecast(steps=len(TEST_DF))
forecasts.index = TEST_DF.index

# `forecasts` is reassigned by every other model section, so keep this model's
# forecast under its own key.
FORECASTS["theta"] = forecasts

In [None]:
# Title the figure so it stands alone; the trailing semicolon keeps the
# returned object's repr out of the cell output.
plot_timeseries(TEST_DF["count"], forecasts).set_title("Theta Model: test forecast");

### Prophet

In [None]:
from prophet import Prophet

In [None]:
def _yhat_as_count_series(prediction: pd.DataFrame) -> pd.Series:
    """Return a Prophet prediction's ``yhat`` column as a "count" series indexed by "date".

    Selecting the column directly always yields a Series, whereas squeezing a
    single-column frame collapses a one-row prediction to a scalar.
    """
    return prediction.set_index("ds")["yhat"].rename("count").rename_axis("date")


# Prophet reads its timestamps from a "ds" column and its target from "y".
prophet_train_df = pd.DataFrame({"ds": TRAIN_DF.index.values, "y": TRAIN_DF["count"].values})
prophet_test_df = pd.DataFrame({"ds": TEST_DF.index.values})

prophet = Prophet().fit(prophet_train_df)

# Predictions over the training dates, then over the test dates.
fitted_values = _yhat_as_count_series(prophet.predict(prophet_train_df))
forecasts = _yhat_as_count_series(prophet.predict(prophet_test_df))

# `forecasts` is reassigned by every other model section, so keep this model's
# forecast under its own key.
FORECASTS["prophet"] = forecasts

In [None]:
# Titles tell the training-fit figure apart from the forecast figure; the
# trailing semicolon keeps the returned object's repr out of the cell output.
plot_timeseries(TRAIN_DF["count"], fitted_values).set_title("Prophet: training fit")
plot_timeseries(TEST_DF["count"], forecasts).set_title("Prophet: test forecast");

## Save Forecasts