In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader

from bikes.preprocess.preprocess import get_tensor_train_dataset, Scaler
from bikes.evaluate.split import train_test_split

In [None]:
def plot_timeseries(actual: pd.Series, predicted: pd.Series):
    """Draw observed and predicted series on one set of axes.

    Args:
        actual: Observed values, plotted under the legend label "Observed".
        predicted: Forecast values, plotted under the legend label "Predicted".

    Returns:
        The matplotlib Axes the two lines were drawn on.
    """
    fig, ax = plt.subplots()

    # Observed is drawn first so the line order matches the legend order.
    for series, legend_label in ((actual, "Observed"), (predicted, "Predicted")):
        ax.plot(series, label=legend_label)

    ax.set_ylabel("Count")
    # Tilt the x tick labels by 45 degrees.
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(45)
    ax.legend()

    fig.tight_layout()

    return ax

## LSTM

In [None]:
class LstmForecaster(nn.Module):
    """LSTM followed by a linear head that emits `horizon` values per time step.

    Args:
        horizon: Number of output features produced by the linear head.
        input_size: Number of features per input time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(
        self,
        horizon: int,
        input_size: int = 1,
        hidden_size: int = 25,
        num_layers: int = 1,
    ):
        super().__init__()
        # batch_first=True: inputs and outputs are laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear = nn.Linear(in_features=hidden_size, out_features=horizon)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the LSTM over `x` and project every time step's hidden output.

        The linear head is applied to the full LSTM output sequence, so the
        result keeps the time dimension: for a batched (batch, seq, input_size)
        input it is (batch, seq, horizon).
        """
        # The final (h_n, c_n) state tuple is not needed, only the per-step outputs.
        hidden_seq, _state = self.lstm(x)
        projected = self.linear(hidden_seq)
        return projected

In [None]:
# Load the cycle counts, parsing the "date" column as datetimes.
cycle_counts = pd.read_csv(
    "cycle_counts.csv",
    parse_dates=["date"],
)

In [None]:
LOCATION = "Quay Street Eco Display Classic"

# Keep only the rows for the chosen location, indexed and ordered by date.
location_df = (
    cycle_counts.loc[cycle_counts["location"] == LOCATION]
    .copy()
    .set_index("date")
    .sort_index()
)

# Split into training and hold-out frames, then pull out the target column of each.
train_df, test_df = train_test_split(location_df)
y_train = train_df["count"]
y_test = test_df["count"]

In [None]:
# Plot the last 500 observations of the training series.
fig, ax = plt.subplots()
ax.plot(y_train.iloc[-500:], lw=2, label="Observed")
ax.set_ylabel("Count")
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
ax.legend()
fig.tight_layout();

In [None]:
# Prepare data
# The output length equals the size of the hold-out series; each input
# sequence is six times as long.
out_seq_length = len(y_test)
in_seq_length = out_seq_length * 6

# The scaler is fitted on the training series only.
scaler = Scaler()
y_train_scaled = scaler.fit_transform(y_train)

ts = get_tensor_train_dataset(
    y_train_scaled,
    in_seq_length=in_seq_length,
    out_seq_length=out_seq_length,
)
dataloader = DataLoader(ts, batch_size=32)

In [None]:
# Training loop
# Seed torch before building the model so the random weight initialisation,
# and therefore the whole training run, is reproducible on Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

model = LstmForecaster(horizon=out_seq_length)

loss_fn = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=1e-03)

model.train()
iteration_loss = []  # one entry per epoch: sample-weighted mean MSE over all batches
n_epochs = 100
pgbar = tqdm(range(n_epochs))
for epoch in pgbar:
    running_loss = 0.0
    n_samples = 0
    for X, y in dataloader:
        optimizer.zero_grad()
        y_hat = model(X)
        # nn.MSELoss is called as loss_fn(input, target): prediction first, target second.
        loss = loss_fn(y_hat, y)
        loss.backward()
        optimizer.step()

        # Weight each batch's mean loss by its size so a smaller final batch
        # does not distort the epoch average.
        n_batch = X.size(0)
        running_loss += loss.item() * n_batch
        n_samples += n_batch

    if n_samples == 0:
        # Fail with a clear message instead of a NameError on an unbound `loss`.
        raise ValueError(
            "dataloader yielded no batches; the training series may be shorter "
            "than in_seq_length + out_seq_length"
        )

    epoch_loss = running_loss / n_samples
    iteration_loss.append(epoch_loss)
    pgbar.set_description(f"Epoch [{epoch + 1} / {n_epochs}] - Loss = {epoch_loss:.3f}")

In [None]:
# Plot the loss recorded for each epoch.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(iteration_loss)
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("MSE Loss");

In [None]:
# Forecast
# Use the last `in_seq_length` scaled training values as the model input,
# reshaped to (batch, in_seq_length, 1).
history = y_train_scaled.iloc[-in_seq_length:].values
X_test = torch.tensor(history, dtype=torch.float).view(-1, in_seq_length, 1)

model.eval()
with torch.no_grad():
    raw_forecasts = model(X_test)

# Keep the output at the last time step of the last batch element, align it
# with the hold-out index, and undo the scaling.
forecasts = scaler.inverse_transform(
    pd.Series(data=raw_forecasts[-1, -1].numpy(), index=y_test.index)
)

In [None]:
# Compare the hold-out observations with the LSTM forecasts.
plot_timeseries(actual=y_test, predicted=forecasts)

In [None]:
# Save forecasts
# Left-join the predictions onto the hold-out rows by index, then turn the
# index back into a regular column.
forecast_df = (
    test_df.rename(columns={"count": "ytrue"})
    .merge(
        forecasts.to_frame(name="yhat_lstm"),
        how="left",
        left_index=True,
        right_index=True,
    )
    .reset_index()
)

In [None]:
# Sanity check: every row must have a date.
assert forecast_df["date"].notna().all()

In [None]:
# Write the forecasts to ./forecasts/lstm/<location slug>.csv, where the slug
# is LOCATION lower-cased with spaces replaced by underscores.
output_path = Path(f"./forecasts/lstm/{LOCATION.replace(' ', '_').lower()}.csv")
# DataFrame.to_csv does not create missing parent directories, so create them
# first; otherwise a fresh checkout fails on this cell.
output_path.parent.mkdir(parents=True, exist_ok=True)
forecast_df.to_csv(output_path)