In [None]:
# Reference implementations (Nixtla neuralforecast):
# - DeepAR model: https://github.com/Nixtla/neuralforecast/blob/main/nbs/models.deepar.ipynb
# - PyTorch losses: https://github.com/Nixtla/neuralforecast/blob/main/nbs/losses.pytorch.ipynb

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset
from torch.distributions import NegativeBinomial, Normal

from bikes.preprocess.preprocess import get_tensor_train_dataset, MeanScaler
from bikes.evaluate.split import train_test_split

In [None]:
def plot_timeseries(actual: pd.Series, predicted: pd.Series):
    """
    Draw the observed and the predicted series on a single new axes.

    Both series are plotted against their own index, the y-axis is labelled
    "Count", x tick labels are rotated by 45 degrees and a legend is added.

    Returns the matplotlib axes the lines were drawn on.
    """
    fig, ax = plt.subplots()

    # Same drawing order as the legend order: observations first, predictions second
    for series, legend_name in ((actual, "Observed"), (predicted, "Predicted")):
        ax.plot(series, label=legend_name)

    ax.set(ylabel="Count")
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(45)
    ax.legend()

    fig.tight_layout()
    return ax

## DeepAR

### Distribution Loss

In [None]:
def scale_normal_params(
    output: torch.Tensor,
    loc: torch.Tensor | None = None,
    scale: torch.Tensor | None = None,
) -> tuple[torch.Tensor, torch.Tensor]:
    """
    Scale model outputs back to original scale using standard scaling
    """
    mean, std = torch.tensor_split(output, 2, dim=2)
    std = F.softplus(std)
    if (loc is not None) and (scale is not None):
        mean = mean * scale + loc
        std = (std + 0.2) * scale
    return mean, std

class NormalDistributionLoss(nn.Module):
    def __init__(self, scaling_fn):
        super().__init__()
        self.scaling_fn = scaling_fn

    def __call__(
        self,
        output: torch.Tensor,
        y: torch.Tensor,
        loc: torch.Tensor | None = None,
        scale: torch.Tensor | None = None,
    ):
        mean, std = self.scaling_fn(output, loc, scale)
        loss_dist = Normal(loc=mean, scale=std)
        return (-loss_dist.log_prob(y)).mean()

In [None]:
def scale_nb_params_standard_scaling(
    output: torch.Tensor,
    loc: torch.Tensor | None = None,
    scale: torch.Tensor | None = None,
):
    """
    Scale model outputs back to original scale using standard scaling
    """
    mu, alpha = torch.tensor_split(output, 2, dim=2)
    mu = F.softplus(mu) + 1e-08
    alpha = F.softplus(alpha) + 1e-08
    if (loc is not None) and (scale is not None):
        mu = mu * scale + loc
        alpha = alpha / (scale + 1.)
    total_count = 1.0 / alpha
    probs = (mu * alpha / (1.0 + mu * alpha)) + 1e-08
    return total_count, probs


def scale_nb_params_mean_scaling(
    output: torch.Tensor,
    loc: torch.Tensor | None = None,
    scale: torch.Tensor | None = None,
):
    """
    Scale model outputs back to original scale using mean scaling
    """
    mu, alpha = torch.tensor_split(output, 2, dim=2)
    mu = F.softplus(mu) + 1e-08
    alpha = F.softplus(alpha) + 1e-08
    if scale is not None:
        mu = mu * scale
        alpha = alpha / scale ** 0.5
    total_count = 1.0 / alpha
    probs = (mu * alpha / (1.0 + mu * alpha)) + 1e-08
    return total_count, probs



class NegBinomialDistributionLoss(nn.Module):
    def __init__(self, scaling_fn):
        super().__init__()
        self.scaling_fn = scaling_fn

    def __call__(
        self,
        output: torch.Tensor,
        y: torch.Tensor,
        loc: torch.Tensor | None = None,
        scale: torch.Tensor | None = None,
    ):
        total_count, probs = self.scaling_fn(output, loc, scale)
        loss_dist = NegativeBinomial(total_count=total_count, probs=probs)
        return (-loss_dist.log_prob(y)).mean()

In [None]:
class DeepARDecoder(nn.Module):
    """
    Feed-forward head mapping encoder features to distribution parameters.

    With ``n_layers <= 1`` the head is a single linear map
    ``in_features -> out_features``. Otherwise it is an MLP made of
    ``n_layers`` linear maps of width ``hidden_size`` with a ReLU between
    consecutive linear maps (no activation after the last one).
    """

    def __init__(
        self,
        in_features: int,
        out_features: int = 2,
        hidden_size: int = 25,
        n_layers: int = 1,
    ):
        super().__init__()

        # Feature widths seen along the stack; a non-positive repeat count
        # contributes no hidden widths, leaving the purely linear head.
        widths = [in_features] + [hidden_size] * (n_layers - 1) + [out_features]

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            if stack:
                # Non-linearity sits between consecutive linear maps only
                stack.append(nn.ReLU())
            stack.append(nn.Linear(in_features=fan_in, out_features=fan_out))

        self.decoder = nn.Sequential(*stack)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the head to ``x`` (acts on the last dimension)."""
        return self.decoder(x)


class DeepAR(nn.Module):
    """
    LSTM encoder followed by a feed-forward decoder applied at every time step.

    Args:
        input_size: number of features per time step fed to the LSTM.
        encoder_n_layers: number of stacked LSTM layers.
        encoder_hidden_size: LSTM hidden size (also the decoder's input width).
        encoder_dropout: dropout between stacked LSTM layers. It is only
            forwarded to the LSTM when ``encoder_n_layers > 1``.
        decoder_output_size: number of values emitted per time step.
        decoder_hidden_size: hidden width of the decoder MLP.
        decoder_n_layers: number of linear layers in the decoder.
        trajectory_samples: stored as ``n_samples``; not used by this class itself.
    """

    def __init__(
        self,
        input_size: int = 1,
        encoder_n_layers: int = 1,
        encoder_hidden_size: int = 128,
        encoder_dropout: float = 0.1,
        decoder_output_size: int = 2,
        decoder_hidden_size: int = 25,
        decoder_n_layers: int = 1,
        trajectory_samples: int = 100,
    ):
        super().__init__()

        # LSTM encoder
        # nn.LSTM applies dropout only between stacked layers; passing a
        # non-zero value with a single layer has no effect and raises a
        # UserWarning, so it is zeroed in that case.
        self.encoder = nn.LSTM(
            input_size=input_size,
            hidden_size=encoder_hidden_size,
            num_layers=encoder_n_layers,
            dropout=encoder_dropout if encoder_n_layers > 1 else 0.0,
            batch_first=True,
        )

        # Sequential decoder
        self.n_samples = trajectory_samples
        self.decoder = DeepARDecoder(
            in_features=encoder_hidden_size,
            out_features=decoder_output_size,
            hidden_size=decoder_hidden_size,
            n_layers=decoder_n_layers,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Encode ``x`` and decode every time step.

        For batched input ``x`` is (batch, seq, input_size) because the LSTM is
        built with ``batch_first=True``; the result keeps the leading
        dimensions and has ``decoder_output_size`` values in the last one.
        """
        hidden_state, _ = self.encoder(x)  # per-step outputs; final (h, c) state is discarded
        output = self.decoder(hidden_state)
        return output

In [None]:
# Load the raw observations; the "date" column is parsed into datetimes
cycle_counts = pd.read_csv(
    filepath_or_buffer="cycle_counts.csv",
    parse_dates=["date"],
)

In [None]:
LOCATION = "Quay Street Eco Display Classic"

# Keep only the rows of the chosen counter, as an independent, date-ordered frame
is_location = cycle_counts["location"] == LOCATION
location_df = cycle_counts.loc[is_location].copy().set_index("date").sort_index()

# Split, then pull out the target column of each part
train_df, test_df = train_test_split(location_df)
y_train = train_df["count"]
y_test = test_df["count"]

In [None]:
# Show the last 500 training observations
recent_counts = y_train.iloc[-500:]

fig, ax = plt.subplots()
ax.plot(recent_counts, label="Observed", lw=2)
ax.set(ylabel="Count")
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
ax.legend()
fig.tight_layout();

In [None]:
# Prepare data
scaler = MeanScaler()
y_train_scaled = scaler.fit_transform(y_train)

# Windowing configuration shared by the raw and the scaled datasets
in_seq_length = 6 * 30  # 6 months of input sequence length
out_seq_length = 1      # DeepAR is trained with 1-step ahead forecasts
window_kwargs = dict(in_seq_length=in_seq_length, out_seq_length=out_seq_length)

ts = get_tensor_train_dataset(y_train, **window_kwargs)
ts_scaled = get_tensor_train_dataset(y_train_scaled, **window_kwargs)

# Every batch yields the raw tensors first, followed by their scaled counterparts
paired_dataset = TensorDataset(*ts.tensors, *ts_scaled.tensors)
dataloader = DataLoader(paired_dataset, batch_size=32)

In [None]:
# Train
# Seed torch before the model is built so that weight initialisation (and the
# sampling done in later cells) is reproducible under Restart & Run All.
torch.manual_seed(0)

deepar = DeepAR()
loss_fn = NegBinomialDistributionLoss(scaling_fn=scale_nb_params_mean_scaling)
optimizer = Adam(params=deepar.parameters(), lr=1e-03)

deepar.train()
iteration_loss = []  # one entry per epoch: sample-weighted mean of the batch losses
n_epochs = 100
pgbar = tqdm(range(n_epochs))
for epoch in pgbar:
    running_loss, n_seen = 0.0, 0
    for X, y, X_scaled, y_scaled in dataloader:
        optimizer.zero_grad()
        # The network sees scaled inputs; the likelihood is evaluated on the
        # unscaled targets, with the scaling function mapping the parameters back.
        # NOTE(review): y_hat holds one parameter pair per input time step while
        # the forecast cell only uses the last step - confirm that y broadcasts
        # against y_hat as intended.
        y_hat = deepar(X_scaled)
        loss = loss_fn(output=y_hat, y=y, loc=None, scale=scaler.mean_)
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the mean
        running_loss += float(loss.detach()) * y.shape[0]
        n_seen += y.shape[0]

    # Record the epoch average rather than only the last mini-batch's loss
    epoch_loss = running_loss / n_seen
    iteration_loss.append(epoch_loss)
    pgbar.set_description(f"Epoch [{epoch + 1} / {n_epochs}] - Loss = {epoch_loss:.3f}")

In [None]:
# Training curve: one recorded loss value per epoch
fig, ax = plt.subplots()
ax.plot(iteration_loss)
ax.set(xlabel="Epoch", ylabel="Log Loss");

In [None]:
# Forecast
horizon = len(y_test)
n_samples = 100

# The conditioning window is the same for every sampled trajectory, so it is
# built once instead of once per sample.
history = torch.tensor(y_train_scaled.iloc[-in_seq_length:].values, dtype=torch.float32)
history = history.view(-1, in_seq_length, 1)

deepar.eval()
with torch.no_grad():
    forecast_distribution = []
    for s in tqdm(range(n_samples)):

        # torch.cat below returns a new tensor, so `history` itself is never modified
        X_test = history

        trajectory = []
        for h in range(horizon):
            # NOTE(review): the input grows by one step per iteration and is
            # re-encoded from scratch, so cost is quadratic in `horizon`.
            output = deepar(X_test)
            # Only keep next out-of-sample prediction; the slice keeps the time
            # dimension without hard-coding the number of parameters.
            output = output[:, -1:, :]

            # Forecast by sampling from NB distribution
            total_count, probs = scale_nb_params_mean_scaling(output, loc=None, scale=scaler.mean_)
            distr = NegativeBinomial(total_count=total_count, probs=probs)
            y_hat = distr.sample()

            # Save forecast
            trajectory.append(y_hat)

            # Append scaled forecast to input for next step
            y_hat_scaled = y_hat / scaler.mean_
            X_test = torch.cat((X_test, y_hat_scaled), dim=1)

        # One trajectory: the sampled values concatenated along the time dimension
        trajectory = torch.cat(trajectory, dim=1)
        forecast_distribution.append(trajectory)

In [None]:
# Stack the sampled trajectories along the first (sample) dimension
forecasts = torch.cat(tensors=forecast_distribution, dim=0)

In [None]:
# Point forecast: average over the first dimension of the stacked samples
forecast_mean = forecasts.mean(dim=0)

In [None]:
# Flatten the mean forecast and align it with the test period's index
mean_values = forecast_mean.flatten().numpy()
forecast_series = pd.Series(data=mean_values, index=y_test.index)

In [None]:
# Compare the mean forecast with the held-out observations.
# Uses the explicit fig/ax interface like the training-series plot, labels the
# y-axis, and ends with `;` so the Legend repr is not echoed as cell output.
fig, ax = plt.subplots()
ax.plot(y_test, label="Observed")
ax.plot(forecast_series, label="Forecast")
ax.set(ylabel="Count")
for tick in ax.get_xticklabels():
    tick.set_rotation(45)
ax.legend()
fig.tight_layout();