In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from bikes.evaluate.config import TEST_LOCATIONS
from bikes.evaluate.split import train_test_split
from bikes.evaluate.metrics import mape, mase

In [None]:
def file_from_location(location: str, suffix: str) -> str:
    """Turn a location label into a file name.

    Every " -" sequence is removed, remaining spaces become underscores, the
    result is lower-cased, and ``suffix`` is appended as-is.
    """
    without_dashes = location.replace(" -", "")
    underscored = without_dashes.replace(" ", "_")
    stem = underscored.lower()
    return stem + suffix

## Compare Forecasts

In [None]:
# Root directory of the saved forecasts; files are read from
# FORECAST_BASE_PATH / <model> / <location file name>.
FORECAST_BASE_PATH = Path("./forecasts/")
# Model identifiers: each one names a sub-directory under FORECAST_BASE_PATH and
# the suffix of that model's prediction column ("yhat_<model>").
MODELS = ["ets", "sarimax", "theta", "prophet", "rnn", "lstm", "deepar"]

In [None]:
# Observed counts for all locations; "date" is parsed to datetimes on load.
# Later cells filter on the "location" column and read the "count" column.
cycle_counts = pd.read_csv("cycle_counts.csv", parse_dates=["date"])

In [None]:
# Load forecasts
#
# For every test location, store the train/test split of the observed counts
# together with one forecast column per model, aligned on the test dates:
#   all_forecasts[location] -> {"train_df", "test_df", "forecast_df"}

all_forecasts = {}

for location in TEST_LOCATIONS:
    all_forecasts[location] = {}

    location_df = cycle_counts.loc[cycle_counts["location"] == location].copy()
    location_df = location_df.set_index("date").sort_index()

    train_df, test_df = train_test_split(location_df)
    all_forecasts[location]["train_df"] = train_df
    all_forecasts[location]["test_df"] = test_df

    location_csv = file_from_location(location, ".csv")
    forecast_df = test_df.copy()
    for model in MODELS:
        forecast_path = FORECAST_BASE_PATH / model / location_csv
        model_forecasts = pd.read_csv(
            forecast_path,
            index_col=0,
            parse_dates=["date"],
            dtype={"ytrue": int, f"yhat_{model}": float},
        )

        assert (model_forecasts["location"] == location).all(), (
            f"{forecast_path}: contains rows for a location other than {location!r}"
        )
        assert model_forecasts["date"].isin(test_df.index).all(), (
            f"{forecast_path}: contains dates that are not in the test split"
        )
        model_forecasts = model_forecasts.drop(columns=["location"]).set_index("date")

        # A repeated date would make the left join below emit extra rows, so the
        # forecasts would no longer line up row-for-row with the test split.
        assert model_forecasts.index.is_unique, (
            f"{forecast_path}: contains duplicate dates"
        )

        forecast_df = pd.merge(
            left=forecast_df,
            right=model_forecasts,
            left_index=True,
            right_index=True,
            how="left",
        )

        # The truth stored with the forecast must equal the observed count on
        # every test date; a date without a forecast yields NaN and fails too.
        assert (forecast_df["count"] == forecast_df["ytrue"]).all(), (
            f"{forecast_path}: 'ytrue' is missing or differs from the observed counts"
        )
        forecast_df = forecast_df.drop(columns=["ytrue"])

    # The observed counts become the single ground-truth column.
    forecast_df = forecast_df.rename(columns={"count": "ytrue"})
    all_forecasts[location]["forecast_df"] = forecast_df

In [None]:
# Plot forecasts

location = TEST_LOCATIONS[18]
forecast_df = all_forecasts[location]["forecast_df"]
dates = forecast_df.index

fig, ax = plt.subplots(1, 1, figsize=(10, 3.5))
# The palette holds exactly one colour per model, so the two can be paired directly.
colors = sns.color_palette("Dark2", n_colors=len(MODELS))

# Ground truth: solid black line
ax.plot(dates, forecast_df["ytrue"].values, color="black", lw=2.5)

# Model forecasts: one dashed line per model
for model, color in zip(MODELS, colors):
    predictions = forecast_df[f"yhat_{model}"].values
    ax.plot(dates, predictions, color=color, lw=2.5, label=model, ls="--")

ax.set_ylabel("Count")
ax.set_title(location)
ax.set_ylim(-10, None)
ax.legend(frameon=True, loc="lower left", ncol=4, fontsize="small")
ax.tick_params(axis="x", rotation=45)
ax.grid(True, linestyle=":", alpha=0.8)
fig.tight_layout()
# plt.savefig(f"./plots/{file_from_location(location, '.png')}", dpi=300);

In [None]:
# Compute metrics
#
# Each (location, model) pair is scored once, then exposed through two views:
#   all_location_metrics -> location: model: metric: metric_value
#   all_model_metrics    -> model: location: metric: metric_value
all_location_metrics = {}
for location in TEST_LOCATIONS:
    dfs = all_forecasts[location]

    # The training series is passed to MASE only; both metrics are evaluated
    # against the observed test counts.
    y_train = dfs["train_df"]["count"]
    y_test = dfs["test_df"]["count"]

    model_metrics = {}
    for model in MODELS:
        y_pred = dfs["forecast_df"][f"yhat_{model}"]
        mape_ = mape(y_true=y_test, y_pred=y_pred)
        mase_ = mase(y_train=y_train, y_true=y_test, y_pred=y_pred)
        model_metrics[model] = {"mape": float(mape_), "mase": float(mase_)}

    all_location_metrics[location] = model_metrics

# Re-key the same results by model. The innermost dicts are copied so the two
# views stay independent of each other.
all_model_metrics = {
    model: {
        location: dict(all_location_metrics[location][model])
        for location in TEST_LOCATIONS
    }
    for model in MODELS
}

In [None]:
# Plot metrics comparison for each model by location
model = MODELS[6]
location_metrics = all_model_metrics[model]

fig, ax = plt.subplots(2, 1, figsize=(10, 4.5), sharex=True)

# One panel per metric: (axes, metric key, bar opacity).
panels = [(ax[0], "mape", 0.8), (ax[1], "mase", 0.75)]
for panel, metric, alpha in panels:
    values = [location_metrics[loc][metric] for loc in TEST_LOCATIONS]
    panel.bar(TEST_LOCATIONS, values, alpha=alpha)

    # Dashed reference line at the mean over locations, labelled with its value
    # just past the last bar.
    average = np.mean(values)
    panel.axhline(average, color="gray", ls="--", lw=1.5)
    panel.text(len(TEST_LOCATIONS) - 0.5, average * 1.1, f"{average:.2f}", color="gray")
    panel.set(ylabel=metric.upper())

# Format x-tick-labels (the x axis is shared, so only the bottom panel is labelled)
xticks = np.arange(len(TEST_LOCATIONS))
ax[1].set_xticks(xticks)
ax[1].set_xticklabels(TEST_LOCATIONS, rotation=45, ha="right", fontsize="small")

fig.suptitle(model.upper())
fig.align_labels()
fig.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
Path("./plots").mkdir(parents=True, exist_ok=True)
fig.savefig(f"./plots/{model}.png", dpi=300);

In [None]:
# Plot metrics comparison for location by model
location = TEST_LOCATIONS[18]
model_metrics = all_location_metrics[location]

fig, ax = plt.subplots(2, 1, figsize=(10, 4.5), sharex=True)

# One panel per metric: (axes, metric key, bar opacity).
panels = [(ax[0], "mape", 0.8), (ax[1], "mase", 0.75)]
for panel, metric, alpha in panels:
    values = [model_metrics[m][metric] for m in MODELS]
    panel.bar(MODELS, values, alpha=alpha)

    # Dashed reference line at the mean over models, labelled with its value
    # just past the last bar.
    average = np.mean(values)
    panel.axhline(average, color="gray", ls="--", lw=1.5)
    panel.text(len(MODELS) - 0.55, average * 1.05, f"{average:.2f}", color="gray")
    panel.set(ylabel=metric.upper())
    # Leave headroom above the tallest bar.
    panel.set_ylim((0, max(values) * 1.2))

# Format x-tick-labels (the x axis is shared, so only the bottom panel is labelled)
xticks = np.arange(len(MODELS))
ax[1].set_xticks(xticks)
ax[1].set_xticklabels(MODELS, rotation=45, ha="right")

fig.suptitle(location.upper())
fig.align_labels()
fig.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
Path("./plots/locations").mkdir(parents=True, exist_ok=True)
fig.savefig(f"./plots/locations/{location}.png", dpi=300);