## Investigate model performance

This notebook allows ML model maintainers to investigate the performance of a model through:

1) Visualization of the backtest results
2) SHAP values of the model

Possible future improvements:
- simple procedures for hyperparameter (HP) tuning (potentially worked into a component)

In [None]:
# Add <REPOSITORY_ROOT>/src to the import path so local modules can be imported.
import os
import sys

_repository_root = os.getenv("REPOSITORY_ROOT")
if _repository_root is None:
    # Fail with an actionable message instead of the opaque TypeError that
    # os.path.join(None, "src") would raise.
    raise RuntimeError(
        "Environment variable REPOSITORY_ROOT is not set; "
        "point it at the repository root before running this notebook."
    )

_src_dir = os.path.join(_repository_root, "src")
# Guard the append so re-running this cell does not add duplicate entries.
if _src_dir not in sys.path:
    sys.path.append(_src_dir)



In [None]:
# IMPORTS
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.explainability import ShapExplainer
from darts.metrics import rmse, wmape
from darts.models import RandomForest # change this to whatever Darts model you are investigating

In [None]:
# NOTEBOOK PARAMETERS
REPOSITORY_ROOT = os.getenv("REPOSITORY_ROOT")

# --- Artefact locations (all resolved relative to the repository root) ---
MODEL_PATH = os.path.join(REPOSITORY_ROOT, "models/random_forest_1111.pkl")
# The three parquet files below are outputs of a preprocessing pipeline run.
TARGET_SERIES_PATH = os.path.join(
    REPOSITORY_ROOT, "data/pipeline_runs/train_targets_daily.parquet"
)
PAST_COVARIATES_PATH = os.path.join(
    REPOSITORY_ROOT, "data/pipeline_runs/past_covariates.parquet"
)
FUTURE_COVARIATES_PATH = os.path.join(
    REPOSITORY_ROOT, "data/pipeline_runs/future_covariates.parquet"
)

# --- Column names used when building the Darts series ---
TIME_COLUMN = "InvoiceDate"
TARGET_COLUMN = "Quantity"
FUTURE_COVARIATES_COLUMNS = ["is_holiday"]
PAST_COVARIATES_COLUMNS = [
    "num_transactions",
    "num_unique_customers",
    "num_unique_articles",
    "avg_basket_size",
    "avg_unit_price",
]

# --- Backtest settings (forwarded to model.historical_forecasts / model.backtest) ---
FORECAST_HORIZON = 7
BACKTEST_START = 0.8  # passed as `start`; presumably a fraction of the series - confirm against the Darts docs
BACKTEST_STRIDE = 1
BACKTEST_RETRAIN = True
BACKTEST_LAST_POINTS_ONLY = False
BACKTEST_METRICS = [rmse, wmape]

### 1. Load model and data

In [None]:
def _to_daily_series(frame: pd.DataFrame, value_cols) -> TimeSeries:
    """Convert a dataframe into a daily Darts ``TimeSeries``.

    Args:
        frame: Dataframe containing ``TIME_COLUMN`` and the requested value columns.
        value_cols: Column name (or list of column names) to use as series values.

    Returns:
        A ``TimeSeries`` built with ``freq="D"``, ``fill_missing_dates=True``
        and ``fillna_value=0``.
    """
    return TimeSeries.from_dataframe(
        frame,
        time_col=TIME_COLUMN,
        value_cols=value_cols,
        fill_missing_dates=True,
        fillna_value=0,
        freq="D",
    )


# SECURITY: pickle.load can execute arbitrary code while unpickling.
# Only load model files that come from a trusted source.
with open(MODEL_PATH, "rb") as f:
    model: RandomForest = pickle.load(f) # change this to whatever Darts model you are investigating
print(model)

# Raw dataframes stay available under their own names for later inspection.
target_train_df = pd.read_parquet(TARGET_SERIES_PATH)
past_covariates_df = pd.read_parquet(PAST_COVARIATES_PATH)
future_covariates_df = pd.read_parquet(FUTURE_COVARIATES_PATH)

# All three series share the same conversion settings.
# NOTE(review): fillna_value=0 is also applied to the covariates (e.g. "avg_unit_price");
# confirm that 0 is the intended fill for those columns.
target_train = _to_daily_series(target_train_df, TARGET_COLUMN)
past_covariates = _to_daily_series(past_covariates_df, PAST_COVARIATES_COLUMNS)
future_covariates = _to_daily_series(future_covariates_df, FUTURE_COVARIATES_COLUMNS)
                            

### 2. Generate historical forecasts (backtest) and metrics

In [None]:
# Run the backtest: produce forecasts of length FORECAST_HORIZON over the
# part of the target series selected by BACKTEST_START.
historical_forecasts = model.historical_forecasts(
    series=target_train,
    past_covariates=past_covariates,
    future_covariates=future_covariates,
    forecast_horizon=FORECAST_HORIZON,
    stride=BACKTEST_STRIDE,
    retrain=BACKTEST_RETRAIN,
    last_points_only=BACKTEST_LAST_POINTS_ONLY,
    start=BACKTEST_START,
    show_warnings=False,  # suppress deprecation warning for now
)

# Darts returns a list of series when last_points_only=False, but a single
# series when it is True. Normalise to a list for plotting only;
# `historical_forecasts` itself is left untouched because model.backtest
# consumes it in its original form.
forecasts_to_plot = (
    historical_forecasts
    if isinstance(historical_forecasts, list)
    else [historical_forecasts]
)

# Plot overlapping historical forecasts vs actuals
plt.figure(figsize=(10, 6))
for forecast in forecasts_to_plot:
    forecast.plot(lw=1, alpha=0.3)

# slice() keeps both end points, so the actual value at the first forecast
# timestamp is shown. drop_before() would also drop that timestamp itself.
first_forecast_start = forecasts_to_plot[0].start_time()
actual_series = target_train.slice(first_forecast_start, target_train.end_time())
actual_series.plot(label="actual", color="black", lw=1)
plt.title("Historical Forecasts vs Actuals")

# Remove legend (too many series to show)
legend = plt.gca().get_legend()
if legend:
    legend.remove()
plt.show()

In [None]:
# Score the precomputed historical forecasts against the target series with
# every metric in BACKTEST_METRICS.
backtest_results = model.backtest(
    series=target_train,
    historical_forecasts=historical_forecasts,
    last_points_only=BACKTEST_LAST_POINTS_ONLY,
    metric=BACKTEST_METRICS,
)

# Derive labels from BACKTEST_METRICS instead of hard-coding indices and names,
# so that editing the parameter cannot mislabel scores or raise an IndexError.
metric_functions = (
    BACKTEST_METRICS
    if isinstance(BACKTEST_METRICS, (list, tuple))
    else [BACKTEST_METRICS]
)
# atleast_1d also covers a bare scalar returned for a single metric.
metric_scores = np.atleast_1d(backtest_results)

print("BACKTEST METRICS:")
# "Mean" assumes backtest's default reduction, since no `reduction` argument
# is passed above - confirm if the call is changed.
for metric_fn, score in zip(metric_functions, metric_scores):
    metric_name = getattr(metric_fn, "__name__", repr(metric_fn)).upper()
    print(f"Mean {metric_name} over all windows: {score}")
# Reference value for judging the scale of absolute-error metrics such as RMSE.
print(f"Mean target series value: {target_train.values().mean()}")