In [5]:
import os
import pandas as pd
import numpy as np

# =====================================================================
# 0. SETUP
# =====================================================================

# Paths (adjust if needed)
data_path = "/h/kupfersk/cfpr_2026/data/processed_data_updated/STATSCAN_food_cpi_processed.csv"
forecast_base = "/h/kupfersk/cfpr_2026/generate_forecasts/output/Forecasts"

# Load ground truth CPI data
df = pd.read_csv(data_path, parse_dates=["index"], index_col="index")

food_cat = list(df.columns)
models = sorted(os.listdir(forecast_base))

# =====================================================================
# 1️⃣ CALCULATE GROUND TRUTH YOY CHANGE (2025 vs 2024)
# =====================================================================

# Mean CPI (Jan–Sep 2024) and (Jan–Sep 2025)
p1 = df.loc["2024-01-01":"2024-09-01"].mean(axis=0).rename("2024 Mean CPI (Jan–Sep)")
p2 = df.loc["2025-01-01":"2025-09-01"].mean(axis=0).rename("2025 Mean CPI (Jan–Sep)")

# Actual YoY % change
actual_yoy = (((p2 - p1) / p1) * 100).rename("Actual YoY % Change (2025 vs 2024)")

# For reference
actual_df = pd.concat((p1, p2, actual_yoy), axis=1)
print("\n=== (1) Actual YoY Change (Ground Truth) ===")
display(actual_df.round(2))

# =====================================================================
# 2️⃣ FORECASTED YOY CHANGE + RESIDUAL PER MODEL
# =====================================================================

records = []

for category in food_cat:
    for model in models:
        forecast_file = os.path.join(
            forecast_base,
            model,
            "random_seed_42",
            "2024-07-01",
            "forecasts",
            f"food_cpi: {category}.csv"
        )
        if not os.path.exists(forecast_file):
            continue

        # Load forecast data
        temp = pd.read_csv(forecast_file, parse_dates=["timestamp"]).set_index("timestamp")

        # Compute forecast mean for Jan–Sep 2025
        fc_mean_2025 = temp.loc["2025-01-01":"2025-09-01"]["q_0.5"].mean()

        # Reference (actual) mean CPI for Jan–Sep 2024
        ref_mean_2024 = p1.get(category, np.nan)

        # Predicted YoY % change
        forecast_yoy = (fc_mean_2025 - ref_mean_2024) / ref_mean_2024 * 100

        # Actual YoY for comparison
        actual_yoy_value = actual_yoy.get(category, np.nan)

        # Residual = difference between forecast and actual
        residual = forecast_yoy - actual_yoy_value

        records.append({
            "category": category,
            "model": model,
            "forecast_YoY_%": forecast_yoy,
            "actual_YoY_%": actual_yoy_value,
            "residual_%": residual
        })

forecast_yoy_df = pd.DataFrame(records)

# =====================================================================
# 3️⃣ CATEGORY-LEVEL RESIDUAL SUMMARIES
# =====================================================================

print("\n===== Category-Level YoY Residual Summaries =====")

# Dictionary to hold each category's top 10
category_residual_results = {}

for category in food_cat:
    # Filter for this category only
    cat_df = forecast_yoy_df[forecast_yoy_df["category"] == category].copy()

    if cat_df.empty:
        print(f"\n⚠️ No forecasts found for category: {category}")
        continue

    # Sort by absolute residual (smaller = better)
    cat_sorted = cat_df.sort_values(by="residual_%", key=lambda x: abs(x)).set_index("model")

    # Select top 10
    top10 = cat_sorted["residual_%"].head(10)

    # Store and print
    category_residual_results[category] = top10

    print(f"\n{category}")
    print(top10.round(2))




=== (1) Actual YoY Change (Ground Truth) ===


Unnamed: 0,2024 Mean CPI (Jan–Sep),2025 Mean CPI (Jan–Sep),Actual YoY % Change (2025 vs 2024)
Food,189.06,193.8,2.51
Meat,210.07,219.51,4.5
"Fish, seafood and other marine products",161.98,165.16,1.97
Dairy products and eggs,172.01,176.4,2.55
Bakery and cereal products (excluding baby food),197.73,199.1,0.69
"Fruit, fruit preparations and nuts",168.7,176.25,4.48
Vegetables and vegetable preparations,191.16,193.18,1.06
Other food products and non-alcoholic beverages,177.27,184.24,3.93
Food purchased from restaurants,194.19,197.2,1.55



===== Category-Level YoY Residual Summaries =====

Food
model
ag_local_TemporalFusionTransformerModel                    0.25
ag_global_all_ChronosModel                                -0.46
ag_global_all_SimpleFeedForwardModel_exp5_geopolitical_    0.57
ag_local_AutoARIMAModel                                    0.60
ag_local_DLinearModel                                     -0.65
ag_global_all_SimpleFeedForwardModel_exp1_human_           0.80
ag_local_ChronosModel                                     -0.85
ag_global_all_DeepARModel                                 -0.86
ag_local_AutoETSModel                                      1.08
ag_global_all_TemporalFusionTransformerModel               1.11
Name: residual_%, dtype: float64

Meat
model
ag_global_all_PatchTSTModel                          -0.12
ag_global_all_SimpleFeedForwardModel_exp5_climate_   -0.14
ag_global_all_ChronosModel                           -0.19
claude-3-5-sonnet-20240620_report_False              -0.20
ag_local_SimpleF

In [7]:
# =====================================================================
# 4️⃣ SAVE RESULTS FOR REPORTING AND CROSS-ANALYSIS
# =====================================================================

import pickle

save_dir = "./cfpr_evaluation_dicts"
os.makedirs(save_dir, exist_ok=True)

# --- 1️⃣ Save full forecast-vs-actual comparison
forecast_yoy_df.to_csv(os.path.join(save_dir, "forecast_yoy_df.csv"), index=False)

# --- 2️⃣ Save category-level residual results (dict)
with open(os.path.join(save_dir, "category_residual_results.pkl"), "wb") as f:
    pickle.dump(category_residual_results, f)

# --- 3️⃣ Compute and save overall model-level summary
#     → Mean absolute residual across all categories (smaller = better)
overall_summary_yoy = (
    forecast_yoy_df.groupby("model")["residual_%"]
    .apply(lambda x: np.mean(np.abs(x)))
    .sort_values()
    .reset_index()
    .rename(columns={"residual_%": "Mean_Abs_Residual_%"})
)

overall_summary_yoy.to_csv(os.path.join(save_dir, "overall_summary_yoy.csv"), index=False)

print("✅ Saved YoY evaluation outputs:")
print(f"• forecast_yoy_df.csv — full forecast vs actual comparison")
print(f"• category_residual_results.pkl — top 10 residuals per category")
print(f"• overall_summary_yoy.csv — average absolute residuals per model\n")

# --- Optional: print the top-performing models
print("\n===== Top 10 Models by Mean Absolute Residual (All Categories) =====")
print(overall_summary_yoy.head(10).round(3))


✅ Saved YoY evaluation outputs:
• forecast_yoy_df.csv — full forecast vs actual comparison
• category_residual_results.pkl — top 10 residuals per category
• overall_summary_yoy.csv — average absolute residuals per model


===== Top 10 Models by Mean Absolute Residual (All Categories) =====
                                               model  Mean_Abs_Residual_%
0            claude-3-5-sonnet-20240620_report_False                0.175
1             claude-3-5-sonnet-20240620_report_True                0.529
2                         gemini-1.5-pro_report_True                1.023
3                         ag_global_all_ChronosModel                1.138
4                              ag_local_AutoETSModel                1.203
5                          ag_global_all_DeepARModel                1.237
6                        gemini-1.5-pro_report_False                1.254
7  ag_global_cpi_with_covariates_TemporalFusionTr...                1.317
8  ag_global_all_SimpleFeedForwardModel_exp