# Forecasting Consensus Expectations: Continuing Claims 

## Point and Directional Forecasts

In [2]:
import os
import warnings
import math
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st

from tqdm.auto import tqdm
from scipy import stats, special
from scipy.optimize import brentq, minimize
from scipy.stats import t as student_t, norm, binomtest, jarque_bera
from sklearn.mixture import GaussianMixture
from collections import defaultdict
from itertools import product
from arch.univariate import ConstantMean, GARCH, StudentsT
from arch.univariate.base import ConvergenceWarning
from IPython.display import display, Markdown
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import acf

In [3]:
OUT_DIR = "../out"         
DF_FILE       = "contClaims_df.parquet"
DF_FULL_FILE  = "contClaims_df_full.parquet"

df       = pd.read_parquet(os.path.join(OUT_DIR, DF_FILE),      engine="pyarrow")
df_full  = pd.read_parquet(os.path.join(OUT_DIR, DF_FULL_FILE), engine="pyarrow")

print("df shape     :", df.shape)
print("df_full shape:", df_full.shape)

df shape     : (8755, 10)
df_full shape: (9990, 10)


In [4]:
df.head()

Unnamed: 0,release_date,period,actual,median_forecast,economist,firm,forecast,asof,surprise,error
0,2002-08-08,2002-07-27,3532.0,3480.0,Bill Sharp,JPMorgan Chase & Co,3475.0,2002-08-06,52.0,-57.0
1,2002-08-08,2002-07-27,3532.0,3480.0,Ethan S Harris,BofA Securities Inc,3480.0,2002-08-02,52.0,-52.0
2,2002-08-08,2002-07-27,3532.0,3480.0,Maxwell Clarke,Idea Global,3519.0,2002-08-08,52.0,-13.0
3,2002-08-15,2002-08-03,3576.0,,Ethan S Harris,BofA Securities Inc,3500.0,2002-08-09,,-76.0
4,2002-08-15,2002-08-03,3576.0,,Maxwell Clarke,Idea Global,3540.0,2002-08-12,,-36.0


## 1 Point Forecast Ensembles 

### 1.1 Static inverse-error

In [7]:
warnings.filterwarnings("ignore")

# --------------------------- SETTINGS --------------------------
CONT_WINDOWS = [3, 6, 12]                 # contiguity windows
METHODS      = ["inverse_mse", "inverse_mae", "equal_weight"]
RIDGE        = 1e-6
PANELS       = {"COVID": df.copy(), "Full": df_full.copy()}

REGIMES = {                                    # for stratified step
    "2002‑04 to 2007‑12 (pre‑GFC)"  : ("2000-04-01", "2007-12-31"),
    "2008‑01 to 2009‑12 (GFC)"      : ("2008-01-01", "2009-12-31"),
    "2010‑01 to 2019‑12 (expansion)": ("2010-01-01", "2019-12-31"),
    "2020‑01 to 2022‑12 (COVID)"    : ("2020-01-01", "2022-12-31"),
    "2023‑01 to 2025‑12 (post‑COVID)": ("2023-01-01", "2025-12-31")
}

# ==============================================================
# HELPER FUNCTIONS
# ==============================================================

def weight_vector(history: pd.DataFrame, economists: pd.Index,
                  method: str) -> pd.Series:
    """Return a normalized weight vector for the chosen `method`."""
    if method == "equal_weight":
        w = pd.Series(1.0, index=economists)
    else:
        grp = history[history["economist"].isin(economists)].groupby("economist")["error"]
        score = grp.apply(lambda s: np.nanmean(s ** 2) if method == "inverse_mse"
                          else np.nanmean(np.abs(s)))
        w = 1.0 / (score + RIDGE)
    return w / w.sum()

def backtest_panel(panel: pd.DataFrame, label: str):
    """
    Grid back‑test (window × method) on one panel.
    """
    dates = np.sort(panel["release_date"].unique())
    eval_rows, live_rows = [], []

    for window, method in tqdm(product(CONT_WINDOWS, METHODS),
                               total=len(CONT_WINDOWS) * len(METHODS),
                               desc=f"{label} grid"):
        records = []

        for idx in range(window, len(dates)):
            t_date = dates[idx]
            hist_mask = panel["release_date"].isin(dates[idx - window:idx])
            hist = panel[hist_mask]

            full_mask = hist.groupby("economist")["forecast"].apply(lambda s: s.notna().all())
            econs = full_mask[full_mask].index
            if econs.empty:
                continue

            w = weight_vector(hist, econs, method)

            cur = panel[(panel["release_date"] == t_date) &
                        (panel["economist"].isin(w.index))]
            f_t = cur.set_index("economist")["forecast"].dropna()
            w   = w.reindex(f_t.index).dropna()
            if w.empty:
                continue
            w /= w.sum()

            smart  = np.dot(w, f_t.loc[w.index])
            median = panel.loc[panel["release_date"] == t_date,
                   "median_forecast"].iloc[0]
            actual = panel.loc[panel["release_date"] == t_date, "actual"].iloc[0]
            records.append((t_date, smart, median, actual))

        if not records:
            continue

        oos = pd.DataFrame(records, columns=["date", "smart", "median", "actual"])

        # unreleased month -> live row
        if pd.isna(oos.iloc[-1, 3]):
            last = oos.iloc[-1]
            live_rows.append({
                "panel": label, "window": window, "method": method,
                "date": last["date"], "smart": last["smart"],
                "median": last["median"],
                "pred_dir": int(last["smart"] > last["median"])
            })

        eval_df = oos.dropna(subset=["actual"]).copy()
        if eval_df.empty:
            continue

        eval_df["smart_err"]  = eval_df["smart"]  - eval_df["actual"]
        eval_df["median_err"] = eval_df["median"] - eval_df["actual"]
        eval_df["pred_dir"]   = (eval_df["smart"]  > eval_df["median"]).astype(int)
        eval_df["actual_dir"] = (eval_df["actual"] > eval_df["median"]).astype(int)

        obs = len(eval_df)
        rmse_smart  = np.sqrt((eval_df["smart_err"] ** 2).mean())
        rmse_median = np.sqrt((eval_df["median_err"] ** 2).mean())

        diff  = eval_df["smart_err"] ** 2 - eval_df["median_err"] ** 2
        dm_p  = 2 * (1 - stats.norm.cdf(abs(diff.mean() /
                       diff.std(ddof=1) * np.sqrt(obs))))

        hits     = (eval_df["pred_dir"] == eval_df["actual_dir"]).sum()
        hit_rate = hits / obs
        binom_p  = stats.binomtest(hits, obs, 0.5).pvalue

        p1, p2  = eval_df["pred_dir"].mean(), eval_df["actual_dir"].mean()
        c_joint = (eval_df["pred_dir"] & eval_df["actual_dir"]).mean()
        pt_p    = 2 * (1 - stats.norm.cdf(abs(
                     (c_joint - p1 * p2) /
                     np.sqrt(p1 * p2 * (1 - p1) * (1 - p2) / obs))))

        eval_rows.append({
            "panel": label, "window": window, "method": method,
            "obs": obs, "RMSE_smart": rmse_smart, "RMSE_median": rmse_median,
            "HitRate": hit_rate, "Binom_p": binom_p, "PT_p": pt_p, "DM_p": dm_p
        })

    return pd.DataFrame(eval_rows), pd.DataFrame(live_rows)

def stratified_diagnostics(oos: pd.DataFrame, regimes: dict):
    """Return regime‑level diagnostics for one spec on the FULL panel."""
    rows = []
    oos = oos.dropna(subset=["actual"]).copy()
    if oos.empty:
        return pd.DataFrame()

    oos["smart_err"]  = oos["smart"]  - oos["actual"]
    oos["median_err"] = oos["median"] - oos["actual"]
    oos["pred_dir"]   = (oos["smart"]  > oos["median"]).astype(int)
    oos["actual_dir"] = (oos["actual"] > oos["median"]).astype(int)

    for label, (start, end) in regimes.items():
        sub = oos[(oos["date"] >= start) & (oos["date"] <= end)]
        if sub.empty:
            continue

        obs = len(sub)
        rmse_s = np.sqrt((sub["smart_err"] ** 2).mean())
        rmse_m = np.sqrt((sub["median_err"] ** 2).mean())

        diff = sub["smart_err"] ** 2 - sub["median_err"] ** 2
        dm_p = 2 * (1 - stats.norm.cdf(abs(diff.mean() /
                     diff.std(ddof=1) * np.sqrt(obs))))

        hits = (sub["pred_dir"] == sub["actual_dir"]).sum()
        hit_rate = hits / obs
        binom_p = stats.binomtest(hits, obs, 0.5).pvalue

        p1, p2 = sub["pred_dir"].mean(), sub["actual_dir"].mean()
        c_joint = (sub["pred_dir"] & sub["actual_dir"]).mean()
        pt_p = 2 * (1 - stats.norm.cdf(abs(
                 (c_joint - p1 * p2) /
                 np.sqrt(p1 * p2 * (1 - p1) * (1 - p2) / obs))))

        rows.append({
            "Regime": label, "Obs": obs,
            "RMSE_smart": rmse_s, "RMSE_median": rmse_m,
            "HitRate": hit_rate, "Binom_p": binom_p, "PT_p": pt_p, "DM_p": dm_p
        })
    return pd.DataFrame(rows)

# ==============================================================
# DRIVER
# ==============================================================

# Back‑test both panels
eval_tables, live_tables = {}, {}
for name, pnl in PANELS.items():
    eval_tables[name], live_tables[name] = backtest_panel(pnl, name)

pd.set_option("display.float_format", "{:.3f}".format)
print("\n=== Back‑test summary (COVID panel) ===")
print(eval_tables["COVID"].to_string(index=False))
print("\n=== Back‑test summary (Full panel) ===")
print(eval_tables["Full"].to_string(index=False))

# Select best spec on FULL (minimum RMSE_smart)
best_spec = eval_tables["Full"].loc[eval_tables["Full"]["RMSE_smart"].idxmin()]
best_win  = int(best_spec["window"])
best_met  = best_spec["method"]
print(f"\n>>> Best spec on FULL: window={best_win}, method={best_met}, "
      f"RMSE_smart={best_spec['RMSE_smart']:.3f}")

# Build OOS series for best spec (FULL) for stratified diagnostics
full_dates = np.sort(df_full["release_date"].unique())
oos_records = []
for idx in range(best_win, len(full_dates)):
    d = full_dates[idx]
    hist_mask = df_full["release_date"].isin(full_dates[idx - best_win:idx])
    hist = df_full[hist_mask]
    econ_mask = hist.groupby("economist")["forecast"].apply(lambda s: s.notna().all())
    econ = econ_mask[econ_mask].index
    if econ.empty:
        continue
    w = weight_vector(hist, econ, best_met)
    cur = df_full[(df_full["release_date"] == d) & (df_full["economist"].isin(w.index))]
    f_t = cur.set_index("economist")["forecast"].dropna()
    w   = w.reindex(f_t.index).dropna()
    if w.empty:
        continue
    w /= w.sum()
    smart  = np.dot(w, f_t.loc[w.index])
    median = df_full.loc[df_full["release_date"] == d,
                   "median_forecast"].iloc[0]
    actual = df_full.loc[df_full["release_date"] == d, "actual"].iloc[0]
    oos_records.append((d, smart, median, actual))
oos_best = pd.DataFrame(oos_records, columns=["date", "smart", "median", "actual"])

strat_table = stratified_diagnostics(oos_best, REGIMES)
print("\n=== Stratified regime diagnostics (FULL, best spec) ===")
print(strat_table.to_string(index=False))

# Live forecast with best spec (if any unreleased month)
live_full = live_tables["Full"]
if not live_full.empty:
    live_full = live_full[(live_full["window"] == best_win) &
                          (live_full["method"] == best_met)]

if live_full.empty:
    print("\nNo unreleased month – all actuals available.")
else:
    row = live_full.iloc[-1]
    direction = "Beat" if row["pred_dir"] else "Miss"
    print("\n=== Live forecast (FULL panel, best spec) ===")
    print(f"Date   : {pd.to_datetime(row['date']).date()}")
    print(f"Smart  : {row['smart']:.1f} k")
    print(f"Median : {row['median']:.1f} k")
    print(f"Signal : {direction}")


COVID grid:   0%|          | 0/9 [00:00<?, ?it/s]

Full grid:   0%|          | 0/9 [00:00<?, ?it/s]


=== Back‑test summary (COVID panel) ===
panel  window       method  obs  RMSE_smart  RMSE_median  HitRate  Binom_p  PT_p  DM_p
COVID       3  inverse_mse 1038      67.957       54.631    0.548    0.002 0.002 0.184
COVID       3  inverse_mae 1038      65.356       54.631    0.541    0.008 0.008 0.199
COVID       3 equal_weight 1038      62.873       54.631    0.535    0.027 0.026 0.228
COVID       6  inverse_mse 1035      57.650       54.587    0.550    0.002 0.001 0.575
COVID       6  inverse_mae 1035      59.183       54.587    0.542    0.007 0.007 0.494
COVID       6 equal_weight 1035      62.671       54.587    0.537    0.018 0.017 0.291
COVID      12  inverse_mse 1029      55.030       54.473    0.547    0.003 0.003 0.277
COVID      12  inverse_mae 1029      57.665       54.473    0.549    0.002 0.002 0.761
COVID      12 equal_weight 1029      61.535       54.473    0.540    0.011 0.010 0.556

=== Back‑test summary (Full panel) ===
panel  window       method  obs  RMSE_smart  RMSE

### 1.2 Exponentially Weighted Moving Average (EWMA)

In [9]:
# ------------------------- SETTINGS ---------------------------
contiguity_windows = [3, 6, 12]                # look‑back windows (months)
decays             = np.arange(0.75, 1.00, 0.05)   # exponential‐decay factors
ridge              = 1e-6                          # stabiliser in inverse‑MSE


# ---------------------- HELPER FUNCTIONS ----------------------
def ewma_time_weights(window: int, decay: float) -> np.ndarray:
    """Exponentially decaying weights (oldest→newest) that sum to one."""
    w = decay ** np.arange(window - 1, -1, -1, dtype=float)
    return w / w.sum()


def backtest_ewma(
    panel: pd.DataFrame,
    windows=contiguity_windows,
    decays=decays,
    ridge: float = ridge,
):
    """
    Walk‑forward back‑test for every (window, decay) specification.
    """
    panel_name = getattr(panel, "name", "panel")
    dates = np.sort(panel["release_date"].unique())

    eval_rows, live_rows, oos_map = [], [], {}

    for window, decay in tqdm(
        product(windows, decays),
        total=len(windows) * len(decays),
        desc=f"{panel_name} grid",
    ):
        per_release = []

        # walk forward through time
        for idx in range(window, len(dates)):
            t_date = dates[idx]
            hist_idx = dates[idx - window : idx]
            hist = panel[panel["release_date"].isin(hist_idx)]

            # economists with complete history in window
            econs = (
                hist.groupby("economist")["forecast"]
                .apply(lambda s: s.notna().all())
                .pipe(lambda s: s[s].index)
            )
            if econs.empty:
                continue

            # EWMA MSE for each economist
            weights_time = ewma_time_weights(window, decay)
            mse_scores = {}
            for econ in econs:
                errs = (
                    hist.loc[hist["economist"] == econ]
                    .sort_values("release_date")["error"]
                    .values
                )
                if len(errs) != window:
                    continue
                mse_scores[econ] = np.sum(weights_time * errs**2)

            if not mse_scores:
                continue

            w = pd.Series({e: 1.0 / (s + ridge) for e, s in mse_scores.items()})
            w /= w.sum()

            # current forecasts
            cur = panel[
                (panel["release_date"] == t_date)
                & (panel["economist"].isin(w.index))
            ]
            f_t = cur.set_index("economist")["forecast"].dropna()
            w   = w.reindex(f_t.index).dropna()
            if w.empty:
                continue
            w /= w.sum()

            smart   = np.dot(w, f_t.loc[w.index])
            median = panel.loc[panel["release_date"] == t_date,
                   "median_forecast"].iloc[0]
            actual  = panel.loc[
                panel["release_date"] == t_date, "actual"
            ].iloc[0]
            pred_dir = int(smart > median)

            per_release.append(
                (t_date, smart, median, actual, pred_dir)
            )

        if not per_release:
            continue

        cols = ["date", "smart", "median", "actual", "pred_dir"]
        oos = pd.DataFrame(per_release, columns=cols)
        oos_map[(window, decay)] = oos

        # capture live forecast
        unreleased = oos[oos["actual"].isna()]
        if not unreleased.empty:
            last = unreleased.iloc[-1]
            live_rows.append(
                {
                    "panel": panel_name,
                    "window": window,
                    "decay": decay,
                    "date": last["date"],
                    "smart": last["smart"],
                    "median": last["median"],
                    "pred_dir": last["pred_dir"],
                }
            )

        # evaluation (realised months only)
        eval_df = oos.dropna(subset=["actual"]).copy()
        if eval_df.empty:
            continue

        eval_df["smart_err"]  = eval_df["smart"]  - eval_df["actual"]
        eval_df["median_err"] = eval_df["median"] - eval_df["actual"]
        eval_df["actual_dir"] = (eval_df["actual"] > eval_df["median"]).astype(int)

        obs = len(eval_df)
        rmse_smart  = np.sqrt((eval_df["smart_err"] ** 2).mean())
        rmse_median = np.sqrt((eval_df["median_err"] ** 2).mean())

        diff    = eval_df["smart_err"] ** 2 - eval_df["median_err"] ** 2
        dm_stat = diff.mean() / diff.std(ddof=1) * np.sqrt(obs)
        dm_p    = 2 * (1 - stats.norm.cdf(abs(dm_stat)))

        hits     = (eval_df["actual_dir"] == eval_df["pred_dir"]).astype(int)
        hit_rate = hits.mean()
        binom_p  = stats.binomtest(hits.sum(), obs, 0.5).pvalue

        p1, p2  = eval_df["pred_dir"].mean(), eval_df["actual_dir"].mean()
        c_joint = (eval_df["pred_dir"] & eval_df["actual_dir"]).mean()
        pt_stat = (c_joint - p1 * p2) / np.sqrt(p1 * p2 * (1 - p1) * (1 - p2) / obs)
        pt_p    = 2 * (1 - stats.norm.cdf(abs(pt_stat)))

        eval_rows.append(
            {
                "panel": panel_name,
                "window": window,
                "decay": decay,
                "obs": obs,
                "RMSE_smart": rmse_smart,
                "RMSE_median": rmse_median,
                "HitRate": hit_rate,
                "Binom_p": binom_p,
                "PT_p": pt_p,
                "DM_p": dm_p,
            }
        )

    return pd.DataFrame(eval_rows), oos_map, pd.DataFrame(live_rows)


def stratified_ewma(oos_df: pd.DataFrame, regimes: dict) -> pd.DataFrame:
    """Compute diagnostics over user‑defined regimes."""
    rows = []
    for label, (start, end) in regimes.items():
        sub = oos_df[
            (oos_df["date"] >= start) & (oos_df["date"] <= end) & oos_df["actual"].notna()
        ]
        if sub.empty:
            continue

        sub = sub.copy()
        sub["smart_err"]  = sub["smart"]  - sub["actual"]
        sub["median_err"] = sub["median"] - sub["actual"]
        sub["actual_dir"] = (sub["actual"] > sub["median"]).astype(int)

        obs = len(sub)
        rmse_smart  = np.sqrt((sub["smart_err"] ** 2).mean())
        rmse_median = np.sqrt((sub["median_err"] ** 2).mean())

        diff    = sub["smart_err"] ** 2 - sub["median_err"] ** 2
        dm_stat = diff.mean() / diff.std(ddof=1) * np.sqrt(obs)
        dm_p    = 2 * (1 - stats.norm.cdf(abs(dm_stat)))

        hits     = (sub["actual_dir"] == sub["pred_dir"]).astype(int)
        hit_rate = hits.mean()
        binom_p  = stats.binomtest(hits.sum(), obs, 0.5).pvalue

        p1, p2  = sub["pred_dir"].mean(), sub["actual_dir"].mean()
        c_joint = (sub["pred_dir"] & sub["actual_dir"]).mean()
        pt_stat = (c_joint - p1 * p2) / np.sqrt(p1 * p2 * (1 - p1) * (1 - p2) / obs)
        pt_p    = 2 * (1 - stats.norm.cdf(abs(pt_stat)))

        rows.append(
            {
                "Regime": label,
                "Obs": obs,
                "RMSE_smart": rmse_smart,
                "RMSE_median": rmse_median,
                "HitRate": hit_rate,
                "Binom_p": binom_p,
                "PT_p": pt_p,
                "DM_p": dm_p,
            }
        )
    return pd.DataFrame(rows)

# -------------------------- DRIVER -----------------------------
# Tag the two DataFrames so helper functions can reference .name
df.name = "COVID"
df_full.name = "Full"

# Back‑test on both panels
eval_cov,  oos_cov,  live_cov  = backtest_ewma(df)
eval_full, oos_full, live_full = backtest_ewma(df_full)

pd.set_option("display.float_format", "{:.3f}".format)

print("\n--- COVID‑filtered panel: EWMA grid search ---")
print(eval_cov.sort_values(["window", "decay"]).to_string(index=False))

print("\n--- Full panel: EWMA grid search ---")
print(eval_full.sort_values(["window", "decay"]).to_string(index=False))

# Pick robust‑winner on COVID panel
criteria = (
    (eval_cov["DM_p"] < 0.10)
    & (eval_cov["Binom_p"] < 0.10)
    & (eval_cov["PT_p"] < 0.10)
)
candidates = eval_cov[criteria]
winner = (
    candidates if not candidates.empty else eval_cov
).loc[lambda d: d["RMSE_smart"].idxmin()]

w_best = int(winner["window"])
d_best = float(winner["decay"])

print(
    f"\n>>> Robust‑winner specification (COVID panel): "
    f"window = {w_best} months, decay = {d_best:.2f}"
)

# Stratified robustness on Full panel
regimes = {
    "2002‑04 to 2007‑12 (pre‑GFC)": ("2002-04-01", "2007-12-31"),
    "2008‑01 to 2009‑12 (GFC)": ("2008-01-01", "2009-12-31"),
    "2010‑01 to 2019‑12 (expansion)": ("2010-01-01", "2019-12-31"),
    "2020‑01 to 2022‑12 (COVID)": ("2020-01-01", "2022-12-31"),
    "2023‑01 to 2025‑12 (post‑COVID)": ("2023-01-01", "2025-12-31"),
}

key = (w_best, d_best)
if key in oos_full:
    strat_tbl = stratified_ewma(oos_full[key], regimes)
    print(
        f"\n--- Stratified robustness (Full panel • window={w_best}, "
        f"decay={d_best:.2f}) ---"
    )
    if strat_tbl.empty:
        print("No realised data in these periods.")
    else:
        print(strat_tbl.to_string(index=False))
else:
    print("\nNo matching OOS predictions for stratified check.")

# Live forecast for the robust‑winner spec
live_match = live_cov[
    (live_cov["window"] == w_best) & (live_cov["decay"] == d_best)
]
if live_match.empty:
    print("\nNo unreleased month – all actuals available.")
else:
    row = live_match.iloc[-1]
    direction = "Beat" if row["pred_dir"] else "Miss"
    print("\n--- LIVE EWMA FORECAST (COVID panel robust winner) ---")
    print(f"Date      : {row['date'].date()}")
    print(f"Smart     : {row['smart']:.1f} k jobs")
    print(f"Median    : {row['median']:.1f} k jobs")
    print(
        f"Direction : {direction}  "
        f"(window={w_best} months, decay={d_best:.2f})"
    )


COVID grid:   0%|          | 0/15 [00:00<?, ?it/s]

Full grid:   0%|          | 0/15 [00:00<?, ?it/s]


--- COVID‑filtered panel: EWMA grid search ---
panel  window  decay  obs  RMSE_smart  RMSE_median  HitRate  Binom_p  PT_p  DM_p
COVID       3  0.750 1032      81.449       54.680    0.554    0.001 0.000 0.235
COVID       3  0.800 1032      81.480       54.680    0.555    0.000 0.000 0.233
COVID       3  0.850 1032      81.508       54.680    0.556    0.000 0.000 0.231
COVID       3  0.900 1032      81.535       54.680    0.559    0.000 0.000 0.229
COVID       3  0.950 1032      81.559       54.680    0.560    0.000 0.000 0.228
COVID       6  0.750 1019      76.647       54.694    0.558    0.000 0.000 0.169
COVID       6  0.800 1019      76.641       54.694    0.556    0.000 0.000 0.173
COVID       6  0.850 1019      76.637       54.694    0.554    0.001 0.001 0.175
COVID       6  0.900 1019      76.637       54.694    0.554    0.001 0.001 0.176
COVID       6  0.950 1019      76.639       54.694    0.549    0.002 0.002 0.175
COVID      12  0.750  969      78.032       54.345    0.558  

### 1.3 Soft-BMA with student-t plug-in likelihood

In [7]:
# --------------------------- KNOBS --------------------------
WINDOWS  = [3, 6, 12]                    # rolling windows (months)
NU_GRID  = [3, 5, 10, 20, 50]         # Student‑t degrees of freedom
RIDGE    = 1e-6                       # stabiliser in inverse‑MSE
REGIMES  = {                          # stratification buckets
    "2002‑04 to 2007‑12 (pre‑GFC)": ("2002-04-01", "2007-12-31"),
    "2008‑01 to 2009‑12 (GFC)": ("2008-01-01", "2009-12-31"),
    "2010‑01 to 2019‑12 (expansion)": ("2010-01-01", "2019-12-31"),
    "2020‑01 to 2022‑12 (COVID)": ("2020-01-01", "2022-12-31"),
    "2023‑01 to 2025‑12 (post‑COVID)": ("2023-01-01", "2025-12-31"),
}

np.seterr(all="ignore")


# ----------------------- HELPER FUNCTIONS ---------------------
def soft_bma_weights(errors: dict, nu: int) -> pd.Series:
    """
    Compute soft‑BMA weights for one window of errors.
    """
    log_like = {}
    for econ, err in errors.items():
        if err.size and np.std(err, ddof=1) > 0.0:
            sigma = np.std(err, ddof=1)
            log_like[econ] = student_t.logpdf(err, df=nu, loc=0.0, scale=sigma).sum()

    if not log_like:
        return pd.Series(dtype=float)

    raw = np.array(list(log_like.values()))
    w   = pd.Series(np.exp(raw - raw.max()), index=list(log_like.keys()))
    return w / w.sum()


def backtest_soft_bma(
    panel: pd.DataFrame,
    windows=WINDOWS,
    nu_grid=NU_GRID,
):
    """
    Back‑test Student‑t soft‑BMA for all (window, nu) specs.
    """
    panel_name = getattr(panel, "name", "panel")
    dates = np.sort(panel["release_date"].unique())

    eval_rows, live_rows, oos_map = [], [], {}

    for window, nu in tqdm(
        product(windows, nu_grid),
        total=len(windows) * len(nu_grid),
        desc=f"{panel_name} grid",
    ):
        per_release = []

        for idx in range(window, len(dates)):
            t_date   = dates[idx]
            hist_idx = dates[idx - window : idx]
            hist     = panel[panel["release_date"].isin(hist_idx)]

            # economists with complete coverage in the window
            econs = (
                hist.groupby("economist")["forecast"]
                .apply(lambda s: s.notna().all())
                .pipe(lambda s: s[s].index)
            )
            if econs.empty:
                continue

            # gather error arrays
            err_dict = {}
            for econ in econs:
                err_vec = (
                    hist.loc[hist["economist"] == econ]
                    .sort_values("release_date")["error"]
                    .values
                )
                if len(err_vec) == window:
                    err_dict[econ] = err_vec

            if not err_dict:
                continue

            weights = soft_bma_weights(err_dict, nu)
            if weights.empty:
                continue

            # align with current forecasts
            cur = panel[
                (panel["release_date"] == t_date)
                & (panel["economist"].isin(weights.index))
            ]
            f_t = cur.set_index("economist")["forecast"].dropna()
            w   = weights.reindex(f_t.index).dropna()
            if w.empty:
                continue
            w /= w.sum()

            smart   = np.dot(w, f_t.loc[w.index])
            median  = panel.loc[
                panel["release_date"] == t_date, "forecast"
            ].dropna().median()
            actual  = panel.loc[
                panel["release_date"] == t_date, "actual"
            ].iloc[0]

            per_release.append((t_date, smart, median, actual))

        if not per_release:
            continue

        oos = pd.DataFrame(
            per_release,
            columns=["date", "smart", "median", "actual"],
        )
        oos_map[(window, nu)] = oos

        # live (unreleased) forecast
        unreleased = oos[oos["actual"].isna()]
        if not unreleased.empty:
            last = unreleased.iloc[-1]
            live_rows.append(
                {
                    "panel": panel_name,
                    "window": window,
                    "nu": nu,
                    "date": last["date"],
                    "smart": last["smart"],
                    "median": last["median"],
                }
            )

        # realised months only
        eval_df = oos.dropna(subset=["actual"]).copy()
        if eval_df.empty:
            continue

        eval_df["smart_err"]  = eval_df["smart"]  - eval_df["actual"]
        eval_df["median_err"] = eval_df["median"] - eval_df["actual"]
        eval_df["pred_dir"]   = (eval_df["smart"]  > eval_df["median"]).astype(int)
        eval_df["actual_dir"] = (eval_df["actual"] > eval_df["median"]).astype(int)

        obs = len(eval_df)
        rmse_smart  = np.sqrt((eval_df["smart_err"] ** 2).mean())
        rmse_median = np.sqrt((eval_df["median_err"] ** 2).mean())

        diff    = eval_df["smart_err"] ** 2 - eval_df["median_err"] ** 2
        dm_stat = diff.mean() / diff.std(ddof=1) * np.sqrt(obs)
        dm_p    = 2 * (1 - norm.cdf(abs(dm_stat)))

        hits     = (eval_df["pred_dir"] == eval_df["actual_dir"]).sum()
        hit_rate = hits / obs
        binom_p  = binomtest(hits, obs, 0.5).pvalue

        p1, p2  = eval_df["pred_dir"].mean(), eval_df["actual_dir"].mean()
        c_joint = (eval_df["pred_dir"] & eval_df["actual_dir"]).mean()
        pt_stat = (c_joint - p1 * p2) / np.sqrt(p1 * p2 * (1 - p1) * (1 - p2) / obs)
        pt_p    = 2 * (1 - norm.cdf(abs(pt_stat)))

        eval_rows.append(
            {
                "panel": panel_name,
                "window": window,
                "nu": nu,
                "obs": obs,
                "RMSE_smart": rmse_smart,
                "RMSE_median": rmse_median,
                "HitRate": hit_rate,
                "Binom_p": binom_p,
                "PT_p": pt_p,
                "DM_p": dm_p,
            }
        )

    return pd.DataFrame(eval_rows), oos_map, pd.DataFrame(live_rows)


def stratified_soft_bma(oos_df: pd.DataFrame, regimes: dict) -> pd.DataFrame:
    """Directional and DM diagnostics across user‑defined regimes."""
    rows = []
    for label, (start, end) in regimes.items():
        sub = oos_df[
            (oos_df["date"] >= start) & (oos_df["date"] <= end) & oos_df["actual"].notna()
        ]
        if sub.empty:
            continue

        sub = sub.copy()
        sub["smart_err"]  = sub["smart"]  - sub["actual"]
        sub["median_err"] = sub["median"] - sub["actual"]
        sub["pred_dir"]   = (sub["smart"] > sub["median"]).astype(int)
        sub["actual_dir"] = (sub["actual"] > sub["median"]).astype(int)

        obs = len(sub)
        rmse_smart  = np.sqrt((sub["smart_err"] ** 2).mean())
        rmse_median = np.sqrt((sub["median_err"] ** 2).mean())

        diff    = sub["smart_err"] ** 2 - sub["median_err"] ** 2
        dm_stat = diff.mean() / diff.std(ddof=1) * np.sqrt(obs)
        dm_p    = 2 * (1 - norm.cdf(abs(dm_stat)))

        hits     = (sub["pred_dir"] == sub["actual_dir"]).sum()
        hit_rate = hits / obs
        binom_p  = binomtest(hits, obs, 0.5).pvalue

        p1, p2  = sub["pred_dir"].mean(), sub["actual_dir"].mean()
        c_joint = (sub["pred_dir"] & sub["actual_dir"]).mean()
        pt_stat = (c_joint - p1 * p2) / np.sqrt(p1 * p2 * (1 - p1) * (1 - p2) / obs)
        pt_p    = 2 * (1 - norm.cdf(abs(pt_stat)))

        rows.append(
            {
                "Regime": label,
                "Obs": obs,
                "RMSE_smart": rmse_smart,
                "RMSE_median": rmse_median,
                "HitRate": hit_rate,
                "Binom_p": binom_p,
                "PT_p": pt_p,
                "DM_p": dm_p,
            }
        )
    return pd.DataFrame(rows)


# --------------------------- DRIVER ----------------------------
# Tag DataFrames so helpers can reference .name
df.name = "COVID-filtered panel"
df_full.name = "Full panel"

# Back‑tests
eval_cov,  oos_cov,  live_cov  = backtest_soft_bma(df)
eval_full, oos_full, live_full = backtest_soft_bma(df_full)

pd.set_option("display.float_format", "{:.3f}".format)

print("\n--- COVID‑filtered panel: Student‑t soft‑BMA grid ---")
print(eval_cov.sort_values(["window", "nu"]).to_string(index=False))

print("\n--- Full panel: Student‑t soft‑BMA grid ---")
print(eval_full.sort_values(["window", "nu"]).to_string(index=False))

# Robust‑winner selection on COVID panel
criteria = eval_cov["PT_p"] < 0.05
candidates = eval_cov[criteria]
winner = (
    candidates if not candidates.empty else eval_cov
).loc[lambda d: d["RMSE_smart"].idxmin()]

w_best = int(winner["window"])
nu_best = int(winner["nu"])

print(
    f"\n>>> Robust‑winner spec (COVID panel): "
    f"window = {w_best} months, nu = {nu_best}"
)

# Stratified robustness on full panel
key = (w_best, nu_best)
if key in oos_full:
    strat_tbl = stratified_soft_bma(oos_full[key], REGIMES)
    print(
        f"\n--- Stratified robustness (Full panel • window={w_best}, nu={nu_best}) ---"
    )
    if strat_tbl.empty:
        print("No realised data in these periods.")
    else:
        print(strat_tbl.to_string(index=False))
else:
    print("\nNo matching OOS predictions for stratified check.")

# Live forecast for the robust‑winner
live_match = live_cov[
    (live_cov["window"] == w_best) & (live_cov["nu"] == nu_best)
]
if live_match.empty:
    print("\nNo unreleased month – all actuals available.")
else:
    row = live_match.iloc[-1]
    direction = "Beat" if row["smart"] > row["median"] else "Miss"
    print("\n--- LIVE soft‑BMA FORECAST (COVID panel robust winner) ---")
    print(f"Date      : {pd.to_datetime(row['date']).date()}")
    print(f"Smart     : {row['smart']:.1f} k jobs")
    print(f"Median    : {row['median']:.1f} k jobs")
    print(
        f"Direction : {direction}  "
        f"(window={w_best} months, nu={nu_best})"
    )


COVID-filtered panel grid:   0%|          | 0/15 [00:00<?, ?it/s]

Full panel grid:   0%|          | 0/15 [00:00<?, ?it/s]


--- COVID‑filtered panel: Student‑t soft‑BMA grid ---
               panel  window  nu  obs  RMSE_smart  RMSE_median  HitRate  Binom_p  PT_p  DM_p
COVID-filtered panel       3   3 1032      82.542       61.284    0.525    0.112 0.106 0.144
COVID-filtered panel       3   5 1032      82.593       61.284    0.526    0.099 0.093 0.143
COVID-filtered panel       3  10 1032      82.646       61.284    0.531    0.050 0.044 0.142
COVID-filtered panel       3  20 1032      82.681       61.284    0.532    0.043 0.039 0.141
COVID-filtered panel       3  50 1032      82.707       61.284    0.531    0.050 0.044 0.141
COVID-filtered panel       6   3 1019      76.926       61.388    0.540    0.012 0.011 0.274
COVID-filtered panel       6   5 1019      76.906       61.388    0.543    0.007 0.006 0.274
COVID-filtered panel       6  10 1019      76.921       61.388    0.543    0.007 0.006 0.274
COVID-filtered panel       6  20 1019      76.987       61.388    0.545    0.005 0.004 0.271
COVID-filtered 