# Forecasting Consensus Expectations: Initial Jobless Claims
## Point and Directional Forecasts

**Imports**

In [1]:
import os
import warnings
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st

from tqdm.auto import tqdm
from scipy import stats, special
from scipy.optimize import brentq
from collections import defaultdict
from itertools import product
from scipy.stats import t as student_t, norm, binomtest, jarque_bera
from arch.univariate import ConstantMean, GARCH, StudentsT
from arch.univariate.base import ConvergenceWarning
from IPython.display import display, Markdown

In [2]:
# Directory and file names of the two parquet panels read below.
OUT_DIR = "../out"
DF_FILE = "ijc_df.parquet"
DF_FULL_FILE = "ijc_df_full.parquet"

# Read both panels with the pyarrow engine, in the order (df, df_full).
# NOTE(review): a later cell describes `df` as the COVID-filtered panel and
# uses `df_full` as the default — confirm against the upstream export.
df, df_full = (
    pd.read_parquet(os.path.join(OUT_DIR, fname), engine="pyarrow")
    for fname in (DF_FILE, DF_FULL_FILE)
)

# Quick sanity check: (rows, columns) of each panel.
print("df shape     :", df.shape)
print("df_full shape:", df_full.shape)

df shape     : (180681, 10)
df_full shape: (213651, 10)


In [8]:
# --------------------------------------------------------------
# IJC majority-vote ensemble
# • windows: 4, 12, 24 releases  (≈ 1, 3, 6 months)
# • methods: equal, inverse-MAE, inverse-MSE
# • metrics: point forecast, RMSE, directional hit-rate
# --------------------------------------------------------------
import numpy as np
import pandas as pd
from itertools import product
from tqdm.auto import tqdm
import warnings

# Silence every Python warning and every NumPy floating-point error category
# from this point on (process-wide, not just for this cell).
warnings.filterwarnings(action="ignore")
np.seterr(divide="ignore", over="ignore", under="ignore", invalid="ignore")

# ---------- config ----------
# Working copy of the panel, so the loaded frame itself is never mutated.
PANEL = df_full.copy(deep=True)   # or df (COVID-filtered)
# Look-back lengths, counted in releases, used by the walk-forward loop.
WINDOWS = [4, 12, 24]
# Weighting schemes understood by calc_weights.
METHODS = ["equal", "inv_mae", "inv_mse"]
# Small constant added to each economist's error score before inversion,
# so a zero score does not produce an infinite weight.
RIDGE = 1e-6

# ---------- helper ----------
def calc_weights(hist: pd.DataFrame, method: str) -> pd.Series:
    """Return normalised combination weights indexed by economist.

    Parameters
    ----------
    hist : pd.DataFrame
        Look-back panel. Must contain an ``economist`` column and, for the
        inverse-error methods, an ``error`` column.
    method : str
        ``"equal"``   – every economist gets the same weight;
        ``"inv_mae"`` – weight proportional to 1 / (mean |error| + RIDGE);
        ``"inv_mse"`` – weight proportional to 1 / (mean error² + RIDGE).

    Returns
    -------
    pd.Series
        Weights summing to one (NaN scores are skipped by the sum). The
        Series is empty when ``hist`` contains no economists.

    Raises
    ------
    ValueError
        If ``method`` is not one of the three supported names. Previously an
        unknown name silently fell through to the inverse-MAE branch.
    """
    if method not in ("equal", "inv_mae", "inv_mse"):
        raise ValueError(
            f"unknown weighting method {method!r}; "
            "expected 'equal', 'inv_mae' or 'inv_mse'"
        )

    if method == "equal":
        n = hist["economist"].nunique()
        if n == 0:
            # Nothing to weight; avoid 1.0 / 0.
            return pd.Series(dtype=float)
        return pd.Series(1.0 / n, index=hist["economist"].unique())

    # Per-row loss, then a vectorised per-economist mean. The groupby mean
    # skips NaN, matching the former np.nanmean-per-group lambdas without a
    # Python-level call for every economist.
    err = hist["error"]
    loss = err.pow(2) if method == "inv_mse" else err.abs()
    score = loss.groupby(hist["economist"]).mean()

    # RIDGE keeps a perfect (zero-error) record from getting infinite weight.
    w = 1.0 / (score + RIDGE)
    return w / w.sum()

# ---------- walk-forward ----------
# Sorted unique release dates; the loop below indexes into this array so that
# dates[idx-win:idx] is always the `win` releases strictly before dates[idx].
dates = np.sort(PANEL["release_date"].unique())
records = []

outer = list(product(WINDOWS, METHODS))
for win, meth in tqdm(outer, desc="window × method", total=len(outer)):
    for idx in tqdm(range(win, len(dates)),
                    desc=f"{win}-wk {meth}", leave=False,
                    total=len(dates) - win):

        t = dates[idx]
        # Look-back sample: rows from the `win` releases preceding t.
        hist = PANEL[PANEL["release_date"].isin(dates[idx-win:idx])]

        # contiguity: keep economists whose forecasts in the window are all
        # non-missing (vectorised; no per-group Python lambda).
        # NOTE(review): this only inspects rows that exist in `hist`; an
        # economist with no row at all for some release in the window still
        # passes — confirm the panel is a full economist × date grid.
        elig = hist["forecast"].notna().groupby(hist["economist"]).all()
        econs = elig[elig].index
        if econs.empty:
            continue

        w = calc_weights(hist[hist["economist"].isin(econs)], meth)

        # Slice the release-t rows once instead of rebuilding the same
        # boolean mask for the forecasts, the median and the actual.
        today = PANEL[PANEL["release_date"] == t]

        cur = today[today["economist"].isin(w.index)]
        f_t = cur.set_index("economist")["forecast"].dropna()
        # Drop weights of economists without a forecast at t, then
        # renormalise so the remaining weights sum to one.
        w   = w.reindex(f_t.index).dropna()
        if w.empty:
            continue
        w /= w.sum()

        point  = np.dot(w, f_t.loc[w.index])
        # Crowd median uses every forecast submitted for t, not only the
        # eligible economists.
        median = today["forecast"].dropna().median()
        actual = today["actual"].iloc[0]

        records.append({
            "window": win, "method": meth, "date": t,
            "point": point, "median": median, "actual": actual
        })

# One row per (window, method, date) for which a point forecast was formed.
results = pd.DataFrame(records)

# ---------- evaluate each method ----------
# Score every (window, method) pair on the releases whose actual is known:
# RMSE of the point forecast, and how often the forecast and the actual fall
# on the same side of the crowd median.
eval_rows = []
has_actual = results["actual"].notna()
for win, meth in product(WINDOWS, METHODS):
    in_cell = (results["window"] == win) & (results["method"] == meth)
    sub = results.loc[in_cell & has_actual].copy()
    if len(sub) == 0:
        continue

    sub["error"] = sub["point"].sub(sub["actual"])
    # 1 when the value lies strictly above the crowd median, else 0.
    sub["dir_pred"] = sub["point"].gt(sub["median"]).astype(int)
    sub["dir_act"] = sub["actual"].gt(sub["median"]).astype(int)

    obs = sub.shape[0]
    rmse = np.sqrt(sub["error"].pow(2).mean())
    hits = sub["dir_pred"].eq(sub["dir_act"]).sum()
    eval_rows.append(dict(window=win, method=meth,
                          obs=obs, RMSE=rmse,
                          HitRate=hits / obs))

summary = pd.DataFrame(eval_rows)
summary = summary.sort_values(["window", "method"]).reset_index(drop=True)

print("=== Individual-method performance ===")
print(summary.to_string(index=False, float_format="{:.3f}".format))

# ---------- choose best window ----------
# Pick the window whose methods have the highest average hit-rate.
# NOTE(review): the window is selected on the same releases the ensemble is
# then scored on, so the figures printed below are in-sample.
best_win = (summary.groupby("window")["HitRate"].mean()
                     .idxmax())
print(f"\nChosen window for ensemble = {best_win} releases")

# ---------- majority vote ----------
# Each method votes 1 when its point forecast is above the crowd median.
# The ensemble direction is 1 only on a strict majority of votes; the rule
# is derived from METHODS rather than hard-coded for three methods (with an
# even number of methods a tie resolves to 0).
n_methods = len(METHODS)
# The window filter does not depend on the date, so apply it once.
win_results = results[results["window"] == best_win]

ensemble_rows = []
for t in dates[best_win:]:
    slice_ = win_results[win_results["date"] == t]
    # Need one row per method and a realised actual to score the date.
    if slice_.shape[0] < n_methods or pd.isna(slice_["actual"].iloc[0]):
        continue

    votes = (slice_["point"] > slice_["median"]).astype(int).values
    robust_dir = int(2 * votes.sum() > len(votes))
    point_mean = slice_["point"].mean()

    actual = slice_["actual"].iloc[0]
    error  = point_mean - actual
    dir_act = int(actual > slice_["median"].iloc[0])

    ensemble_rows.append({"date": t, "point": point_mean,
                          "error": error,
                          "dir_pred": robust_dir,
                          "dir_act": dir_act})

# Explicit columns keep the metrics below from raising KeyError when no date
# qualified; they evaluate to NaN instead.
robust_df = pd.DataFrame(
    ensemble_rows,
    columns=["date", "point", "error", "dir_pred", "dir_act"],
)
robust_rmse = np.sqrt((robust_df["error"]**2).mean())
robust_rate = (robust_df["dir_pred"] == robust_df["dir_act"]).mean()

print(f"\n=== Majority-vote ensemble (window {best_win}) ===")
print(f"Observations          : {len(robust_df)}")
print(f"RMSE (ensemble point) : {robust_rmse:.3f}")
print(f"Hit-rate (direction)  : {robust_rate:.3f}")

# ---------- latest unreleased week ----------
# Rows for the chosen window whose actual has not been published yet.
latest = results[(results["window"] == best_win) &
                 results["actual"].isna()]
if not latest.empty:
    # If several releases lack an actual, report only the most recent one so
    # the vote below never mixes rows from different dates.
    latest = latest[latest["date"] == latest["date"].max()]

    print("\n--- Latest unreleased week ---")
    for m in METHODS:
        pts = latest.loc[latest["method"] == m, "point"]
        if pts.empty:
            # This method produced no forecast for the date; skip it rather
            # than raise IndexError on .iloc[0].
            continue
        pt = pts.iloc[0]
        print(f"{m:8s} point forecast: {pt:.1f}")

    # Strict majority of methods forecasting above the crowd median. The
    # former ">= 0.67" cut-off rejected a 2-of-3 vote (2/3 ≈ 0.6667), which
    # contradicted the ">= 2 of 3" rule used in the back-test.
    above_share = (latest["point"] > latest["median"]).mean()
    mv_dir = "Beat" if above_share > 0.5 else "Miss"
    print(f"Majority vote direction: {mv_dir}")


window × method:   0%|          | 0/9 [00:00<?, ?it/s]

4-wk equal:   0%|          | 0/1015 [00:00<?, ?it/s]

4-wk inv_mae:   0%|          | 0/1015 [00:00<?, ?it/s]

4-wk inv_mse:   0%|          | 0/1015 [00:00<?, ?it/s]

12-wk equal:   0%|          | 0/1007 [00:00<?, ?it/s]

12-wk inv_mae:   0%|          | 0/1007 [00:00<?, ?it/s]

12-wk inv_mse:   0%|          | 0/1007 [00:00<?, ?it/s]

24-wk equal:   0%|          | 0/995 [00:00<?, ?it/s]

24-wk inv_mae:   0%|          | 0/995 [00:00<?, ?it/s]

24-wk inv_mse:   0%|          | 0/995 [00:00<?, ?it/s]

=== Individual-method performance ===
 window  method  obs    RMSE  HitRate
      4   equal 1014 116.295    0.551
      4 inv_mae 1014  96.728    0.562
      4 inv_mse 1014  76.064    0.551
     12   equal 1006 116.154    0.562
     12 inv_mae 1006 105.431    0.564
     12 inv_mse 1006  92.361    0.566
     24   equal  994  86.074    0.543
     24 inv_mae  994  80.975    0.548
     24 inv_mse  994  79.111    0.550

Chosen window for ensemble = 12 releases

=== Majority-vote ensemble (window 12) ===
Observations          : 1006
RMSE (ensemble point) : 104.213
Hit-rate (direction)  : 0.562

--- Latest unreleased week ---
equal    point forecast: 234.1
inv_mae  point forecast: 233.8
inv_mse  point forecast: 233.6
Majority vote direction: Miss


In [9]:
# ---------- evaluate each method ----------
# Same scoring as the first evaluation pass, plus one crowd-median baseline
# row per window so each method's RMSE can be read against the consensus.

eval_rows = []
baseline_rows = []              # store crowd-median metrics per window
baseline_done = set()           # windows that already have a baseline row

for win, meth in product(WINDOWS, METHODS):
    # subset with realised actuals
    sub = results[
        (results["window"] == win) &
        (results["method"] == meth) &
        results["actual"].notna()
    ].copy()
    if sub.empty:
        continue

    # ensemble metrics
    sub["error"]    = sub["point"] - sub["actual"]
    # 1 when the value lies strictly above the crowd median, else 0
    sub["dir_pred"] = (sub["point"]  > sub["median"]).astype(int)
    sub["dir_act"]  = (sub["actual"] > sub["median"]).astype(int)

    obs   = len(sub)
    rmse  = np.sqrt((sub["error"]**2).mean())
    hits  = (sub["dir_pred"] == sub["dir_act"]).sum()

    eval_rows.append(
        {"window": win, "method": meth, "obs": obs,
         "RMSE": rmse, "HitRate": hits / obs}
    )

    # ---------- baseline crowd-median for THIS window ----------
    # Computed once per window, from the first method that has scored rows
    # (previously tied to METHODS[0], so the baseline was silently dropped
    # whenever that method had none).
    if win not in baseline_done:
        baseline_done.add(win)
        base_err   = sub["median"] - sub["actual"]
        base_rmse  = np.sqrt((base_err**2).mean())
        # Direction is measured relative to the median itself, so a hit-rate
        # for the median is undefined and reported as NaN.
        baseline_rows.append(
            {"window": win, "method": "crowd_median",
             "obs": obs, "RMSE": base_rmse, "HitRate": np.nan}
        )

# combine frames: ensemble methods + crowd baseline
summary = (
    pd.concat([pd.DataFrame(eval_rows), pd.DataFrame(baseline_rows)])
      .sort_values(["window", "method"])
      .reset_index(drop=True)
)

print("=== Individual-method performance (incl. crowd baseline) ===")
print(summary.to_string(index=False, float_format="{:.3f}".format))


=== Individual-method performance (incl. crowd baseline) ===
 window       method  obs    RMSE  HitRate
      4 crowd_median 1014 122.423      NaN
      4        equal 1014 116.295    0.551
      4      inv_mae 1014  96.728    0.562
      4      inv_mse 1014  76.064    0.551
     12 crowd_median 1006 122.902      NaN
     12        equal 1006 116.154    0.562
     12      inv_mae 1006 105.431    0.564
     12      inv_mse 1006  92.361    0.566
     24 crowd_median  994 123.623      NaN
     24        equal  994  86.074    0.543
     24      inv_mae  994  80.975    0.548
     24      inv_mse  994  79.111    0.550
