# Week 2 — Model vs Actuals: Grading Notebook 📊

This notebook evaluates **Week 2** props predictions against realized outcomes to identify best‑performing markets (and overall performance).

**Inputs**
- `data/props/props_with_model_week2.csv` (model outputs + book market)
- `data/actuals/week2.csv` (built earlier)

**Outputs**
- `data/eval/grades_week2.csv` — row-level joined dataset with metrics
- `data/eval/market_perf_week2.csv` — per-market summary (hit rate, Brier, ROI, etc.)

**Metrics**
- Accuracy (hit rate)
- Brier score (lower is better)
- Log loss (lower is better; ignores rows with p∈{0,1})
- ROI per $1 using market odds when available (fallback from mkt_prob)
- Edge direction capture rate (did results agree with model edge sign?)
- Correlation between `model_prob` and outcome


In [1]:

from pathlib import Path
import pandas as pd
import numpy as np

WEEK = 2
BASE = Path.cwd()

props_path   = BASE / f"data/props/props_with_model_week{WEEK}.csv"
actuals_path = BASE / f"data/actuals/week{WEEK}.csv"
out_grades   = BASE / f"data/eval/grades_week{WEEK}.csv"
out_markets  = BASE / f"data/eval/market_perf_week{WEEK}.csv"

print("[paths]")
print(" props   :", props_path.exists(), props_path)
print(" actuals :", actuals_path.exists(), actuals_path)

if not props_path.exists() or not actuals_path.exists():
    raise SystemExit("Missing inputs; ensure Week 2 props and actuals exist.")
    
dfp = pd.read_csv(props_path)
dfa = pd.read_csv(actuals_path)
print("[shape] props:", dfp.shape, " actuals:", dfa.shape)

# normalize key columns
def has(df, cols): return all(c in df.columns for c in cols)

join_cols = ["player_key","market_std","side"]
use_point = "point_key" in dfp.columns and "point_key" in dfa.columns
if use_point:
    join_cols += ["point_key"]

missing = [c for c in join_cols if c not in dfp.columns or c not in dfa.columns]
if missing:
    # fallback: ignore point_key if missing
    join_cols = [c for c in ["player_key","market_std","side"] if c in dfp.columns and c in dfa.columns]
    print("[warn] using fallback join keys:", join_cols)

merged = dfp.merge(dfa, on=join_cols, how="left", suffixes=("", "_act"))
print("[merge] rows:", len(merged), " matched results:", merged["result"].notna().sum())

# Helper conversions
def amer_to_prob(oa):
    if pd.isna(oa): return np.nan
    oa = float(oa)
    return (-oa)/((-oa)+100.0) if oa < 0 else 100.0/(oa+100.0)

def amer_to_decimal(oa):
    if pd.isna(oa): return np.nan
    oa = float(oa)
    return (1 + 100.0/(-oa)) if oa < 0 else (1 + oa/100.0)

# Column mapping
c_side  = next((c for c in ["side","bet_side","ou_side","yes_no_side"] if c in merged.columns), "side")
c_pmod  = next((c for c in ["model_prob","model_p","p_model","pred_prob"] if c in merged.columns), None)
c_pbook = next((c for c in ["mkt_prob","consensus_prob","book_implied_prob"] if c in merged.columns), None)
c_odds  = next((c for c in ["odds_american","mkt_odds_american","american_odds","price"] if c in merged.columns), None)

if c_pmod is None:
    raise SystemExit("Missing model probability column (expected one of model_prob/model_p/p_model/pred_prob)")

# Coerce probs
merged[c_pmod]  = pd.to_numeric(merged[c_pmod], errors="coerce").clip(0,1)
if c_pbook: merged[c_pbook] = pd.to_numeric(merged[c_pbook], errors="coerce").clip(0,1)
if c_odds:
    merged[c_odds] = pd.to_numeric(merged[c_odds], errors="coerce")

# Fill market prob from odds if needed
if c_pbook and merged[c_pbook].isna().any():
    merged[c_pbook] = merged[c_pbook].fillna(merged[c_odds].apply(amer_to_prob) if c_odds else np.nan)

# Effective model vs market probs conditioned on side (UNDER/NO gets 1-p)
side = merged[c_side].astype(str).str.lower()
yes_like = side.isin(["over","yes"])
p_model_eff = merged[c_pmod].where(yes_like, 1.0 - merged[c_pmod])
p_mkt_eff   = (merged[c_pbook].where(yes_like, 1.0 - merged[c_pbook])) if c_pbook else np.nan

# Metrics per row
merged["brier"] = (merged[c_pmod] - merged["result"])**2
eps = 1e-9
p_safe = merged[c_pmod].clip(eps, 1-eps)
merged["logloss"] = -(merged["result"]*np.log(p_safe) + (1-merged["result"])*np.log(1-p_safe))

# ROI using odds if available, else implied from mkt_prob
if c_odds:
    merged["decimal_odds"] = merged[c_odds].apply(amer_to_decimal)
else:
    merged["decimal_odds"] = np.nan

no_price = merged["decimal_odds"].isna()
if no_price.any():
    # fair odds from consensus prob (no-vig approx)
    if c_pbook:
        merged.loc[no_price & merged[c_pbook].gt(0), "decimal_odds"] = 1.0 / merged.loc[no_price, c_pbook]

merged["ev_realized_per_$1"] = merged["result"] * (merged["decimal_odds"] - 1.0) - (1 - merged["result"]) * 1.0

# Edge sign capture
merged["edge_dir"] = (p_model_eff - p_mkt_eff) if c_pbook else np.nan
merged["edge_hit"] = (((merged["edge_dir"] > 0) & (merged["result"] == 1)) | ((merged["edge_dir"] < 0) & (merged["result"] == 0))).astype("float")

# Correlation (model prob vs result)
try:
    corr = float(pd.Series(merged[c_pmod]).corr(pd.Series(merged["result"])))
except Exception:
    corr = np.nan

print(f"[overall] rows={len(merged)}  hit={merged['result'].mean():.3f}  "
      f"brier={merged['brier'].mean():.4f}  logloss={merged['logloss'].mean():.4f}  "
      f"roi/1={merged['ev_realized_per_$1'].mean():.4f}  corr(p, y)={corr:.3f}")

# Write out grades (selected columns)
keep = ["player_key","market_std","side","point_key"] if "point_key" in merged.columns else ["player_key","market_std","side"]
keep += [c for c in [c_pmod, c_pbook, c_odds, "decimal_odds", "result", "brier", "logloss", "edge_dir", "edge_hit", "ev_realized_per_$1"] if c in merged.columns]
grades = merged[keep].copy()
out_grades.parent.mkdir(parents=True, exist_ok=True)
grades.to_csv(out_grades, index=False)
print("[write] grades ->", out_grades)
grades.head(5)


[paths]
 props   : False /Users/pwitt/fourth-and-value/notebooks/data/props/props_with_model_week2.csv
 actuals : False /Users/pwitt/fourth-and-value/notebooks/data/actuals/week2.csv


SystemExit: Missing inputs; ensure Week 2 props and actuals exist.

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [2]:

# Per-market summary: hit rate, Brier, log loss, edge_hit, ROI
def safemean(s):
    s = pd.to_numeric(s, errors="coerce")
    return s.dropna().mean()

perf = grades.groupby("market_std", dropna=False).agg(
    n=("result","count"),
    hit=("result","mean"),
    brier=("brier", safemean),
    logloss=("logloss", safemean),
    edge_hit_rate=("edge_hit", safemean),
    avg_edge_dir=("edge_dir", safemean),
    roi_per_$1=("ev_realized_per_$1", safemean),
).reset_index()

# Rank views
top_by_roi   = perf.sort_values(["roi_per_$1","n"], ascending=[False, False]).head(12)
top_by_brier = perf.sort_values(["brier","n"], ascending=[True, False]).head(12)

display(perf.sort_values("roi_per_$1", ascending=False).head(20))
display(top_by_roi)
display(top_by_brier)

# Write market summary
perf.to_csv(out_markets, index=False)
print("[write] per-market ->", out_markets)


SyntaxError: positional argument follows keyword argument (1229647912.py, line 13)

### Tweet-ready snippets

Run the next cell to generate short, plain‑text summaries you can paste into a tweet or thread.
- One “overall” line
- Up to 5 best markets by ROI (min n=25 to avoid tiny samples; tweak as needed)


In [3]:

overall = {
    "n": int(grades["result"].count()),
    "hit": float(grades["result"].mean()),
    "brier": float(pd.to_numeric(grades["brier"], errors="coerce").mean()),
    "logloss": float(pd.to_numeric(grades["logloss"], errors="coerce").mean()),
    "roi": float(pd.to_numeric(grades["ev_realized_per_$1"], errors="coerce").mean()),
}

tweet_lines = []
tweet_lines.append(f"Wk2 model vs actuals — n={overall['n']}, hit={overall['hit']:.3f}, "
                   f"Brier={overall['brier']:.3f}, LogLoss={overall['logloss']:.3f}, ROI/1={overall['roi']:.3f}.")

pf = perf.copy()
pf = pf[pf["n"] >= 25]  # avoid tiny samples
pf = pf.sort_values("roi_per_$1", ascending=False).head(5)
for _, r in pf.iterrows():
    tweet_lines.append(f"{r['market_std']}: n={int(r['n'])}, hit={r['hit']:.3f}, ROI/1={r['roi_per_$1']:.3f}, Brier={r['brier']:.3f}")

print("\n".join(tweet_lines))


NameError: name 'grades' is not defined

### (Optional) Simple bar chart — ROI by market

> Uses matplotlib (no styles/colors).

In [None]:

import matplotlib.pyplot as plt

pf = perf.sort_values("roi_per_$1", ascending=False).head(12)
plt.figure()
plt.bar(pf["market_std"].astype(str), pf["roi_per_$1"])
plt.title("Week 2 — ROI per $1 by Market (Top 12)")
plt.xticks(rotation=45, ha="right")
plt.ylabel("ROI per $1")
plt.tight_layout()
plt.show()
