# Week 2 — Build Actuals ➜ Grade Model Performance (All-in-One) 🚀

This notebook does **everything** for Week 2 in one run:

1. Load Week 2 props (`data/props/props_with_model_week2.csv`)
2. Load weekly player stats (`data/weekly_player_stats_2025.parquet` or `.csv.gz`)
3. Build actuals → `data/actuals/week2.csv`
4. Grade model vs actuals → write `data/eval/grades_week2.csv` and `data/eval/market_perf_week2.csv`
5. Show best markets and print **tweet-ready** summary


In [3]:

from pathlib import Path
import pandas as pd
import numpy as np

# --- Params ---
WEEK = 2
SEASON = 2025

# --- Repo base autodetect (folder with Makefile + data/) ---
BASE = Path.cwd()
for p in [BASE] + list(BASE.parents):
    if (p/"Makefile").exists() and (p/"data").exists():
        BASE = p
        break
print("[BASE]", BASE)

props_path = BASE / f"data/props/props_with_model_week{WEEK}.csv"
stats_parq = BASE / "data/weekly_player_stats_2025.parquet"
stats_csvz = BASE / "data/weekly_player_stats_2025.csv.gz"
actuals_out = BASE / f"data/actuals/week{WEEK}.csv"
grades_out  = BASE / f"data/eval/grades_week{WEEK}.csv"
market_out  = BASE / f"data/eval/market_perf_week{WEEK}.csv"

print("[check] props:", props_path, props_path.exists())
print("[check] stats parquet:", stats_parq, stats_parq.exists())
print("[check] stats csv.gz :", stats_csvz, stats_csvz.exists())

if not props_path.exists():
    raise SystemExit(f"[ERR] missing props file: {props_path}\nRun your monday build first.")
if not (stats_parq.exists() or stats_csvz.exists()):
    raise SystemExit("[ERR] missing weekly player stats. Fetch parquet (preferred) or csv.gz before running.")

# --- Load props ---
dfp = pd.read_csv(props_path)
print("[props] shape:", dfp.shape)

# --- Load stats (prefer parquet) ---
if stats_parq.exists():
    dfs = pd.read_parquet(stats_parq)
else:
    dfs = pd.read_csv(stats_csvz)

print("[stats] shape (raw):", dfs.shape)
if {"season","week"}.issubset(dfs.columns):
    dfs = dfs[(dfs["season"]==SEASON) & (dfs["week"]==WEEK)]
    print("[stats] after season/week filter:", dfs.shape)
else:
    print("[warn] stats missing season/week columns; proceeding without filter")

# --- Join keys ---
left_key = None; right_key = None
if "player_key" in dfp.columns and "player_id" in dfs.columns:
    left_key, right_key = "player_key", "player_id"
else:
    # name fallback (less reliable)
    left_key  = next((c for c in ["player","name","player_name"] if c in dfp.columns), None)
    right_key = next((c for c in ["player_name","player","name","full_name"] if c in dfs.columns), None)
if not left_key or not right_key:
    raise SystemExit("[ERR] no compatible join keys (need player_key↔player_id or name↔name)")

print(f"[join] props.{left_key}  <->  stats.{right_key}")

# --- Keep minimal stats columns ---
need_stat_cols = {
    right_key,
    "rushing_yards","receiving_yards","passing_yards",
    "receptions","rushing_attempts","completions","attempts",
    "passing_tds","interceptions","rushing_tds","receiving_tds",
}
dfs_small = dfs[[c for c in need_stat_cols if c in dfs.columns]].copy()
print("[stats] using columns:", sorted(dfs_small.columns))

# --- Merge props to stats (left join keeps all props) ---
dfm = dfp.merge(dfs_small, left_on=left_key, right_on=right_key, how="left", suffixes=("","_stat"))
print("[merge] merged rows:", len(dfm))

# --- Market mapping & actuals computation ---
MAP = {
    # yards
    "rush_yds":"rushing_yards","rushing_yds":"rushing_yards","rush_yards":"rushing_yards","rushing_yards":"rushing_yards",
    "recv_yds":"receiving_yards","reception_yds":"receiving_yards","receiving_yards":"receiving_yards",
    "pass_yds":"passing_yards","passing_yards":"passing_yards",
    # counts
    "rec":"receptions","receptions":"receptions",
    "rush_att":"rushing_attempts","rush_attempts":"rushing_attempts","rushing_attempts":"rushing_attempts","carries":"rushing_attempts",
    "pass_cmp":"completions","pass_completions":"completions","completions":"completions",
    "pass_att":"attempts","pass_attempts":"attempts","attempts":"attempts",
    "pass_tds":"passing_tds","passing_tds":"passing_tds",
    "pass_ints":"interceptions","pass_interceptions":"interceptions","interceptions":"interceptions",
}
YESNO = {"anytime_td"}
UNSUPPORTED_PREFIX = ("first_td","first_team_td","rush_longest","reception_longest","longest_reception","longest_rush")

def norm_market(m):
    if pd.isna(m): return ""
    return str(m).strip().lower().replace(" ","_").replace("-","_")

def actual_value(row):
    m = norm_market(row.get("market_std",""))
    if m.startswith(UNSUPPORTED_PREFIX): return np.nan
    if m in YESNO:
        td = (row.get("rushing_tds",0) or 0) + (row.get("receiving_tds",0) or 0)
        return float(td)
    statcol = MAP.get(m, None)
    if statcol is None: return np.nan
    v = row.get(statcol, np.nan)
    try: return float(v)
    except Exception: return np.nan

def decide_result(side, actual, point):
    if pd.isna(actual): return np.nan
    s = (str(side) if side is not None else "").strip().lower()
    if s in ("yes","no"):
        yes = actual > 0
        return 1 if ((s=="yes" and yes) or (s=="no" and not yes)) else 0
    p = None
    try:
        p = float(point) if point not in (None,"") else None
    except Exception:
        p = None
    if p is None: return np.nan
    if s=="over":  return 1 if actual >= p else 0
    if s=="under": return 1 if actual <= p else 0
    return np.nan

have_point = "point_key" in dfm.columns
dfm["actual_value"] = dfm.apply(actual_value, axis=1)
dfm["result"] = [
    decide_result(row.get("side"), row.get("actual_value"), row.get("point_key") if have_point else None)
    for _, row in dfm.iterrows()
]

# --- Write actuals ---
actuals_out.parent.mkdir(parents=True, exist_ok=True)
out_cols = ["player_key","market_std","side","actual_value","result"]
if have_point: out_cols.insert(2,"point_key")
# fill player_key if missing via join key
if "player_key" not in dfm.columns and left_key in dfm.columns:
    dfm["player_key"] = dfm[left_key]
actuals = dfm[out_cols].copy()
actuals = actuals[actuals["actual_value"].notna()].reset_index(drop=True)
actuals.to_csv(actuals_out, index=False)
print(f"[write] actuals -> {actuals_out}  rows={len(actuals)}")

# === Grading section ===
def amer_to_prob(oa):
    if pd.isna(oa): return np.nan
    oa = float(oa)
    return (-oa)/((-oa)+100.0) if oa < 0 else 100.0/(oa+100.0)

def amer_to_decimal(oa):
    if pd.isna(oa): return np.nan
    oa = float(oa)
    return (1 + 100.0/(-oa)) if oa < 0 else (1 + oa/100.0)

# Re-merge props + freshly written actuals to be safe
dfa = actuals  # already aligned
merged = dfp.merge(dfa, on=[c for c in ["player_key","market_std","side"] + (["point_key"] if have_point else []) if c in dfp.columns and c in dfa.columns],
                   how="left", suffixes=("","_act"))
print("[grade] merged rows:", len(merged), " matched results:", merged["result"].notna().sum())

# Column mapping for metrics
c_side  = next((c for c in ["side","bet_side","ou_side","yes_no_side"] if c in merged.columns), "side")
c_pmod  = next((c for c in ["model_prob","model_p","p_model","pred_prob"] if c in merged.columns), None)
c_pbook = next((c for c in ["mkt_prob","consensus_prob","book_implied_prob"] if c in merged.columns), None)
c_odds  = next((c for c in ["odds_american","mkt_odds_american","american_odds","price"] if c in merged.columns), None)
if c_pmod is None: raise SystemExit("Missing model probability column (e.g., model_prob).")

# Coerce
merged[c_pmod]  = pd.to_numeric(merged[c_pmod], errors="coerce").clip(0,1)
if c_pbook: merged[c_pbook] = pd.to_numeric(merged[c_pbook], errors="coerce").clip(0,1)
if c_odds:  merged[c_odds]  = pd.to_numeric(merged[c_odds], errors="coerce")

# Fill market prob from odds if needed
if c_pbook and merged[c_pbook].isna().any():
    merged[c_pbook] = merged[c_pbook].fillna(merged[c_odds].apply(amer_to_prob) if c_odds else np.nan)

# Effective probs by side
side = merged[c_side].astype(str).str.lower()
yes_like = side.isin(["over","yes"])
p_model_eff = merged[c_pmod].where(yes_like, 1.0 - merged[c_pmod])
p_mkt_eff   = (merged[c_pbook].where(yes_like, 1.0 - merged[c_pbook])) if c_pbook else np.nan

# Metrics
eps = 1e-9
p_safe = merged[c_pmod].clip(eps, 1-eps)
merged["brier"] = (merged[c_pmod] - merged["result"])**2
merged["logloss"] = -(merged["result"]*np.log(p_safe) + (1-merged["result"])*np.log(1-p_safe))

# ROI via odds if present else from mkt_prob
if c_odds:
    merged["decimal_odds"] = merged[c_odds].apply(amer_to_decimal)
else:
    merged["decimal_odds"] = np.nan
no_price = merged["decimal_odds"].isna()
if no_price.any() and c_pbook:
    merged.loc[no_price & merged[c_pbook].gt(0), "decimal_odds"] = 1.0 / merged.loc[no_price, c_pbook]
merged["ev_realized_per_$1"] = merged["result"] * (merged["decimal_odds"] - 1.0) - (1 - merged["result"]) * 1.0

# Edge sign capture
merged["edge_dir"] = (p_model_eff - p_mkt_eff) if c_pbook else np.nan
merged["edge_hit"] = (((merged["edge_dir"] > 0) & (merged["result"] == 1)) | ((merged["edge_dir"] < 0) & (merged["result"] == 0))).astype("float")

# Overall
try:
    corr = float(pd.Series(merged[c_pmod]).corr(pd.Series(merged["result"])))
except Exception:
    corr = np.nan

print(f"[overall] rows={len(merged)}  hit={merged['result'].mean():.3f}  "
      f"Brier={merged['brier'].mean():.4f}  LogLoss={merged['logloss'].mean():.4f}  "
      f"ROI/1={merged['ev_realized_per_$1'].mean():.4f}  corr(p,y)={corr:.3f}")

# Write grades
keep = ["player_key","market_std","side","point_key"] if "point_key" in merged.columns else ["player_key","market_std","side"]
keep += [c for c in [c_pmod, c_pbook, c_odds, "decimal_odds", "result", "brier", "logloss", "edge_dir", "edge_hit", "ev_realized_per_$1"] if c in merged.columns]
grades = merged[keep].copy()
grades_out.parent.mkdir(parents=True, exist_ok=True)
grades.to_csv(grades_out, index=False)
print("[write] grades ->", grades_out)

# Per-market performance (dict-based named aggregation to allow '$' in col name)
def safemean(s):
    s = pd.to_numeric(s, errors="coerce")
    return s.dropna().mean()

perf = grades.groupby("market_std", dropna=False).agg({
    "n": ("result","count"),
    "hit": ("result","mean"),
    "brier": ("brier", safemean),
    "logloss": ("logloss", safemean),
    "edge_hit_rate": ("edge_hit", safemean),
    "avg_edge_dir": ("edge_dir", safemean),
    "roi_per_$1": ("ev_realized_per_$1", safemean),
}).reset_index()

perf.to_csv(market_out, index=False)
print("[write] per-market ->", market_out)

# Rankers
top_by_roi   = perf.sort_values(["roi_per_$1","n"], ascending=[False, False]).head(12)
top_by_brier = perf.sort_values(["brier","n"], ascending=[True, False]).head(12)

display(perf.sort_values("roi_per_$1", ascending=False).head(20))
display(top_by_roi)
display(top_by_brier)

# Tweet-ready summary
overall = {
    "n": int(grades["result"].count()),
    "hit": float(grades["result"].mean()),
    "brier": float(pd.to_numeric(grades["brier"], errors="coerce").mean()),
    "logloss": float(pd.to_numeric(grades["logloss"], errors="coerce").mean()),
    "roi": float(pd.to_numeric(grades["ev_realized_per_$1"], errors="coerce").mean()),
}
print("\n--- Tweet-ready ---")
print(f"Wk2 model vs actuals — n={overall['n']}, hit={overall['hit']:.3f}, Brier={overall['brier']:.3f}, LogLoss={overall['logloss']:.3f}, ROI/1={overall['roi']:.3f}.")

pf = perf[perf["n"] >= 25].sort_values("roi_per_$1", ascending=False).head(5)  # min sample to avoid tiny sets
for _, r in pf.iterrows():
    print(f"{r['market_std']}: n={int(r['n'])}, hit={r['hit']:.3f}, ROI/1={r['roi_per_$1']:.3f}, Brier={r['brier']:.3f}")


[BASE] /Users/pwitt/fourth-and-value
[check] props: /Users/pwitt/fourth-and-value/data/props/props_with_model_week2.csv True
[check] stats parquet: /Users/pwitt/fourth-and-value/data/weekly_player_stats_2025.parquet True
[check] stats csv.gz : /Users/pwitt/fourth-and-value/data/weekly_player_stats_2025.csv.gz False
[props] shape: (2673, 24)
[stats] shape (raw): (2041, 114)
[stats] after season/week filter: (970, 114)
[join] props.player_key  <->  stats.player_id
[stats] using columns: ['attempts', 'completions', 'passing_tds', 'passing_yards', 'player_id', 'receiving_tds', 'receiving_yards', 'receptions', 'rushing_tds', 'rushing_yards']
[merge] merged rows: 2673


KeyError: "['side'] not in index"

### (Optional) Quick ROI chart by market

This uses matplotlib (no styles/colors).

In [4]:

import matplotlib.pyplot as plt

pf = perf.sort_values("roi_per_$1", ascending=False).head(12)
plt.figure()
plt.bar(pf["market_std"].astype(str), pf["roi_per_$1"])
plt.title("Week 2 — ROI per $1 by Market (Top 12)")
plt.xticks(rotation=45, ha="right")
plt.ylabel("ROI per $1")
plt.tight_layout()
plt.show()


Matplotlib is building the font cache; this may take a moment.


NameError: name 'perf' is not defined