In [37]:
# ==== WEEK 2 GRADING — STABLE / DEFENSIVE ====
from pathlib import Path
import sys, importlib
import pandas as pd, numpy as np
from sklearn.metrics import brier_score_loss, log_loss

# --- 0) Make scripts/ importable (so we reuse your normalizers) ---
ROOT = None
for p in [Path.cwd()] + list(Path.cwd().parents):
    if (p/"scripts/common_markets.py").exists():
        ROOT = p; break
if ROOT is None:
    raise FileNotFoundError("Couldn't find scripts/common_markets.py walking up from CWD.")
sys.path.insert(0, str(ROOT/"scripts"))
cm = importlib.import_module("common_markets")
standardize_input = cm.standardize_input
std_player_name   = getattr(cm, "std_player_name", lambda s: s)

print("[import] using", ROOT/"scripts/common_markets.py")

# --- 1) Load + normalize predictions (already pipeline output but columns vary) ---
dfp_raw = pd.read_csv(ROOT/"data/props/props_with_model_week2.csv")

dfp = standardize_input(
    dfp_raw,
    market_col_candidates=("market_std","market","market_name","market_key","key","market_slug"),
    name_col_candidates=("side","name","selection","bet_name","over_under","bet_side","bet"),
    point_col_candidates=("line","point","odds_point","bet_line","points","handicap","total"),
    player_col_candidates=("name_std","player","player_name","athlete","athlete_name","name_player"),
).rename(columns={"point":"line","name":"side"})

# Normalize types now (avoid dtype mismatch later)
dfp["line"] = pd.to_numeric(dfp["line"], errors="coerce")
if "model_prob" in dfp.columns:
    dfp["model_prob"] = pd.to_numeric(dfp["model_prob"], errors="coerce")
# Force side to object dtype explicitly and standardize Over/Under/Yes/No
_side_map = {"o":"Over","over":"Over","ovr":"Over","+":"Over",
             "u":"Under","under":"Under","und":"Under","-":"Under",
             "yes":"Yes","no":"No"}
dfp["side"] = (dfp["side"].astype("string").str.strip().str.lower()
               .map(_side_map).astype("object"))

need_cols = {"market_std","name_std","line","side"}
print("[dfp] cols present →", sorted(set(dfp.columns) & (need_cols | {"model_prob"})))

# --- 2) Load Week 2 actuals (nflverse weekly) ---
dfs = pd.read_parquet(ROOT/"data/weekly_player_stats_2025.parquet")
dfs = dfs.query("season == 2025 and week == 2").copy()
name_col = next((c for c in ["player_name","player","name"] if c in dfs.columns), None)
if not name_col:
    raise RuntimeError("[dfs] couldn't find a player name column")

dfs["name_std"] = dfs[name_col].map(std_player_name)

# --- 3) Map canonical markets -> nflverse columns (defensive) ---
def pick(df, *cands):
    for c in cands:
        if c in df.columns: return c
    return None

M2C = {
    # canonical OU markets from your repo
    "rush_yds":         pick(dfs, "rushing_yards", "rush_yards"),
    "recv_yds":         pick(dfs, "receiving_yards", "rec_yards", "reception_yards"),
    "receptions":       pick(dfs, "receptions"),
    "pass_yds":         pick(dfs, "passing_yards", "pass_yards"),
    "rush_attempts":    pick(dfs, "rush_attempts", "rushing_attempts", "carries"),
    "pass_attempts":    pick(dfs, "passing_attempts", "pass_att", "attempts"),
    "pass_completions": pick(dfs, "passing_completions", "pass_comp", "completions"),
    # Poisson-ish
    "pass_tds":         pick(dfs, "passing_tds"),
    "interceptions":    pick(dfs, "interceptions", "passing_interceptions", "pass_interceptions", "ints"),
    # optional longest (if you want to grade these too)
    "reception_longest": pick(dfs, "receiving_long", "rec_long", "long_rec"),
    "rush_longest":      pick(dfs, "rushing_long", "rush_long", "long_rush"),
    # Yes/No
    "anytime_td":       None,  # computed from parts
}

td_parts = [c for c in [
    "total_tds","rushing_tds","receiving_tds",
    "kick_return_tds","punt_return_tds","defensive_tds","special_teams_tds"
] if c in dfs.columns]

dfs_slice_cols = {"name_std"} | {c for c in M2C.values() if c} | set(td_parts)
dfs_slice = dfs[list(dfs_slice_cols)].copy()
# If a book encoded anytime TD as Over/Under, convert to Yes/No
mask_at = merged["market_std"].eq("anytime_td") & merged["side"].isin(["Over","Under"])
merged.loc[mask_at & merged["side"].eq("Over"),  "side"] = "Yes"
merged.loc[mask_at & merged["side"].eq("Under"), "side"] = "No"

# --- 4) Merge ---
merged = dfp.merge(dfs_slice, on="name_std", how="left", suffixes=("","_act"))
print(f"[merge] rows: {len(merged):,}")

# --- 5) actual_value per market (index-safe) ---
merged["actual_value"] = np.nan
for m, col in M2C.items():
    if col:
        sel = merged["market_std"].eq(m)
        if sel.any():
            merged.loc[sel, "actual_value"] = pd.to_numeric(merged.loc[sel, col], errors="coerce")

# Anytime TD (binary), pandas-only to keep index alignment
# --- Anytime TD (binary) — compute td_all (0/1) as an index-aligned Series
sel_any = merged["market_std"].eq("anytime_td")
td_all = None

if sel_any.any():
    if "total_tds" in merged.columns:
        td_all = (
            pd.to_numeric(merged["total_tds"], errors="coerce")
            .fillna(0)
            .gt(0)
            .astype(int)
        )
    else:
        td_cols = [
            "rushing_tds","receiving_tds","kick_return_tds",
            "punt_return_tds","defensive_tds","special_teams_tds"
        ]
        avail = [c for c in td_cols if c in merged.columns]
        if avail:
            td_frame = pd.concat(
                [pd.to_numeric(merged[c], errors="coerce").fillna(0) for c in avail],
                axis=1
            )
            td_all = td_frame.sum(axis=1).gt(0).astype(int)
        else:
            td_all = pd.Series(0, index=merged.index, dtype="int64")

    # Write actual_value only for anytime rows (float for consistency)
    merged.loc[sel_any, "actual_value"] = td_all.loc[sel_any].astype(float)

    # Derive actual_side directly from td_all (no astype on NaNs)
    yes_idx = td_all.index[sel_any & td_all.eq(1)]
    no_idx  = td_all.index[sel_any & td_all.eq(0)]
    merged.loc[yes_idx, "actual_side"] = "Yes"
    merged.loc[no_idx,  "actual_side"] = "No"


# --- 6) Derive actual_side (ensure column is object dtype BEFORE assignment) ---
merged["actual_side"] = pd.Series(pd.NA, index=merged.index, dtype="object")

OU = {
    "rush_yds","recv_yds","receptions","pass_yds",
    "rush_attempts","pass_attempts","pass_completions",
    "pass_tds","interceptions","reception_longest","rush_longest"
}
ou_mask = merged["market_std"].isin(OU) & merged["line"].notna() & merged["actual_value"].notna()

# Assign using masks (strings into object column => no dtype complaint)
merged.loc[ou_mask & (merged["actual_value"] > merged["line"]), "actual_side"] = "Over"
merged.loc[ou_mask & (merged["actual_value"] < merged["line"]), "actual_side"] = "Under"
merged.loc[ou_mask & (merged["actual_value"] == merged["line"]), "actual_side"] = "Push"

any_mask = merged["market_std"].eq("anytime_td") & merged["actual_value"].notna()
merged.loc[any_mask & merged["actual_value"].notna() & (merged["actual_value"] > 0),  "actual_side"] = "Yes"
merged.loc[any_mask & merged["actual_value"].notna() & (merged["actual_value"] == 0), "actual_side"] = "No"

print("[coverage] with actual_side:", int(merged["actual_side"].notna().sum()))
print("[coverage] pushes:", int((merged["actual_side"]=="Push").sum()))

# --- 7) Grade (ignore Push; require a declared side) ---
mask_grade = merged["actual_side"].notna() & merged["side"].notna() & (merged["actual_side"]!="Push")
merged["hit"] = np.where(mask_grade & (merged["side"] == merged["actual_side"]), 1,
                  np.where(mask_grade, 0, np.nan))

print("[debug] stage counts")
print("  total rows         :", len(merged))
print("  has line           :", int(merged["line"].notna().sum()))
print("  has actual_value   :", int(merged["actual_value"].notna().sum()))
print("  OU gradeable rows  :", int(ou_mask.sum()))
print("  anytime gradeable  :", int(any_mask.sum()))
print("  side present       :", int(merged["side"].notna().sum()))
print("  mask_grade rows    :", int(mask_grade.sum()))

if merged["hit"].notna().any():
    hit_rate = merged.loc[merged["hit"].notna(),"hit"].mean()
    print(f"[hit-rate] {hit_rate:.3f} on {int(merged['hit'].notna().sum())} graded rows")
else:
    print("[hit-rate] no graded rows — check debug counts above.")

# Prob metrics if model_prob exists and valid
if "model_prob" in merged.columns:
    mprob_mask = mask_grade & merged["model_prob"].between(0,1)
    if mprob_mask.any():
        y = (merged.loc[mprob_mask, "actual_side"].isin(["Over","Yes"])).astype(int)
        p = merged.loc[mprob_mask, "model_prob"].astype(float)
        print(f"[brier] {brier_score_loss(y,p):.3f}   [logloss] {log_loss(y,p,labels=[0,1]):.3f}   (n={int(mprob_mask.sum())})")
    else:
        print("[prob] no rows with valid model_prob in [0,1]")
else:
    print("[prob] model_prob not present; skipping prob metrics")

# --- 8) Save outputs
OUT = ROOT/"data/eval"
OUT.mkdir(parents=True, exist_ok=True)
merged.to_csv(OUT/"grades_week2.csv", index=False)
by_market = (merged.loc[mask_grade]
             .groupby("market_std")
             .agg(rows=("hit","count"), hit_rate=("hit","mean"))
             .sort_values(["hit_rate","rows"], ascending=[False, False]))
by_market.to_csv(OUT/"grades_week2_by_market.csv")
print("[write]", OUT/"grades_week2.csv")
print("[write]", OUT/"grades_week2_by_market.csv")


[import] using /Users/pwitt/fourth-and-value/scripts/common_markets.py
[dfp] cols present → ['line', 'market_std', 'model_prob', 'name_std', 'side']
[merge] rows: 2,673
[coverage] with actual_side: 537
[coverage] pushes: 0
[debug] stage counts
  total rows         : 2673
  has line           : 1438
  has actual_value   : 537
  OU gradeable rows  : 0
  anytime gradeable  : 537
  side present       : 2673
  mask_grade rows    : 537
[hit-rate] 0.000 on 537 graded rows
[brier] 0.049   [logloss] 0.250   (n=537)
[write] /Users/pwitt/fourth-and-value/data/eval/grades_week2.csv
[write] /Users/pwitt/fourth-and-value/data/eval/grades_week2_by_market.csv
