# Build Week 2 Actuals

This notebook derives Week 2 **actuals** for player props by joining your modeled props to weekly player stats and computing outcomes.

**Inputs**
- `data/props/props_with_model_week2.csv` (from your pipeline)
- Weekly player stats file (one of):
  - `data/weekly_player_stats_2025.parquet`
  - `data/weekly_player_stats_2025.csv.gz`

**Output**
- `data/actuals/week2.csv` with:
  - `player_key, market_std, point_key, side, actual_value, result`

> Markets handled now: common O/U (rushing/receiving/passing yards, receptions, rush attempts, pass attempts, completions, passing TDs, interceptions thrown), and **anytime_td** (YES/NO). Markets like **first_td**, **longest reception/rush**, or exotic combos aren’t computed here yet and will be skipped.


In [1]:

from pathlib import Path
import pandas as pd

# --- run configuration: the only two values to edit for another week/season ---
WEEK = 2
SEASON = 2025

# --- find repo base ---
# Walk from the current working directory up through its ancestors and take
# the first directory that holds both a Makefile and a data/ folder. If no
# directory qualifies, BASE stays at the current working directory.
BASE = Path.cwd()
for p in [BASE] + list(BASE.parents):
    if (p / "Makefile").exists() and (p / "data").exists():
        BASE = p
        break
print("[BASE]", BASE)

# --- input / output locations ---
props_path = BASE / f"data/props/props_with_model_week{WEEK}.csv"
# The stats file names follow SEASON so that changing the constant above
# cannot leave the notebook reading another season's file.
stats_parq = BASE / f"data/weekly_player_stats_{SEASON}.parquet"
stats_csvz = BASE / f"data/weekly_player_stats_{SEASON}.csv.gz"
out_actuals = BASE / f"data/actuals/week{WEEK}.csv"

# --- props: the modeled lines for this week (required input) ---
if props_path.exists():
    dfp = pd.read_csv(props_path)
else:
    raise SystemExit(f"[ERR] missing props: {props_path}")
print("[props] shape:", dfp.shape)

# --- weekly player stats: parquet is preferred, the gzipped CSV is the fallback ---
if not (stats_parq.exists() or stats_csvz.exists()):
    raise SystemExit("[ERR] missing weekly player stats: expected data/weekly_player_stats_2025.parquet or .csv.gz")
dfs = pd.read_parquet(stats_parq) if stats_parq.exists() else pd.read_csv(stats_csvz)

print("[stats] shape:", dfs.shape)

# Case-insensitive column lookup: lowercased name -> the spelling actually used in dfs.
# Original author's note: nflverse often uses 'season', 'week', 'player_id'
# (gsis_id or gsis_it_id) and stat fields like passing_yards, rushing_yards, etc.
dfs_cols = {name.lower(): name for name in dfs.columns}

def has(col):
    """Return True when dfs has a column whose lowercased name equals `col`."""
    return col in dfs_cols

def col(col):
    """Return the real dfs column name stored under the lowercase key `col`.

    Raises KeyError when no such column was recorded.
    """
    return dfs_cols[col]

# Keep only rows of the target season and week; a filter is skipped entirely
# when its column is absent from the stats file.
for key, target in (("season", SEASON), ("week", WEEK)):
    if has(key):
        dfs = dfs[dfs[col(key)] == target]
print("[stats] after season/week filter:", dfs.shape)

# choose join key
# First choice: props 'player_key' against the first id-like column present in stats.
stats_id_candidates = ["player_id","gsis_id","gsis_it_id","nfl_id","pfr_player_id"]
join_left = "player_key" if "player_key" in dfp.columns else None
join_right = next((cand for cand in stats_id_candidates if cand in dfs.columns), None)

# Without a complete id pair, replace BOTH sides with player-name columns
# (less reliable: spellings must agree between the two sources).
if join_left is None or join_right is None:
    print("[warn] falling back to name join; ensure names match between sources.")
    join_left = None
    for name_col in ["player","name","player_name"]:
        if name_col in dfp.columns:
            join_left = name_col
            break
    join_right = None
    for name_col in ["player","name","player_name","full_name"]:
        if name_col in dfs.columns:
            join_right = name_col
            break

# Still incomplete after the fallback: nothing to join on, stop here.
if not (join_left is not None and join_right is not None):
    raise SystemExit("[ERR] no viable join keys between props and stats. Expected player_key/id or player/name.")

print(f"[join] props.{join_left}  ↔  stats.{join_right}")

# Columns every props file must provide; the first absent one (in this order) aborts the run.
need = ["market_std","side"]
missing_col = next((name for name in need if name not in dfp.columns), None)
if missing_col is not None:
    raise SystemExit(f"[ERR] props missing required column: {missing_col}")
# point_key is optional; remember whether it is available for later steps.
have_point = "point_key" in dfp.columns

# Market mapping: market_std spelling -> name of the weekly-stats column that
# holds the realized value. Several spellings of one market share a column.
M = {}

def add(keys, stat_col):
    """Register every alias in `keys` as a market resolved by `stat_col`."""
    M.update(dict.fromkeys(keys, stat_col))

# (aliases, stats column) pairs, registered in the order listed.
# NOTE(review): each stats column named here must exist in the loaded stats
# file for its markets to resolve; some exports may spell rushing attempts
# 'carries' rather than 'rushing_attempts' - confirm against the file.
_MARKET_ALIASES = (
    # Yards
    (["rush_yds","rushing_yds","rush_yards","rushing_yards"], "rushing_yards"),
    (["recv_yds","reception_yds","receiving_yards"], "receiving_yards"),
    (["pass_yds","passing_yards"], "passing_yards"),
    # Counts
    (["rec","receptions"], "receptions"),
    (["rush_att","rush_attempts","rushing_attempts","carries"], "rushing_attempts"),
    (["pass_cmp","pass_completions","completions"], "completions"),
    (["pass_att","pass_attempts"], "attempts"),
    (["pass_tds","passing_tds"], "passing_tds"),
    (["pass_ints","pass_interceptions","interceptions_thrown"], "interceptions"),
)
for _aliases, _stat_col in _MARKET_ALIASES:
    add(_aliases, _stat_col)

# Yes/No markets.
# 'anytime_td' is graded as rushing_tds + receiving_tds > 0.
YESNO = {"anytime_td"}

# Markets whose names start with one of these prefixes are not computed yet
# and are skipped when they show up in the props.
UNSUPPORTED_PREFIX = ("first_td","first_team_td","rush_longest","reception_longest","longest_reception","longest_rush")

# Stats helper: tolerant column access (a missing column reads as 0).
def scol(df, name):
    """Return column `name` of `df`, matching case-insensitively as a fallback.

    An exact column match wins. Otherwise the column whose lowercased name
    equals ``name.lower()`` is returned (the last such column when several
    collide). When nothing matches, the scalar 0 is returned instead of a
    column.
    """
    if name in df.columns:
        return df[name]
    # Lowercased name -> real name; later columns overwrite earlier ones.
    by_lower = {}
    for actual in df.columns:
        by_lower[actual.lower()] = actual
    wanted = name.lower()
    if wanted not in by_lower:
        return 0
    return df[by_lower[wanted]]

# Prepare condensed stats: the join key, season/week when present, and every
# stat column a supported market can read.
needed_stats_cols = {join_right}
needed_stats_cols.update(c for c in ("season", "week") if c in dfs.columns)
# union of referenced stat columns
needed_stats_cols.update(M.values())
# yes/no extras
needed_stats_cols.update(["rushing_tds","receiving_tds","interceptions"])

# Sort the selection so the column order is the same on every run; iterating
# a set of strings directly gives no such guarantee.
dfs_small = dfs[sorted(c for c in needed_stats_cols if c in dfs.columns)].copy()
print("[stats] using columns:", sorted(dfs_small.columns))

# A left join emits one output row per matching stats row, so a join key that
# repeats in stats would silently duplicate the matching props rows.
dup_keys = int(dfs_small[join_right].duplicated().sum())
if dup_keys:
    print(f"[warn] {dup_keys} repeated stats.{join_right} value(s); matching props rows will be duplicated by the join.")

# Join props to stats (left join, keep all props). Stats columns whose names
# collide with props columns receive the "_stat" suffix.
dfj = dfp.merge(dfs_small, left_on=join_left, right_on=join_right, how="left", suffixes=("","_stat"))
print("[join] merged shape:", dfj.shape)

# Compute actual_value per row
def actual_for_row(row):
    """Return the realized stat value for one merged props/stats row.

    The market name is read from ``row["market_std"]``, lowercased and
    stripped; a second spelling with spaces and hyphens folded to underscores
    is tried wherever the first does not match.

    Args:
        row: one row of the merged frame (any object supporting ``.get``,
            ``in`` and ``[]`` by column name, e.g. a pandas Series).

    Returns:
        float | None: the stat as a float, or None when the market is
        unsupported or unknown, or when the row carries no usable value for
        it (for example a prop that found no stats row in the left join).
    """
    m = str(row.get("market_std","")).strip().lower()
    # gentle normalization used as a second lookup spelling
    m2 = m.replace(" ", "_").replace("-", "_")

    # skip unsupported
    if m.startswith(UNSUPPORTED_PREFIX) or m2.startswith(UNSUPPORTED_PREFIX):
        return None

    if m in YESNO or m2 in YESNO:
        # anytime_td: rushing + receiving touchdowns.
        tds = [row[c] for c in ("rushing_tds", "receiving_tds") if c in row and pd.notna(row[c])]
        # No TD value at all means there are no stats for this row; report
        # "unknown" like every other market instead of pretending 0 TDs.
        if not tds:
            return None
        return float(sum(tds))

    # map to a stats column
    statcol = M[m] if m in M else M.get(m2)
    if statcol is None:
        return None
    val = row.get(statcol, None)
    try:
        return float(val) if pd.notna(val) else None
    except (TypeError, ValueError):
        # Value present but not interpretable as a single number.
        return None

def decide_result(side, actual_value, point):
    """Grade one prop: 1 when `side` won, 0 when it lost, None when ungraded.

    Args:
        side: "yes"/"no" or "over"/"under" (case and surrounding whitespace
            are ignored).
        actual_value: the realized stat; None or NaN means "unknown".
        point: the line for over/under markets; ignored for yes/no.

    Returns:
        int | None: 1 or 0, or None when the actual value is unknown, the
        side is not recognized, the point is missing or not numeric, or the
        actual value lands exactly on the point (a push: neither side wins).
    """
    # None stored in a DataFrame column comes back as NaN, so test for both.
    if actual_value is None or pd.isna(actual_value):
        return None
    s = str(side).strip().lower()
    if s in ("yes","no"):
        # yes/no markets: YES wins when the actual value is above zero
        yes = bool(actual_value > 0)
        return 1 if (s == "yes") == yes else 0
    if s not in ("over", "under"):
        return None
    # over/under with numeric point
    try:
        p = float(point)
    except (TypeError, ValueError):
        return None
    if pd.isna(p):
        return None
    if actual_value == p:
        # Push: grading both Over and Under as winners would be contradictory.
        return None
    went_over = bool(actual_value > p)
    return 1 if (s == "over") == went_over else 0

import numpy as np

# compute
# actual_for_row yields a float or None per row; building the column as an
# explicit float64 Series turns None into NaN and also works on an empty frame.
dfj["actual_value"] = pd.Series(
    [actual_for_row(row) for _, row in dfj.iterrows()],
    index=dfj.index,
    dtype="float64",
)

# Grade every row; the point is passed as None when props carry no point_key.
point_col = "point_key" if have_point else None
points = dfj[point_col] if point_col else [None] * len(dfj)
dfj["result"] = [
    decide_result(side=side, actual_value=actual, point=point)
    for side, actual, point in zip(dfj["side"], dfj["actual_value"], points)
]

# Keep only rows for which an actual_value could be computed
supported_mask = dfj["actual_value"].notna()
unsupported = (~supported_mask).sum()
print(f"[info] unsupported/unknown markets or missing stats rows skipped: {unsupported}")

out_cols = ["player_key","market_std","side","actual_value","result"]
if have_point: out_cols.insert(2, "point_key")

# Best-effort fill for player_key if missing (name-join fallback)
if "player_key" not in dfj.columns and join_left in dfj.columns:
    dfj["player_key"] = dfj[join_left]

out = dfj.loc[supported_mask, [c for c in out_cols if c in dfj.columns]].copy()

# Create the output directory from the destination PATH; `out` is the
# DataFrame and has no `.parent`.
out_actuals.parent.mkdir(parents=True, exist_ok=True)
out.to_csv(out_actuals, index=False)
print(f"[OK] wrote {len(out)} rows to {out_actuals}")
print(out.head(10))


[BASE] /Users/pwitt/fourth-and-value
[props] shape: (2673, 24)
[stats] shape: (1142, 114)
[stats] after season/week filter: (71, 114)
[join] props.player_key  ↔  stats.player_id


SystemExit: [ERR] props missing required column: side

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
