# Fourth & Value — Minimal Model QC

This notebook auto-detects the latest `params_week*.csv` and `props_with_model_week*.csv` under `data/props/`,
sanity-checks σ/λ, and recomputes simple model probabilities for a few spotlight players to flag degenerate 100%/0% cases.

**How to use**: place this notebook in your project root and run all cells.

In [8]:
import pandas as pd, numpy as np, re
from pathlib import Path





params = pd.read_csv("/Users/pwitt/fourth-and-value/data/props/params_week5.csv")
merged = pd.read_csv('/Users/pwitt/fourth-and-value/data/props/props_with_model_week5.csv')
params.head(2), merged.head(2)


(   player_display_name             name_std  market_std  n_games  mu  sigma  \
 0       Kyren Williams       kyren williams  anytime_td      4.0 NaN    NaN   
 1  Christian McCaffrey  christian mccaffrey  anytime_td      4.0 NaN    NaN   
 
    lam  season  week              built_at  
 0  0.5    2025     5  2025-10-02T18:56:56Z  
 1  0.5    2025     5  2025-10-02T18:56:56Z  ,
                             game_id         commence_time         home_team  \
 0  c4b72eabb3d557e73022ec730d8e3944  2025-10-03T00:16:00Z  Los Angeles Rams   
 1  c4b72eabb3d557e73022ec730d8e3944  2025-10-03T00:16:00Z  Los Angeles Rams   
 
              away_team                                    game   bookmaker  \
 0  San Francisco 49ers  San Francisco 49ers @ Los Angeles Rams  draftkings   
 1  San Francisco 49ers  San Francisco 49ers @ Los Angeles Rams  draftkings   
 
   bookmaker_title         market market_std          player  ... sigma  lam  \
 0      DraftKings  player_1st_td     1st_td  Kyren Willia

In [9]:
# σ/λ health
import numpy as np
def pct(x): return f"{100*float(x):.1f}%"
health = {
    "params_rows": len(params),
    "merged_rows": len(merged),
    "sigma_null%": pct(params["sigma"].isna().mean()) if "sigma" in params.columns else "NA",
    "sigma_le0%":  pct(params["sigma"].fillna(0).le(0).mean()) if "sigma" in params.columns else "NA",
    "lam_null%":   pct(params["lam"].isna().mean()) if "lam" in params.columns else "NA",
    "lam_le0%":    pct(params["lam"].fillna(0).le(0).mean()) if "lam" in params.columns else "NA",
    "model_prob_1%": pct((merged.get("model_prob", np.nan)==1.0).mean()) if "model_prob" in merged.columns else "NA",
    "model_prob_0%": pct((merged.get("model_prob", np.nan)==0.0).mean()) if "model_prob" in merged.columns else "NA",
}
pd.DataFrame([health])


Unnamed: 0,params_rows,merged_rows,sigma_null%,sigma_le0%,lam_null%,lam_le0%,model_prob_1%,model_prob_0%
0,898,9099,55.1%,55.1%,59.4%,80.2%,0.2%,10.2%


In [10]:
# Recompute simple model prob to flag degeneracy for a few players
from math import erf, isfinite
def std_norm_cdf(z): return 0.5*(1.0+erf(z/np.sqrt(2.0)))
def normal_over_prob(mu, sigma, line):
    if not (np.isfinite(mu) and np.isfinite(sigma) and np.isfinite(line)): return np.nan
    if sigma <= 0: return 1.0 if mu > line else 0.0
    z = (line - mu)/sigma
    return 1.0 - std_norm_cdf(z)
def poisson_sf(lam, k):
    if not (np.isfinite(lam) and np.isfinite(k)): return np.nan
    if lam < 0: return np.nan
    sigma = np.sqrt(max(lam, 1e-9))
    return normal_over_prob(lam, sigma, k - 0.5)

NORMAL = {"rush_yds","recv_yds","pass_yds","receptions","rush_attempts","pass_attempts","pass_completions"}
POISSON = {"pass_tds","pass_interceptions","anytime_td"}

def qc_prob(row):
    m = str(row.get("market_std","")).strip().lower()
    side = str(row.get("side","")).strip().lower()
    line = row.get("point", np.nan)
    mu = row.get("mu", np.nan)
    sigma = row.get("sigma", np.nan)
    lam = row.get("lam", np.nan)
    if m in NORMAL:
        p_over = normal_over_prob(mu, sigma, line)
        return p_over if side in ("over","yes","") else (1.0 - p_over)
    if m in POISSON:
        try: k = int(np.floor(line))
        except Exception: k = np.nan
        p_ge = poisson_sf(lam, k)
        return p_ge if side in ("over","yes","") else (1.0 - p_ge)
    return np.nan

players = ["Mac Jones", "Christian McCaffrey", "Skyy Moore", "Puka Nacua"]
for c in ["mu","sigma","lam"]:
    if c not in merged.columns: merged[c] = np.nan

focus = merged[merged["player"].astype(str).str.contains("|".join(players), case=False, na=False)].copy()
focus["qc_model_prob"] = focus.apply(qc_prob, axis=1)
focus["sigma_flag"] = (focus["sigma"].fillna(0) <= 0).map({True:"σ<=0", False:"ok"})
focus["degenerate_prob"] = np.where((focus["qc_model_prob"]>=0.9999)|(focus["qc_model_prob"]<=0.0001), "EXTREME", "")
cols = ["game","player","market_std","side","point","price","mu","sigma","lam","model_prob","qc_model_prob","edge_bps","commence_time","sigma_flag","degenerate_prob"]
focus.sort_values(["player","market_std","side"])[cols].head(50)


Unnamed: 0,game,player,market_std,side,point,price,mu,sigma,lam,model_prob,qc_model_prob,edge_bps,commence_time,sigma_flag,degenerate_prob
3,San Francisco 49ers @ Los Angeles Rams,Christian McCaffrey,1st_td,,,650,,,,,,,2025-10-03T00:16:00Z,σ<=0,
104,San Francisco 49ers @ Los Angeles Rams,Christian McCaffrey,1st_td,,,475,,,,,,,2025-10-03T00:16:00Z,σ<=0,
184,San Francisco 49ers @ Los Angeles Rams,Christian McCaffrey,1st_td,,,575,,,,,,,2025-10-03T00:16:00Z,σ<=0,
262,San Francisco 49ers @ Los Angeles Rams,Christian McCaffrey,1st_td,,,600,,,,,,,2025-10-03T00:16:00Z,σ<=0,
407,San Francisco 49ers @ Los Angeles Rams,Christian McCaffrey,1st_td,,,525,,,,,,,2025-10-03T00:16:00Z,σ<=0,
559,San Francisco 49ers @ Los Angeles Rams,Christian McCaffrey,1st_td,,,600,,,,,,,2025-10-03T00:16:00Z,σ<=0,
30,San Francisco 49ers @ Los Angeles Rams,Christian McCaffrey,anytime_td,yes,,-110,,,0.5,0.393469,,-1303.4,2025-10-03T00:16:00Z,σ<=0,
134,San Francisco 49ers @ Los Angeles Rams,Christian McCaffrey,anytime_td,yes,,-140,,,0.5,0.393469,,-1898.6,2025-10-03T00:16:00Z,σ<=0,
208,San Francisco 49ers @ Los Angeles Rams,Christian McCaffrey,anytime_td,yes,,-110,,,0.5,0.393469,,-1303.4,2025-10-03T00:16:00Z,σ<=0,
289,San Francisco 49ers @ Los Angeles Rams,Christian McCaffrey,anytime_td,yes,,-105,,,0.5,0.393469,,-1187.3,2025-10-03T00:16:00Z,σ<=0,


In [11]:
# Median σ by market to catch markets with near-zero variance
if "market_std" in params.columns and "sigma" in params.columns:
    med_sigma = (params.groupby("market_std", dropna=False)["sigma"]
                      .median()
                      .reset_index()
                      .rename(columns={"sigma":"median_sigma"})
                      .sort_values("median_sigma"))
    med_sigma.head(20)
else:
    print("params missing market_std/sigma")
