# NFL Full‑Stack Predictions — Reproducible Weekly Run

**Generated:** 2025-11-02T17:39:14

Two spaces after periods.  Hyphens instead of em dashes.  This notebook is completely reproducible and self‑contained:
- **Cell 1** fetches all needed data live, normalizes it into a **stable input schema**, and saves to a CSV.
- **Cells 2‑N** run the full modeling stack (market prior, spread curve, injury and weather adjustments, Monte Carlo, Logistic Regression, Gradient‑Boosted Trees), then save and print the weekly summary.
- All file names, seeds, and knobs are declared as **constants at the top**.


In [3]:
# =========================
# CONSTANTS — TUNE HERE
# =========================
SEASON = 2025
WEEK   = None              # None = auto-detect from schedule.  Or set an int.
INPUT_CSV  = "data/nfl_week_inputs.csv"
OUTPUT_CSV = "data/nfl_week_summary.csv"
SEED       = 20251102      # Reproducible seed for Monte Carlo
SD_MARGIN  = 13.0          # League typical points variance for margin
MC_SIMS    = 100_000       # Monte Carlo draws
LOGIT_MAX_ITER = 400

# Optional: path to real historical training set (if present, used instead of synthetic)
NFLFASTR_GAMES = "data/nflfastR_games.csv"

# Team code normalization map to 3-letter abbreviations used across sources
TEAM_FIX = {
    "KAN":"KC","JAC":"JAX","GNB":"GB","SFO":"SF","NWE":"NE","TAM":"TB","NOR":"NO","LVR":"LV",
    "LAC":"LAC","LAR":"LAR","WSH":"WAS","WFT":"WAS","BLT":"BAL","ARI":"ARI","HST":"HOU",
    "OAK":"LV","SD":"LAC","STL":"LAR","LA":"LAR","CLV":"CLE","GBP":"GB"
}

# Stable input schema the model expects.  Do not change ordering.
SCHEMA = [
    "game_id","kickoff_et","home_team","away_team","favorite","spread_fav","total",
    "ml_fav","ml_dog","home_city","weather_precip","weather_temp_f","weather_breezy",
    "inj_out_fav","inj_doubt_fav","inj_quest_fav","inj_out_dog","inj_doubt_dog","inj_quest_dog",
    "off_epa_fav","def_epa_allowed_fav","off_epa_dog","def_epa_allowed_dog"
]

# 1) Pull live inputs and write a stable CSV

In [22]:
import pandas as pd, numpy as np, io, requests, re
from datetime import datetime
from pathlib import Path
from bs4 import BeautifulSoup

# Helper: normalize team code to 3-letter
def norm_team(x: str) -> str:
    if pd.isna(x): 
        return x
    x = str(x).strip().upper()
    return TEAM_FIX.get(x, x[:3])


## 1a. Schedule — nfl_data_py

### keep for reference

In [28]:
### KINDA WORKS - DOESNT SEE TIME
# ============================================================
# 1a. Schedule — nfl_data_py
# ============================================================
try:
    from nfl_data_py import import_schedules
    sched = import_schedules([SEASON])
    # Normalize columns
    if "game_date" in sched.columns:
        sched["game_date"] = pd.to_datetime(sched["game_date"]).dt.strftime("%Y-%m-%d")
    else:
        alt = [c for c in sched.columns if "date" in c.lower()]
        if alt:
            sched.rename(columns={alt[0]: "game_date"}, inplace=True)
            sched["game_date"] = pd.to_datetime(sched["game_date"]).dt.strftime("%Y-%m-%d")
    # Auto-detect week if not provided
    if WEEK is None:
        today = pd.Timestamp.now(tz="America/New_York").tz_localize(None).normalize()
        sched["game_date"] = pd.to_datetime(sched["game_date"]).dt.tz_localize(None)
        this_week = sched.loc[pd.to_datetime(sched["game_date"]) <= today, "week"].max()
        WEEK = int(this_week) if pd.notna(this_week) else int(sched["week"].max())
except Exception as e:
    raise RuntimeError(f"Schedule fetch failed: {e}")

# Pick whichever columns nfl_data_py actually gives us
home_col = next((c for c in sched.columns if "home" in c.lower() and "team" in c.lower()), None)
away_col = next((c for c in sched.columns if "away" in c.lower() and "team" in c.lower()), None)
if home_col is None or away_col is None:
    raise RuntimeError(f"Could not find home/away team columns. Found columns: {sched.columns.tolist()}")

# Pick a start-time column if it exists
possible_time_cols = ["start_time", "game_time_eastern", "game_time", "gamedetail"]
time_col = next((c for c in possible_time_cols if c in sched.columns), None)
if time_col is None:
    print("⚠️ No explicit start time column found in schedule.  Defaulting to 13:00 ET.")
    sched["start_time"] = "13:00"
else:
    sched["start_time"] = sched[time_col].fillna("13:00")

⚠️ No explicit start time column found in schedule.  Defaulting to 13:00 ET.


### newer

In [34]:
# ============================================================
# 1a. Schedule — nfl_data_py  (final, schema-safe)
# ============================================================
try:
    from nfl_data_py import import_schedules

    # Pull schedule
    sched = import_schedules([SEASON]).copy()

    # --- Ensure proper date field ---
    if "gameday" in sched.columns:
        sched.rename(columns={"gameday": "game_date"}, inplace=True)
    elif "__game_date__" in sched.columns:
        sched.rename(columns={"__game_date__": "game_date"}, inplace=True)
    else:
        raise RuntimeError(f"No recognizable date field found in schedule: {sched.columns.tolist()}")
    sched["game_date"] = pd.to_datetime(sched["game_date"], errors="coerce").dt.strftime("%Y-%m-%d")

    # --- Ensure start time field ---
    time_col = None
    for c in ["start_time", "__start_time__", "gametime", "game_time"]:
        if c in sched.columns:
            time_col = c
            break
    if time_col is None:
        print("⚠️ No explicit start time column found. Defaulting to 13:00 ET.")
        sched["start_time"] = "13:00"
    else:
        # Normalize to string HH:MM
        sched["start_time"] = sched[time_col].astype(str).fillna("13:00")

    # --- Determine current or next week automatically ---
    if WEEK is None:
        today = pd.Timestamp.now(tz="America/New_York").tz_localize(None).normalize()
        sched["game_date"] = pd.to_datetime(sched["game_date"], errors="coerce").dt.tz_localize(None)
        this_week = sched.loc[pd.to_datetime(sched["game_date"]) <= today, "week"].max()
        WEEK = int(this_week) if pd.notna(this_week) else int(sched["week"].max())

except Exception as e:
    raise RuntimeError(f"Schedule fetch failed: {e}")

# --- Home/away fields (already standard in your data) ---
home_col, away_col = "home_team", "away_team"

# --- Guarantee the target columns exist before slicing ---
required_cols = ["game_id", home_col, away_col, "game_date", "start_time"]
missing = [c for c in required_cols if c not in sched.columns]
if missing:
    raise RuntimeError(f"Missing expected columns in sched: {missing}")

# --- Build trimmed schedule table ---
games = sched.loc[sched["season"].eq(SEASON) & sched["week"].eq(WEEK), required_cols].copy()

print("✅ Schedule loaded successfully")
print(games)

✅ Schedule loaded successfully
              game_id home_team away_team   game_date start_time
7112  2025_09_BAL_MIA       MIA       BAL  2025-10-30      20:15
7113  2025_09_CHI_CIN       CIN       CHI  2025-11-02      13:00
7114  2025_09_MIN_DET       DET       MIN  2025-11-02      13:00
7115   2025_09_CAR_GB        GB       CAR  2025-11-02      13:00
7116  2025_09_DEN_HOU       HOU       DEN  2025-11-02      13:00
7117   2025_09_ATL_NE        NE       ATL  2025-11-02      13:00
7118   2025_09_SF_NYG       NYG        SF  2025-11-02      13:00
7119  2025_09_IND_PIT       PIT       IND  2025-11-02      13:00
7120  2025_09_LAC_TEN       TEN       LAC  2025-11-02      13:00
7121    2025_09_NO_LA        LA        NO  2025-11-02      16:05
7122   2025_09_JAX_LV        LV       JAX  2025-11-02      16:05
7123   2025_09_KC_BUF       BUF        KC  2025-11-02      16:25
7124  2025_09_SEA_WAS       WAS       SEA  2025-11-02      20:20
7125  2025_09_ARI_DAL       DAL       ARI  2025-11-03      

In [33]:

# --- Build a trimmed schedule table with canonical names (final fix) ---
# Look for any column that represents the game date or gameday
date_col_candidates = [c for c in sched.columns if any(k in c.lower() for k in ["gameday", "game_date", "date", "gametime"])]
if not date_col_candidates:
    raise RuntimeError(f"No usable date column found in schedule: {sched.columns.tolist()}")
date_col = date_col_candidates[0]

# If it's named 'gameday' or 'gametime', normalize its contents to a proper date string
sched[date_col] = pd.to_datetime(sched[date_col], errors="coerce").dt.strftime("%Y-%m-%d")

# Build consistent columns (guaranteed to exist now)
expected_cols = ["game_id", home_col, away_col, date_col, "start_time"]
for col in expected_cols:
    if col not in sched.columns:
        sched[col] = np.nan

games = (
    sched.query("season == @SEASON and week == @WEEK")[expected_cols]
         .rename(columns={
             home_col: "home_team",
             away_col: "away_team",
             date_col: "game_date"
         })
         .copy()
)

# Normalize team codes
games["home_team"] = games["home_team"].map(norm_team)
games["away_team"] = games["away_team"].map(norm_team)

#### print

In [36]:
print(sched.columns.tolist())
print(sched.head(3))

print(games.columns.tolist())
print(games.head(3))

['game_id', 'season', 'game_type', 'week', 'game_date', 'weekday', 'gametime', 'away_team', 'away_score', 'home_team', 'home_score', 'location', 'result', 'total', 'overtime', 'old_game_id', 'gsis', 'nfl_detail_id', 'pfr', 'pff', 'espn', 'ftn', 'away_rest', 'home_rest', 'away_moneyline', 'home_moneyline', 'spread_line', 'away_spread_odds', 'home_spread_odds', 'total_line', 'under_odds', 'over_odds', 'div_game', 'roof', 'surface', 'temp', 'wind', 'away_qb_id', 'home_qb_id', 'away_qb_name', 'home_qb_name', 'away_coach', 'home_coach', 'referee', 'stadium_id', 'stadium', 'start_time']
              game_id  season game_type  week   game_date   weekday gametime  \
6991  2025_01_DAL_PHI    2025       REG     1  2025-09-04  Thursday    20:20   
6992   2025_01_KC_LAC    2025       REG     1  2025-09-05    Friday    20:00   
6993   2025_01_TB_ATL    2025       REG     1  2025-09-07    Sunday    13:00   

     away_team  away_score home_team  ...  away_qb_id  home_qb_id  \
6991       DAL        

## 1b. Odds — ESPN scoreboard JSON (public)

### pull using espn (future/live only)

#### endpoint was working, but code wasn't parsing

In [40]:
# ============================================================
# 1b. Odds — ESPN scoreboard JSON (public)
# ============================================================
def fetch_odds_espn():
    url = "https://site.web.api.espn.com/apis/v2/scoreboard/header?sport=football&league=nfl"
    r = requests.get(url, headers={"User-Agent":"Mozilla/5.0"})
    j = r.json()
    rows = []
    for ev in j.get("events", []):
        comps = ev.get("competitions", [])
        if not comps: 
            continue
        c = comps[0]
        if "competitors" not in c or len(c["competitors"]) < 2: 
            continue
        h = c["competitors"][0]; a = c["competitors"][1]
        home = norm_team(h["team"]["abbreviation"])
        away = norm_team(a["team"]["abbreviation"])
        spread_fav = None; total = None; ml_fav = None
        if c.get("odds"):
            o = c["odds"][0]
            total = o.get("overUnder", np.nan)
            try:
                ml_fav = float(o.get("moneyLine")) if o.get("moneyLine") is not None else np.nan
            except: 
                ml_fav = np.nan
            det = str(o.get("details", ""))
            m = re.search(r"([A-Z]{2,3})\s*[-−](\d+\.?\d*)", det)
            if m:
                fav_abbr = norm_team(m.group(1))
                val = float(m.group(2))
                spread_fav = val
        rows.append({
            "home_team": home, 
            "away_team": away,
            "spread_fav": spread_fav, 
            "total": total, 
            "ml_fav": ml_fav
        })
    return pd.DataFrame(rows)

odds = fetch_odds_espn()

#### end point working, code parsing, no history

In [42]:
# ============================================================
# 1b. Odds — ESPN main scoreboard JSON (working endpoint)
# ============================================================
def fetch_odds_espn():
    """
    Fetches NFL odds and totals from ESPN's live scoreboard API.
    Returns DataFrame with columns: home_team, away_team, spread_fav, total, ml_fav
    """
    url = "https://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard"
    r = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
    r.raise_for_status()
    j = r.json()

    rows = []
    for ev in j.get("events", []):
        comps = ev.get("competitions", [])
        if not comps:
            continue
        c = comps[0]
        if "competitors" not in c or len(c["competitors"]) < 2:
            continue

        # Competitor roles are marked "home"/"away" in this endpoint
        home, away = None, None
        for team in c["competitors"]:
            abbr = norm_team(team["team"]["abbreviation"])
            if team["homeAway"] == "home":
                home = abbr
            else:
                away = abbr

        spread_fav, total, ml_fav = np.nan, np.nan, np.nan
        if "odds" in c and c["odds"]:
            o = c["odds"][0]
            total = o.get("overUnder", np.nan)
            try:
                ml_fav = float(o.get("moneyLine")) if o.get("moneyLine") is not None else np.nan
            except:
                ml_fav = np.nan

            # Parse favorite and spread
            det = str(o.get("details", ""))
            m = re.search(r"([A-Z]{2,3})\s*[-−](\d+\.?\d*)", det)
            if m:
                fav_abbr = norm_team(m.group(1))
                spread_fav = float(m.group(2))
            else:
                spread_fav = np.nan

        if home and away:
            rows.append({
                "home_team": home,
                "away_team": away,
                "spread_fav": spread_fav,
                "total": total,
                "ml_fav": ml_fav
            })

    df = pd.DataFrame(rows)
    print(f"✅ Pulled {len(df)} games from ESPN odds feed")
    return df

odds = fetch_odds_espn()
print(odds.head())

✅ Pulled 14 games from ESPN odds feed
  home_team away_team  spread_fav  total  ml_fav
0       CIN       CHI         NaN    NaN     NaN
1       DET       MIN         NaN    NaN     NaN
2        GB       CAR         NaN    NaN     NaN
3       TEN       LAC         NaN    NaN     NaN
4        NE       ATL         NaN    NaN     NaN


### pull using odds-api

In [45]:
import requests, pandas as pd

API_KEY = "a1b9bc38f3e9c4d2e8a6ce056ec078e6"
url = f"https://api.the-odds-api.com/v4/sports/americanfootball_nfl/odds/?apiKey={API_KEY}&regions=us&markets=spreads,totals,h2h"

r = requests.get(url)
data = r.json()

rows = []
for g in data:
    home = g["home_team"]
    away = g["away_team"]
    for bk in g["bookmakers"]:
        for mk in bk["markets"]:
            if mk["key"] == "spreads":
                spread = mk["outcomes"][0]["point"]
            elif mk["key"] == "totals":
                total = mk["outcomes"][0]["point"]
    rows.append({
        "home_team": home,
        "away_team": away,
        "spread": spread,
        "total": total,
        "bookmaker": bk["title"],
        "commence_time": g["commence_time"]
    })
odds = pd.DataFrame(rows)
print(odds.head())

              home_team             away_team  spread  total bookmaker  \
0  New England Patriots       Atlanta Falcons    17.5   51.0    Bovada   
1   Pittsburgh Steelers    Indianapolis Colts     3.0   51.5    Bovada   
2    Cincinnati Bengals         Chicago Bears    -6.5   60.5    Bovada   
3      Tennessee Titans  Los Angeles Chargers    -7.5   61.5    Bovada   
4     Green Bay Packers     Carolina Panthers     6.0   35.5    Bovada   

          commence_time  
0  2025-11-02T18:02:21Z  
1  2025-11-02T18:02:29Z  
2  2025-11-02T18:02:36Z  
3  2025-11-02T18:02:45Z  
4  2025-11-02T18:02:52Z  


In [48]:
# Example normalization (extend as needed)
TEAM_ABBR = {
    "Arizona Cardinals": "ARI", "Atlanta Falcons": "ATL", "Baltimore Ravens": "BAL",
    "Buffalo Bills": "BUF", "Carolina Panthers": "CAR", "Chicago Bears": "CHI",
    "Cincinnati Bengals": "CIN", "Cleveland Browns": "CLE", "Dallas Cowboys": "DAL",
    "Denver Broncos": "DEN", "Detroit Lions": "DET", "Green Bay Packers": "GB",
    "Houston Texans": "HOU", "Indianapolis Colts": "IND", "Jacksonville Jaguars": "JAX",
    "Kansas City Chiefs": "KC", "Las Vegas Raiders": "LV", "Los Angeles Chargers": "LAC",
    "Los Angeles Rams": "LAR", "Miami Dolphins": "MIA", "Minnesota Vikings": "MIN",
    "New England Patriots": "NE", "New Orleans Saints": "NO", "New York Giants": "NYG",
    "New York Jets": "NYJ", "Philadelphia Eagles": "PHI", "Pittsburgh Steelers": "PIT",
    "San Francisco 49ers": "SF", "Seattle Seahawks": "SEA", "Tampa Bay Buccaneers": "TB",
    "Tennessee Titans": "TEN", "Washington Commanders": "WAS"
}

odds["home_team"] = odds["home_team"].map(TEAM_ABBR)
odds["away_team"] = odds["away_team"].map(TEAM_ABBR)

In [49]:
merged = games.merge(odds, on=["home_team", "away_team"], how="left")
print(merged[["game_id","home_team","away_team","spread","total"]])

            game_id home_team away_team  spread  total
0   2025_09_BAL_MIA       MIA       BAL     NaN    NaN
1   2025_09_CHI_CIN       CIN       CHI    -6.5   60.5
2   2025_09_MIN_DET       DET       MIN    -2.0   58.5
3    2025_09_CAR_GB        GB       CAR     6.0   35.5
4   2025_09_DEN_HOU       HOU       DEN    -1.5   36.5
5    2025_09_ATL_NE        NE       ATL    17.5   51.0
6    2025_09_SF_NYG       NYG        SF     7.0   51.0
7   2025_09_IND_PIT       PIT       IND     3.0   51.5
8   2025_09_LAC_TEN       TEN       LAC    -7.5   61.5
9     2025_09_NO_LA        LA        NO     NaN    NaN
10   2025_09_JAX_LV        LV       JAX    -1.5   44.5
11   2025_09_KC_BUF       BUF        KC     2.0   53.0
12  2025_09_SEA_WAS       WAS       SEA    -3.0   48.0
13  2025_09_ARI_DAL       DAL       ARI     3.5   53.5


### print

In [50]:
print(odds.columns.tolist())
print(odds)

['home_team', 'away_team', 'spread', 'total', 'bookmaker', 'commence_time']
   home_team away_team  spread  total bookmaker         commence_time
0         NE       ATL    17.5   51.0    Bovada  2025-11-02T18:02:21Z
1        PIT       IND     3.0   51.5    Bovada  2025-11-02T18:02:29Z
2        CIN       CHI    -6.5   60.5    Bovada  2025-11-02T18:02:36Z
3        TEN       LAC    -7.5   61.5    Bovada  2025-11-02T18:02:45Z
4         GB       CAR     6.0   35.5    Bovada  2025-11-02T18:02:52Z
5        NYG        SF     7.0   51.0    Bovada  2025-11-02T18:02:52Z
6        HOU       DEN    -1.5   36.5    Bovada  2025-11-02T18:02:56Z
7        DET       MIN    -2.0   58.5    Bovada  2025-11-02T18:03:03Z
8         LV       JAX    -1.5   44.5    Bovada  2025-11-02T21:05:00Z
9        LAR        NO   -14.5   44.5    Bovada  2025-11-02T21:05:00Z
10       BUF        KC     2.0   53.0    Bovada  2025-11-02T21:25:00Z
11       WAS       SEA    -3.0   48.0    Bovada  2025-11-03T01:20:00Z
12       DAL  

In [21]:
# ============================================================
# 1c. Weather — Open-Meteo geocoder + forecast
# ============================================================
CITY_BY_TEAM = {
    "ARI":"Glendale,US","ATL":"Atlanta,US","BAL":"Baltimore,US","BUF":"Orchard Park,US",
    "CAR":"Charlotte,US","CHI":"Chicago,US","CIN":"Cincinnati,US","CLE":"Cleveland,US",
    "DAL":"Arlington,US","DEN":"Denver,US","DET":"Detroit,US","GB":"Green Bay,US",
    "HOU":"Houston,US","IND":"Indianapolis,US","JAX":"Jacksonville,US","KC":"Kansas City,US",
    "LAC":"Inglewood,US","LAR":"Inglewood,US","LV":"Las Vegas,US","MIA":"Miami Gardens,US",
    "MIN":"Minneapolis,US","NE":"Foxborough,US","NO":"New Orleans,US","NYG":"East Rutherford,US",
    "NYJ":"East Rutherford,US","PHI":"Philadelphia,US","PIT":"Pittsburgh,US","SEA":"Seattle,US",
    "SF":"Santa Clara,US","TB":"Tampa,US","TEN":"Nashville,US","WAS":"Landover,US"
}

def fetch_weather(team):
    city = CITY_BY_TEAM.get(team)
    if not city: 
        return (np.nan, np.nan, np.nan, "")
    try:
        geo = requests.get(
            f"https://geocoding-api.open-meteo.com/v1/search?name={city.replace(' ','+')}",
            headers={"User-Agent":"Mozilla/5.0"}).json()
        lat, lon = geo["results"][0]["latitude"], geo["results"][0]["longitude"]
        w = requests.get(
            f"https://api.open-meteo.com/v1/forecast?latitude={lat}&longitude={lon}&hourly=temperature_2m,precipitation_probability,windspeed_10m&forecast_days=1",
            headers={"User-Agent":"Mozilla/5.0"}).json()
        t = float(np.nanmean(w["hourly"]["temperature_2m"]))
        p = float(np.nanmean(w["hourly"]["precipitation_probability"]))
        wind = float(np.nanmean(w["hourly"]["windspeed_10m"]))
        breezy = 1 if wind >= 12 else 0
        return (p, t, breezy, city)
    except Exception:
        return (np.nan, np.nan, np.nan, city or "")

wx_rows = [fetch_weather(t) for t in games["home_team"]]
games[["weather_precip","weather_temp_f","weather_breezy","home_city"]] = pd.DataFrame(wx_rows, index=games.index)

# ============================================================
# 1d. EPA/play — compute from play-by-play
# ============================================================
try:
    from nfl_data_py import import_pbp_data as import_pbp
except ImportError:
    from nfl_data_py import import_pbp as import_pbp

def compute_team_epa_from_pbp(season: int, weeks=None):
    pbp = import_pbp([season])
    if weeks is not None:
        pbp = pbp[pbp["week"].isin(weeks)]
    mask = (
        pbp["play_type"].isin(["pass","run"])
        & (pbp["qb_spike"].fillna(0)==0)
        & (pbp["qb_kneel"].fillna(0)==0)
        & (pbp["penalty"].fillna(0)==0)
    )
    pbp = pbp.loc[mask, ["posteam","defteam","epa"]]
    off = pbp.groupby("posteam")["epa"].mean().reset_index().rename(columns={"posteam":"team","epa":"off_epa"})
    deff = pbp.groupby("defteam")["epa"].mean().reset_index().rename(columns={"defteam":"team","epa":"def_epa_allowed"})
    epa = off.merge(deff, on="team", how="outer")
    epa["team"] = epa["team"].astype(str).str.upper().str.replace(r"\W+","",regex=True).str[:3]
    epa["team"] = epa["team"].map(lambda x: TEAM_FIX.get(x, x))
    return epa

epa = compute_team_epa_from_pbp(SEASON)
epa_home = epa.add_suffix("_home").rename(columns={"team_home":"team_h"})
epa_away = epa.add_suffix("_away").rename(columns={"team_away":"team_a"})

# ============================================================
# 1e. Injuries — FantasyPros scrape
# ============================================================
def fetch_injuries():
    url = "https://www.fantasypros.com/nfl/injuries/"
    r = requests.get(url, headers={"User-Agent":"Mozilla/5.0"})
    soup = BeautifulSoup(r.text, "html.parser")
    rows = []
    for tr in soup.select("tbody tr"):
        tds = [td.get_text(strip=True) for td in tr.find_all("td")]
        if len(tds) < 3: 
            continue
        team = norm_team(tds[1])
        status = tds[-1].lower()
        rows.append({"team": team, "status": status})
    if not rows:
        return pd.DataFrame(columns=["team","inj_out","inj_doubt","inj_quest"])
    df = pd.DataFrame(rows)
    agg = df.groupby(["team","status"]).size().unstack(fill_value=0)
    for s in ["out","doubtful","questionable"]:
        if s not in agg.columns:
            agg[s] = 0
    agg = agg.reset_index().rename(columns={"out":"inj_out","doubtful":"inj_doubt","questionable":"inj_quest"})
    return agg

inj = fetch_injuries()
inj_home = inj.add_suffix("_home").rename(columns={"team_home":"team_h"})
inj_away = inj.add_suffix("_away").rename(columns={"team_away":"team_a"})

# ============================================================
# 1f. Merge everything cleanly
# ============================================================
df = (
    games.merge(epa_home, left_on="home_team", right_on="team_h", how="left")
         .merge(epa_away, left_on="away_team", right_on="team_a", how="left")
         .merge(inj_home, left_on="home_team", right_on="team_h", how="left")
         .merge(inj_away, left_on="away_team", right_on="team_a", how="left")
         .merge(odds, on=["home_team","away_team"], how="left")
)

# Fill missing numeric values
for col in ["inj_out_home","inj_doubt_home","inj_quest_home","inj_out_away","inj_doubt_away","inj_quest_away",
            "off_epa_home","def_epa_allowed_home","off_epa_away","def_epa_allowed_away"]:
    if col in df.columns:
        df[col] = df[col].fillna(0.0)

df["kickoff_et"] = df["game_date"].astype(str) + " " + df["start_time"].fillna("13:00")
df["favorite"] = np.where(df["spread_fav"].notna(),
                          df.apply(lambda r: r["away_team"] if float(r["spread_fav"])>0 else r["home_team"], axis=1),
                          df["home_team"])

out = pd.DataFrame({
    "game_id": df["game_id"],
    "kickoff_et": df["kickoff_et"],
    "home_team": df["home_team"],
    "away_team": df["away_team"],
    "favorite": df["favorite"],
    "spread_fav": pd.to_numeric(df["spread_fav"], errors="coerce"),
    "total": pd.to_numeric(df["total"], errors="coerce"),
    "ml_fav": pd.to_numeric(df["ml_fav"], errors="coerce"),
    "ml_dog": np.nan,
    "home_city": df["home_city"],
    "weather_precip": pd.to_numeric(df["weather_precip"], errors="coerce"),
    "weather_temp_f": pd.to_numeric(df["weather_temp_f"], errors="coerce"),
    "weather_breezy": pd.to_numeric(df["weather_breezy"], errors="coerce"),
    "inj_out_fav": np.where(df["favorite"] == df["home_team"], df["inj_out_home"], df["inj_out_away"]),
    "inj_doubt_fav": np.where(df["favorite"] == df["home_team"], df["inj_doubt_home"], df["inj_doubt_away"]),
    "inj_quest_fav": np.where(df["favorite"] == df["home_team"], df["inj_quest_home"], df["inj_quest_away"]),
    "inj_out_dog": np.where(df["favorite"] == df["home_team"], df["inj_out_away"], df["inj_out_home"]),
    "inj_doubt_dog": np.where(df["favorite"] == df["home_team"], df["inj_doubt_away"], df["inj_doubt_home"]),
    "inj_quest_dog": np.where(df["favorite"] == df["home_team"], df["inj_quest_away"], df["inj_quest_home"]),
    "off_epa_fav": np.where(df["favorite"] == df["home_team"], df["off_epa_home"], df["off_epa_away"]),
    "def_epa_allowed_fav": np.where(df["favorite"] == df["home_team"], df["def_epa_allowed_home"], df["def_epa_allowed_away"]),
    "off_epa_dog": np.where(df["favorite"] == df["home_team"], df["off_epa_away"], df["off_epa_home"]),
    "def_epa_allowed_dog": np.where(df["favorite"] == df["home_team"], df["def_epa_allowed_away"], df["def_epa_allowed_home"]),
})

out = out[SCHEMA].copy()
out.to_csv(INPUT_CSV, index=False)
print(f"✅ Wrote stable weekly input file to {INPUT_CSV}")
print(out.head(10))

KeyError: 'home_team'

In [20]:
print(epa.columns.tolist())
print(epa.head(3))

['team', 'off_epa', 'def_epa_allowed']
  team   off_epa  def_epa_allowed
0  ARI  0.003159         0.021111
1  ATL -0.038489         0.001264
2  BAL  0.027466         0.111421


## 2) Modeling utilities and reproducibility

In [None]:
import numpy as np, math, pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score
from pathlib import Path

rng = np.random.default_rng(SEED)

def implied_prob(odds):
    if pd.isna(odds): return np.nan
    return (-odds)/((-odds)+100) if odds < 0 else 100/(odds+100)

def fair_probs(ml_fav, ml_dog):
    if pd.isna(ml_fav) or pd.isna(ml_dog):
        return np.nan, np.nan, np.nan
    p_f = implied_prob(ml_fav); p_d = implied_prob(ml_dog)
    vig = p_f + p_d - 1.0
    return p_f/(1+vig), p_d/(1+vig), vig

def spread_to_prob(spread, k=0.18):
    if pd.isna(spread): return np.nan
    return 1/(1 + math.exp(-k*spread))

def injury_penalty(o, d, q):
    o = 0 if pd.isna(o) else o
    d = 0 if pd.isna(d) else d
    q = 0 if pd.isna(q) else q
    return 0.010*o + 0.006*d + 0.002*q

def monte_carlo_cover(spread, mu_margin, sd=SD_MARGIN, N=MC_SIMS, seed=SEED):
    r = np.random.default_rng(seed)
    margins = r.normal(loc=mu_margin, scale=sd, size=N)
    return float(np.mean(margins > spread)), float(np.mean(margins > 0)), np.percentile(margins,[5,25,50,75,95]).tolist()

def train_models_synth(n=20000, sd=SD_MARGIN, seed=SEED):
    r = np.random.default_rng(seed)
    spread = r.uniform(0, 12, size=n)
    off_diff_fav = r.normal(0, 0.10, size=n)
    off_diff_dog = r.normal(0, 0.10, size=n)
    mu = 0.75*spread + 25*off_diff_fav - 20*off_diff_dog
    margin = r.normal(loc=mu, scale=sd, size=n)
    covered = (margin > spread).astype(int)
    X = np.column_stack([spread, off_diff_fav, off_diff_dog]); y = covered
    logit = LogisticRegression(max_iter=LOGIT_MAX_ITER).fit(X,y)
    gb = GradientBoostingClassifier(random_state=seed).fit(X,y)
    return logit, gb, roc_auc_score(y, logit.predict_proba(X)[:,1]), roc_auc_score(y, gb.predict_proba(X)[:,1])

## 3) Optional real‑data training (if file present)

In [None]:
logit_real = gb_real = None
auc_logit_real = auc_gb_real = None

p = Path(NFLFASTR_GAMES)
if p.exists():
    try:
        real = pd.read_csv(p)
        need = {'spread_line','off_epa_fav','def_epa_fav','off_epa_dog','def_epa_dog','covered'}
        if need.issubset(real.columns):
            real = real.dropna(subset=list(need)).copy()
            real['off_diff_fav'] = real['off_epa_fav'] - real['def_epa_dog']
            real['off_diff_dog'] = real['off_epa_dog'] - real['def_epa_fav']
            Xr = real[['spread_line','off_diff_fav','off_diff_dog']].values
            yr = real['covered'].values.astype(int)
            logit_real = LogisticRegression(max_iter=LOGIT_MAX_ITER).fit(Xr, yr)
            gb_real    = GradientBoostingClassifier(random_state=SEED).fit(Xr, yr)
            auc_logit_real = roc_auc_score(yr, logit_real.predict_proba(Xr)[:,1])
            auc_gb_real    = roc_auc_score(yr, gb_real.predict_proba(Xr)[:,1])
            print(f"Trained real‑data models.  AUC logit={auc_logit_real:.3f}, GBM={auc_gb_real:.3f}")
        else:
            print("Real training file present but missing needed columns.  Using synthetic models.")
    except Exception as e:
        print(f"Real‑data training failed: {e}.  Using synthetic models.")
else:
    print("No nflfastR training file found.  Using synthetic models.")

## 4) Train synthetic cover models (fallback)

In [None]:
logit_s, gb_s, aucL_s, aucG_s = train_models_synth()
print(f"Synthetic models: AUC logit={aucL_s:.3f}, GBM={aucG_s:.3f}")

## 5) Evaluate the weekly slate and build summary table

In [None]:
import pandas as pd, numpy as np
inp = pd.read_csv(INPUT_CSV)
rows = []

for _, g in inp.iterrows():
    fav = g["favorite"]; dog = g["home_team"] if g["away_team"]==fav else g["away_team"]
    home_is_fav = (g["home_team"]==fav)

    # Prior probabilities
    # If ml_dog missing, we blend spread mapping with any ML fav we have
    p_fair_fav, p_fair_dog, vig = fair_probs(g["ml_fav"], g["ml_dog"])
    p_spread = spread_to_prob(g["spread_fav"])

    pen_fav = injury_penalty(g["inj_out_fav"], g["inj_doubt_fav"], g["inj_quest_fav"])
    pen_dog = injury_penalty(g["inj_out_dog"], g["inj_doubt_dog"], g["inj_quest_dog"])

    home_bump = 0.015 if (pd.isna(g["weather_precip"]) or g["weather_precip"]==0) else 0.010
    sign_home = 1 if home_is_fav else -1

    base_market = p_fair_fav if not pd.isna(p_fair_fav) else p_spread
    base_spread = p_spread if not pd.isna(p_spread) else p_fair_fav
    if pd.isna(base_market) and pd.isna(base_spread):
        p_win = np.nan
    else:
        p_market_adj = max(0,min(1,(base_market if not pd.isna(base_market) else base_spread) + sign_home*home_bump + (pen_dog - pen_fav)))
        p_spread_adj  = max(0,min(1,(base_spread  if not pd.isna(base_spread)  else base_market) + sign_home*home_bump + (pen_dog - pen_fav)))
        p_win = 0.5*(p_market_adj + p_spread_adj)

    # EPA differentials
    off_diff_fav = (g["off_epa_fav"] - g["def_epa_allowed_dog"]) if pd.notna(g["off_epa_fav"]) and pd.notna(g["def_epa_allowed_dog"]) else 0.0
    off_diff_dog = (g["off_epa_dog"] - g["def_epa_allowed_fav"]) if pd.notna(g["off_epa_dog"]) and pd.notna(g["def_epa_allowed_fav"]) else 0.0

    spread = float(g["spread_fav"]) if pd.notna(g["spread_fav"]) else 0.0
    mu_margin = spread + 25*off_diff_fav - 20*off_diff_dog

    # Monte Carlo cover
    p_cover_mc, p_win_mc, pctiles = monte_carlo_cover(spread, mu_margin)

    # Model cover preds: prefer real models if available
    X1 = np.array([[spread, off_diff_fav, off_diff_dog]])
    if 'logit_real' in globals() and logit_real is not None:
        p_cover_logit = float(logit_real.predict_proba(X1)[:,1]); p_cover_gb = float(gb_real.predict_proba(X1)[:,1])
        aucL, aucG = float(auc_logit_real), float(auc_gb_real)
    else:
        p_cover_logit = float(logit_s.predict_proba(X1)[:,1]);     p_cover_gb = float(gb_s.predict_proba(X1)[:,1])
        aucL, aucG = float(aucL_s), float(aucG_s)

    p_cover = float(np.mean([p_cover_mc, 0.5*(p_cover_logit + p_cover_gb)]))

    ats_lean = ('Lean favorite cover' if p_cover>0.53 else 'Neutral / coin‑flip' if 0.47<=p_cover<=0.53 else 'Lean dog +points')

    rows.append({
        "matchup": f"{g['away_team']} @ {g['home_team']}",
        "favorite": fav,
        "line": f"{fav} -{spread:.1f}",
        "total": g["total"],
        "P(win fav)": round(p_win,3) if not pd.isna(p_win) else np.nan,
        "P(cover fav)": round(p_cover,3),
        "ATS lean": ats_lean,
        "median margin": round(pctiles[2],2),
        "p05": round(pctiles[0],2),
        "p95": round(pctiles[4],2),
        "vig_free_ml_win": round(p_fair_fav,3) if not pd.isna(p_fair_fav) else np.nan,
        "spread_win_prob": round(p_spread,3) if not pd.isna(p_spread) else np.nan,
        "home_bump_sign": 1 if home_is_fav else -1,
        "inj_pen_fav": round(pen_fav,3),
        "inj_pen_dog": round(pen_dog,3),
        "off_diff_fav": round(off_diff_fav,3),
        "off_diff_dog": round(off_diff_dog,3),
        "AUC_logit": round(aucL,3),
        "AUC_GBM": round(aucG,3)
    })

summary = pd.DataFrame(rows).sort_values("matchup").reset_index(drop=True)
summary.to_csv(OUTPUT_CSV, index=False)
print(f"✅ Wrote weekly summary to {OUTPUT_CSV}")
summary.head(20)

## 6) Snapshot interpretation from the current slate

In [None]:
import numpy as np, pandas as pd
from pathlib import Path

summary = pd.read_csv(OUTPUT_CSV)

def snapshot(summary: pd.DataFrame):
    # Highest win certainty — top 2 P(win fav)
    top = summary[['matchup','favorite','line','P(win fav)']].dropna().sort_values('P(win fav)', ascending=False).head(2)
    # Tightest games — P(win fav) closest to 0.5 (take 3)
    tight = summary[['matchup','favorite','line','P(win fav)']].dropna().assign(diff=(summary['P(win fav)']-0.5).abs()).sort_values('diff').head(3)
    # Most credible favorite covers — P(cover fav) >= 0.53
    covers = summary.loc[summary['P(cover fav)']>=0.53, ['matchup','favorite','line','P(cover fav)']].sort_values('P(cover fav)', ascending=False)
    # High variance no-plays — huge lines >= 13 and cover near coin flip
    hv = summary.copy()
    hv['abs_line'] = hv['line'].str.extract(r"-([\d\.]+)").astype(float)
    hv = hv.loc[(hv['abs_line']>=13) & (hv['P(cover fav)'].between(0.48,0.52, inclusive='both')), ['matchup','favorite','line','P(cover fav)']]

    print("Snapshot interpretation")
    if len(top):
        print("  • Highest win certainty:", "; ".join([f"{r.favorite} {r.line} in {r.matchup} (≈ {r['P(win fav)']:.0%})" for _,r in top.iterrows()]))
    if len(tight):
        print("  • Tightest games:", "; ".join([f"{r.matchup}" for _,r in tight.iterrows()]))
    if len(covers):
        print("  • Most credible favorite covers:", "; ".join([f\"{r.favorite} {r.line}\" for _,r in covers.iterrows()]))
    if len(hv):
        print("  • High‑variance no‑plays:", "; ".join([f\"{r.favorite} {r.line}\" for _,r in hv.iterrows()]))

snapshot(summary)
summary