Get the current week's data for live game data

In [51]:
# pip install nfl_data_py pandas python-dateutil pytz
import pandas as pd
import nfl_data_py as nfl
from datetime import datetime, timezone
from pathlib import Path
import numpy as np

def find_root(start: Path = Path.cwd()):
    for p in [start, *start.parents]:
        if (p / ".git").exists() or (p / "README.md").exists():
            return p
    return start

def drop_all_empty_columns(df: pd.DataFrame) -> pd.DataFrame:
    tmp = df.replace("", np.nan)
    keep_cols = tmp.columns[tmp.notna().any(axis=0)]
    return df[keep_cols]

def export_current_week_like_historical() -> pd.DataFrame:
    now_utc = datetime.now(timezone.utc)
    season = now_utc.year
    seasons = [season]

    # 1) Schedules (+ Winner)
    sch = nfl.import_schedules(seasons)
    base_cols = ["game_id","season","week","gameday","home_team","away_team","home_score","away_score"]
    base_cols = [c for c in base_cols if c in sch.columns]
    df = (
        sch[base_cols]
        .rename(columns={"gameday": "date"})
        .assign(
            Winner=lambda d: d.apply(
                lambda r: r["home_team"] if (pd.notna(r.get("home_score")) and pd.notna(r.get("away_score")) and r["home_score"] > r["away_score"])
                else (r["away_team"] if (pd.notna(r.get("home_score")) and pd.notna(r.get("away_score")) and r["away_score"] > r["home_score"]) else "TIE"),
                axis=1
            ) if {"home_score","away_score"}.issubset(d.columns) else ""
        )
    )

    # Build kickoff datetimes for week detection
    sched_dt = sch.copy()
    if "game_datetime" in sched_dt.columns:
        sched_dt["kickoff_datetime_utc"] = pd.to_datetime(sched_dt["game_datetime"], utc=True, errors="coerce")
    elif {"gameday","gametime"}.issubset(sched_dt.columns):
        sched_dt["kickoff_datetime_utc"] = pd.to_datetime(
            sched_dt["gameday"].astype(str) + " " + sched_dt["gametime"].astype(str),
            utc=True, errors="coerce"
        )
    else:
        sched_dt["kickoff_datetime_utc"] = pd.NaT

    # Current week = the latest week that has started; if none, the next upcoming week
    mask_has_time = sched_dt["kickoff_datetime_utc"].notna()
    past = sched_dt[mask_has_time & (sched_dt["kickoff_datetime_utc"] <= now_utc)]
    upcoming = sched_dt[mask_has_time & (sched_dt["kickoff_datetime_utc"] > now_utc)]
    if not past.empty:
        current_week = int(past["week"].max())
    elif not upcoming.empty:
        current_week = int(upcoming["week"].min())
    else:
        current_week = int(sched_dt["week"].max())

    # 2) Team-game stats aggregation
    pbp = nfl.import_pbp_data(seasons, downcast=True)
    p = pbp[pbp["posteam"].notna()].copy()
    grp = p.groupby(["game_id","posteam"], as_index=False)
    isum = lambda s: s.fillna(0).astype(int).sum()
    fsum = lambda s: s.fillna(0).sum()

    team_stats = grp.agg(
        pass_cmp=("complete_pass", isum),
        pass_att=("pass_attempt", isum),
        pass_yds=("passing_yards", fsum),
        pass_td =("pass_touchdown", isum),
        pass_int=("interception", isum),
        pass_sacked=("sack", isum),
        pass_sacked_yds=("sack_yards", fsum) if "sack_yards" in p.columns else ("play_id", lambda s: 0),
        rush_att=("rush_attempt", isum),
        rush_yds=("rushing_yards", fsum),
        rush_td =("rush_touchdown", isum),
        first_down=("first_down", isum),
        penalties=("penalty", isum),
        penalties_yds=("penalty_yards", fsum) if "penalty_yards" in p.columns else ("play_id", lambda s: 0),
        fumbles_lost=("fumble_lost", isum) if "fumble_lost" in p.columns else ("play_id", lambda s: 0),
        plays_offense=("play_id", "count"),
        fga=("field_goal_attempt", isum) if "field_goal_attempt" in p.columns else ("play_id", lambda s: 0),
        fgm=("field_goal_result", lambda s: (p.loc[s.index,"field_goal_result"].fillna("").eq("made")).sum())
            if "field_goal_result" in p.columns else ("play_id", lambda s: 0),
        xpa=("extra_point_attempt", isum) if "extra_point_attempt" in p.columns else ("play_id", lambda s: 0),
        xpm=("extra_point_result", lambda s: (p.loc[s.index,"extra_point_result"].fillna("").eq("good")).sum())
            if "extra_point_result" in p.columns else ("play_id", lambda s: 0),
        punt=("punt", isum) if "punt" in p.columns else ("play_id", lambda s: 0),
        punt_yds=("punt_yards", fsum) if "punt_yards" in p.columns else ("play_id", lambda s: 0),
    )
    team_stats["turnovers"] = team_stats["pass_int"] + team_stats.get("fumbles_lost", 0)

    home = (team_stats.rename(columns={"posteam":"home_team"})
                        .add_prefix("home_")
                        .rename(columns={"home_game_id":"game_id","home_home_team":"home_team"}))
    away = (team_stats.rename(columns={"posteam":"away_team"})
                        .add_prefix("away_")
                        .rename(columns={"away_game_id":"game_id","away_away_team":"away_team"}))
    df = (df.merge(home, on=["game_id","home_team"], how="left")
            .merge(away, on=["game_id","away_team"], how="left"))

    # 3) Optional schedule fields
    SCHED_OPTIONAL = [
        "spread_line","total_line","over_under_line",
        "home_moneyline","away_moneyline",
        "stadium","roof","surface","weather","temp","wind",
        "neutral_site","international","game_type","season_type",
        "weekday","gametime","location","home_coach","away_coach","referee"
    ]
    avail = [c for c in SCHED_OPTIONAL if c in sch.columns]
    if avail:
        extras = sch[["game_id"] + avail].copy()
        for c in ["spread_line","total_line","over_under_line","home_moneyline","away_moneyline","temp","wind"]:
            if c in extras.columns:
                extras[c] = pd.to_numeric(extras[c], errors="coerce")
        df = df.merge(extras, on="game_id", how="left")

    # 4) Resolve _x/_y
    x_bases = {c[:-2] for c in df.columns if c.endswith("_x")}
    for base in x_bases:
        x, y = base + "_x", base + "_y"
        if x in df.columns and y in df.columns:
            df.drop(columns=[y], inplace=True)
            df.rename(columns={x: base}, inplace=True)
    rename_map = {}
    for c in df.columns:
        if c.endswith("_x") and c[:-2] not in df.columns: rename_map[c] = c[:-2]
        if c.endswith("_y") and c[:-2] not in df.columns: rename_map[c] = c[:-2]
    if rename_map: df = df.rename(columns=rename_map)

    # 5) Filter to the computed current week (include both past and future games this week)
    df_week = df[(df["season"] == season) & (df["week"] == current_week)].copy()

    # Drop columns that are entirely empty within THIS WEEK only
    df_week = drop_all_empty_columns(df_week)

    # 6) Write
    ROOT = find_root()
    out_dir = ROOT / "nfl_moneyline" / "current_data"
    out_dir.mkdir(parents=True, exist_ok=True)
    csv_name = out_dir / "schedules_current_week.csv"
    df_week.to_csv(csv_name, index=False)

    print(f"[OK] wrote {csv_name}  rows={len(df_week)}  week={current_week} season={season}")
    return df_week

# Run it
export_current_week_like_historical()


2025 done.
Downcasting floats.
[OK] wrote /Users/genevievekochel/Downloads/WSU_Classes/SportsBettingProj/nfl_model_lab/nfl_moneyline/current_data/schedules_current_week.csv  rows=16  week=3 season=2025


Unnamed: 0,game_id,season,week,date,home_team,away_team,home_score,away_score,Winner,home_pass_cmp,...,surface,temp,wind,game_type,weekday,gametime,location,home_coach,away_coach,referee
32,2025_03_MIA_BUF,2025,3,2025-09-18,BUF,MIA,31.0,21.0,BUF,22.0,...,a_turf,68.0,7.0,REG,Thursday,20:15,Home,Sean McDermott,Mike McDaniel,Alan Eck
33,2025_03_ATL_CAR,2025,3,2025-09-21,CAR,ATL,,,TIE,,...,grass,,,REG,Sunday,13:00,Home,Dave Canales,Raheem Morris,
34,2025_03_GB_CLE,2025,3,2025-09-21,CLE,GB,,,TIE,,...,grass,,,REG,Sunday,13:00,Home,Kevin Stefanski,Matt LaFleur,
35,2025_03_HOU_JAX,2025,3,2025-09-21,JAX,HOU,,,TIE,,...,grass,,,REG,Sunday,13:00,Home,Liam Coen,DeMeco Ryans,
36,2025_03_CIN_MIN,2025,3,2025-09-21,MIN,CIN,,,TIE,,...,sportturf,,,REG,Sunday,13:00,Home,Kevin O'Connell,Zac Taylor,
37,2025_03_PIT_NE,2025,3,2025-09-21,NE,PIT,,,TIE,,...,fieldturf,,,REG,Sunday,13:00,Home,Mike Vrabel,Mike Tomlin,
38,2025_03_LA_PHI,2025,3,2025-09-21,PHI,LA,,,TIE,,...,grass,,,REG,Sunday,13:00,Home,Nick Sirianni,Sean McVay,
39,2025_03_NYJ_TB,2025,3,2025-09-21,TB,NYJ,,,TIE,,...,grass,,,REG,Sunday,13:00,Home,Todd Bowles,Aaron Glenn,
40,2025_03_IND_TEN,2025,3,2025-09-21,TEN,IND,,,TIE,,...,grass,,,REG,Sunday,13:00,Home,Brian Callahan,Shane Steichen,
41,2025_03_LV_WAS,2025,3,2025-09-21,WAS,LV,,,TIE,,...,grass,,,REG,Sunday,13:00,Home,Dan Quinn,Pete Carroll,


Get the current week's injury report

In [46]:
import time
import requests
import pandas as pd
from pathlib import Path
from datetime import datetime, timezone

HEADERS = {
    "User-Agent": "Mozilla/5.0 (personal research scraper)",
    "Accept": "application/json, text/plain, */*",
}

def _get_json(url: str):
    r = requests.get(url, headers=HEADERS, timeout=25)
    r.raise_for_status()
    return r.json()

def _deref(obj):
    """If obj is {'$ref': url} (or {'href': url}), fetch and return the JSON. Else return obj (or {})."""
    if not isinstance(obj, dict):
        return {}
    ref = obj.get("$ref") or obj.get("href")
    if ref:
        return _get_json(ref)
    return obj

def get_espn_team_map():
    """
    Returns dict: {espn_abbr: {'id': int, 'name': str}}
    ESPN abbrs are like ARI, ATL, LAC, LAR, LV, NO, NE, WAS, etc.
    """
    base = "https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/teams"
    data = _get_json(base)
    teams = {}
    for item in data.get("items", []):
        t = _deref(item)
        abbr = (t.get("abbreviation") or "").upper()
        tid = int(t.get("id"))
        teams[abbr] = {"id": tid, "name": t.get("displayName")}
        time.sleep(0.05)
    return teams

def get_current_season_and_week():
    """
    Uses ESPN calendar to determine current season and week based on UTC 'now'.
    """
    now = datetime.now(timezone.utc)
    season = now.year
    cal = _get_json(f"https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/calendar?season={season}")
    # Calendar has 'entries' (preseason, regular, postseason). Regular has 'entries' with week ranges.
    # Find the entry where 'type' name is 'Regular Season' (or contains 'regular'), then pick week whose start<=now<=end.
    reg = None
    for e in cal.get("entries", []):
        ee = _deref(e)
        tinfo = _deref(ee.get("type", {}))
        tname = (tinfo.get("name") or "").lower() + " " + (tinfo.get("description") or "").lower()
        if "regular" in tname:
            reg = ee
            break

    week_num = None
    if reg:
        for we in reg.get("entries", []):
            w = _deref(we)
            # Weeks have startDate and endDate
            start = w.get("startDate")
            end = w.get("endDate")
            try:
                start_dt = datetime.fromisoformat(start.replace("Z", "+00:00"))
                end_dt = datetime.fromisoformat(end.replace("Z", "+00:00"))
            except Exception:
                continue
            if start_dt <= now <= end_dt:
                # ESPN puts 'label' like 'Week 3' and 'value' as the number
                week_num = w.get("value") or w.get("number")
                try:
                    week_num = int(week_num)
                except Exception:
                    # fall back by parsing label
                    lbl = str(w.get("label") or "")
                    if lbl.lower().startswith("week"):
                        week_num = int(lbl.split()[-1])
                break

    return season, week_num

def fetch_espn_team_injuries(team_id: int, team_abbr_out: str) -> pd.DataFrame:
    """
    Fetch injuries for one team from ESPN v2.
    """
    season = datetime.utcnow().year
    coll = _get_json(
        f"https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/teams/{team_id}/injuries?season={season}"
    )
    items = coll.get("items") or []
    rows = []
    for it in items:
        injury = _deref(it)
        entry_list = injury.get("entries") if isinstance(injury, dict) else None
        if entry_list:
            for e in entry_list:
                e = _deref(e)
                athlete = _deref(e.get("athlete", {}))
                status  = _deref(e.get("status", {}))
                i_type  = _deref(e.get("type", {}))
                rows.append({
                    "team": team_abbr_out,
                    "player_id": athlete.get("id"),
                    "player": athlete.get("displayName"),
                    "position": (athlete.get("position") or {}).get("abbreviation") if isinstance(athlete.get("position"), dict) else None,
                    "status": status.get("displayName") or status.get("name"),
                    "status_detail": e.get("statusDetail"),
                    "type": i_type.get("displayName") or i_type.get("name"),
                    "date": e.get("date"),
                    "source": "espn_v2",
                })
        else:
            athlete = _deref(injury.get("athlete", {}))
            status  = _deref(injury.get("status", {}))
            i_type  = _deref(injury.get("type", {}))
            rows.append({
                "team": team_abbr_out,
                    "player_id": athlete.get("id"),
                    "player": athlete.get("displayName"),
                    "position": (athlete.get("position") or {}).get("abbreviation") if isinstance(athlete.get("position"), dict) else None,
                    "status": status.get("displayName") or status.get("name"),
                    "status_detail": injury.get("statusDetail"),
                    "type": i_type.get("displayName") or i_type.get("name"),
                    "date": injury.get("date"),
                    "source": "espn_v2",
            })
        time.sleep(0.12)  # be polite
    df = pd.DataFrame(rows)
    if not df.empty:
        keep = df[["player", "status", "type"]].notna().any(axis=1)
        df = df[keep].copy()
    return df

def normalize_status(s: str) -> str:
    s = (s or "").strip().lower()
    # ESPN sometimes returns "Questionable", "Out", "Doubtful", "Active", or codes like INJURY_STATUS_*
    s = s.replace("injury_status_", "")
    if "questionable" in s:
        return "QUESTIONABLE"
    if "doubtful" in s:
        return "DOUBTFUL"
    # treat IR / PUP / NFI / Reserve as OUT for this filter
    if s == "out" or "reserve" in s or "ir" == s or "injured reserve" in s or "pup" in s or "nfi" in s:
        return "OUT"
    return "ACTIVE"

def normalize_team_for_output(espn_abbr: str) -> str:
    """
    If you want your file to use your earlier codes, map here.
    """
    # Example: prefer LA (Rams) and WSH (Commanders)
    mapping = {
        "LAR": "LA",
        "WAS": "WSH",
    }
    return mapping.get(espn_abbr, espn_abbr)

def main():
    # 1) Where to save: ../current_data/injury_current_report.csv (parent of CWD)
    parent = Path.cwd().parent
    out_dir = parent / "current_data"
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / "injury_current_report.csv"

    # 2) Current season & week
    season, week = get_current_season_and_week()

    # 3) Build team map dynamically
    teams = get_espn_team_map()

    # 4) Pull every team (skip non-32 teams like Pro Bowl, etc., by filtering ids with abbreviation present)
    frames = []
    for espn_abbr, meta in sorted(teams.items()):
        # Skip legacy/defunct abbreviations if any slip in
        if len(espn_abbr) not in (2, 3): 
            continue
        tid = meta["id"]
        out_abbr = normalize_team_for_output(espn_abbr)
        try:
            df_team = fetch_espn_team_injuries(tid, out_abbr)
            frames.append(df_team)
            print(f"[OK] {espn_abbr} (id {tid}) -> {len(df_team)} rows")
        except requests.HTTPError as e:
            print(f"[HTTP {espn_abbr}] {e}")
        except Exception as e:
            print(f"[ERR  {espn_abbr}] {e}")

    if not frames:
        print("No data pulled; not writing a file.")
        return

    df = pd.concat(frames, ignore_index=True)

    # --- keep only questionable or out ---

    INCLUDE_STATUSES = {"QUESTIONABLE", "OUT", "DOUBTFUL"}  # change if you want to add "DOUBTFUL", "IR", etc.

    df["status_norm"] = df["status"].astype(str).map(normalize_status)
    df = df[df["status_norm"].isin(INCLUDE_STATUSES)].copy()


    # 5) Add season/week columns if available
    df["season"] = season
    if week is not None:
        df["week"] = int(week)

    # 6) Sort and write
    df = df.sort_values(["team", "player"].copy() if "player" in df.columns else ["team"])
    drop_all_empty_columns(df)
    df.to_csv(out_path, index=False)
    print(f"Saved {len(df)} rows to {out_path}")

if __name__ == "__main__":
    main()


[OK] ARI (id 22) -> 25 rows
[OK] ATL (id 1) -> 25 rows
[OK] BAL (id 33) -> 25 rows
[OK] BUF (id 2) -> 25 rows
[OK] CAR (id 29) -> 25 rows
[OK] CHI (id 3) -> 25 rows
[OK] CIN (id 4) -> 25 rows
[OK] CLE (id 5) -> 25 rows
[OK] DAL (id 6) -> 25 rows
[OK] DEN (id 7) -> 25 rows
[OK] DET (id 8) -> 25 rows
[OK] GB (id 9) -> 25 rows
[OK] HOU (id 34) -> 25 rows
[OK] IND (id 11) -> 25 rows
[OK] JAX (id 30) -> 25 rows
[OK] KC (id 12) -> 25 rows
[OK] LAC (id 24) -> 25 rows
[OK] LAR (id 14) -> 25 rows
[OK] LV (id 13) -> 25 rows
[OK] MIA (id 15) -> 25 rows
[OK] MIN (id 16) -> 25 rows
[OK] NE (id 17) -> 25 rows
[OK] NO (id 18) -> 25 rows
[OK] NYG (id 19) -> 25 rows
[OK] NYJ (id 20) -> 25 rows
Saved 625 rows to /Users/genevievekochel/Downloads/WSU_Classes/SportsBettingProj/nfl_model_lab/nfl_moneyline/current_data/injury_current_report.csv
