In [None]:
import nfl_data_py as nfl
import pandas as pd

### Constants

In [None]:
season = 2025  # Update this to the season you're interested in

### Game Results

In [108]:
import pandas as pd
from datetime import datetime, timezone
from urllib.error import HTTPError
from nfl_data_py import import_schedules, import_weekly_data, import_pbp_data


# ---------- Helpers ----------
def _coerce_game_datetime(sched: pd.DataFrame) -> pd.Series:
    """Return a UTC datetime for each game with best-effort schema handling."""
    if "start_time" in sched.columns:
        return pd.to_datetime(sched["start_time"], errors="coerce", utc=True)

    # Fallback: use gameday assumed noon ET when no explicit time exists
    dt = pd.to_datetime(sched.get("gameday"), errors="coerce")
    return (
        dt.dt.tz_localize("America/New_York", nonexistent="shift_forward", ambiguous="NaT")
          .dt.tz_convert("UTC")
    )


def _games_frame(season: int) -> pd.DataFrame:
    """Load regular-season schedule/results for a season, keep finished games only."""
    sched = import_schedules([season])
    sched = sched[(sched["game_type"] == "REG") & sched["home_score"].notna()].copy()
    sched["game_datetime"] = _coerce_game_datetime(sched)

    cols = ["game_id", "week", "home_team", "away_team", "home_score", "away_score", "game_datetime"]
    games = sched[cols].copy()

    # only games that have started relative to now
    now_utc = datetime.now(timezone.utc)
    games = games[games["game_datetime"] <= now_utc].copy()

    games["score_differential"] = games["home_score"] - games["away_score"]
    return games


def _pick_team_col(df: pd.DataFrame) -> str:
    """Find the team column in weekly data across versions."""
    for c in ["team", "recent_team", "posteam", "club_code"]:
        if c in df.columns:
            return c
    raise KeyError("No team-like column found in weekly data (expected one of team/recent_team/posteam/club_code).")


def _standardize_weekly_columns(weekly: pd.DataFrame) -> pd.DataFrame:
    """Normalize weekly stat column names to a standard set."""
    alias = {
        # attempts / yards
        "rush_att": "rush_att", "rushing_att": "rush_att", "rushing_attempts": "rush_att",
        "pass_att": "pass_att", "passing_att": "pass_att", "passing_attempts": "pass_att",
        "rush_yds": "rush_yds", "rushing_yds": "rush_yds", "rushing_yards": "rush_yds",
        "pass_yds": "pass_yds", "passing_yds": "pass_yds", "passing_yards": "pass_yds",
        # touchdowns
        "rush_tds": "rush_tds", "rushing_tds": "rush_tds",
        "rec_tds": "rec_tds", "receiving_tds": "rec_tds",
        "kick_ret_tds": "kick_ret_tds",
        "punt_ret_tds": "punt_ret_tds",
        "def_int_tds": "def_int_tds",
        "fumble_rec_tds": "fumble_rec_tds",
        "other_tds": "other_tds",
    }
    have = {src: alias[src] for src in alias if src in weekly.columns}
    return weekly.rename(columns=have) if have else weekly


def _from_weekly(weekly: pd.DataFrame, games: pd.DataFrame) -> pd.DataFrame:
    """Aggregate WEEKLY player stats to team-game with player ID lists."""
    team_col = _pick_team_col(weekly)
    weekly = _standardize_weekly_columns(weekly)

    base_cols = [c for c in [team_col, "game_id", "week", "player_id"] if c in weekly.columns]
    metric_cols = [c for c in [
        "rush_att", "pass_att", "rush_yds", "pass_yds",
        "rush_tds", "rec_tds", "kick_ret_tds", "punt_ret_tds",
        "def_int_tds", "fumble_rec_tds", "other_tds"
    ] if c in weekly.columns]

    df = weekly[base_cols + metric_cols].copy()

    # Aggregate to team-game & collect unique player IDs
    sum_cols = [c for c in metric_cols if c.endswith("_att") or c.endswith("_yds") or c.endswith("_tds")]
    team_game = (
        df.groupby([team_col, "game_id", "week"], as_index=False)
          .agg({
              **{c: "sum" for c in sum_cols},
              "player_id": lambda x: list(pd.Series(x).dropna().astype(str).unique())
          })
          .rename(columns={team_col: "team", "player_id": "player_ids"})
    )

    # Build total_tds defensively
    for c in ["rush_tds", "rec_tds", "kick_ret_tds", "punt_ret_tds", "def_int_tds", "fumble_rec_tds", "other_tds"]:
        if c not in team_game.columns:
            team_game[c] = 0
    team_game["total_tds"] = (
        team_game["rush_tds"] + team_game["rec_tds"] +
        team_game["kick_ret_tds"] + team_game["punt_ret_tds"] +
        team_game["def_int_tds"] + team_game["fumble_rec_tds"] + team_game["other_tds"]
    )

    team_game = team_game.merge(games[["game_id", "game_datetime"]], on="game_id", how="left")
    return team_game


def _from_pbp(pbp: pd.DataFrame, games: pd.DataFrame) -> pd.DataFrame:
    """Aggregate PBP to team-game metrics + player ID lists (safe for modern pandas)."""
    pbp = pbp[pbp["game_id"].isin(games["game_id"])].copy()

    if "posteam" not in pbp.columns:
        raise KeyError("PBP missing 'posteam' column; cannot assemble team metrics.")
    pbp = pbp.rename(columns={"posteam": "team"})

    # attempts by play flags
    pbp["rush_att"] = pbp.get("rush_attempt", 0)
    pbp["pass_att"] = pbp.get("pass_attempt", 0)

    # yards by play type
    rush_yds = (
        pbp[pbp["rush_att"] == 1]
        .groupby(["team", "game_id", "week"], as_index=False)["yards_gained"]
        .sum()
        .rename(columns={"yards_gained": "rush_yds"})
    )
    pass_yds = (
        pbp[pbp["pass_att"] == 1]
        .groupby(["team", "game_id", "week"], as_index=False)["yards_gained"]
        .sum()
        .rename(columns={"yards_gained": "pass_yds"})
    )

    base = (
        pbp.groupby(["team", "game_id", "week"], as_index=False)
        .agg(rush_att=("rush_att", "sum"),
             pass_att=("pass_att", "sum"))
    )

    # Total TDs for the scoring team (off/def/returns when available)
    if "td_team" in pbp.columns and "touchdown" in pbp.columns:
        tds = (
            pbp[pbp["touchdown"] == 1]
            .groupby(["td_team", "game_id", "week"], as_index=False)
            .size()
            .rename(columns={"td_team": "team", "size": "total_tds"})
        )
    else:
        # Offensive TDs only fallback
        tds = (
            pbp[(pbp.get("pass_touchdown", 0) == 1) | (pbp.get("rush_touchdown", 0) == 1)]
            .groupby(["team", "game_id", "week"], as_index=False)
            .size()
            .rename(columns={"size": "total_tds"})
        )

    # Player IDs: melt participant columns → unique list
    id_cols = [c for c in ["passer_player_id", "rusher_player_id", "receiver_player_id", "td_player_id"] if c in pbp.columns]
    if id_cols:
        long_ids = (
            pbp.melt(
                id_vars=["team", "game_id", "week"],
                value_vars=id_cols,
                value_name="pid"
            )[["team", "game_id", "week", "pid"]]
            .dropna(subset=["pid"])
        )
        long_ids = long_ids[long_ids["pid"].astype(str).str.len() > 0]

        ids = (
            long_ids.groupby(["team", "game_id", "week"])["pid"]
            .unique()
            .apply(lambda arr: [str(x) for x in arr])
            .reset_index()
            .rename(columns={"pid": "player_ids"})
        )
    else:
        ids = base[["team", "game_id", "week"]].assign(player_ids=[[]] * len(base))

    team_game = (
        base.merge(rush_yds, on=["team", "game_id", "week"], how="left")
            .merge(pass_yds, on=["team", "game_id", "week"], how="left")
            .merge(tds,     on=["team", "game_id", "week"], how="left")
            .merge(ids,     on=["team", "game_id", "week"], how="left")
    )

    for c in ["rush_yds", "pass_yds", "total_tds"]:
        if c not in team_game.columns:
            team_game[c] = 0
        team_game[c] = team_game[c].fillna(0)

    team_game = team_game.merge(games[["game_id", "game_datetime"]], on="game_id", how="left")
    return team_game


def _to_date_avgs(team_game: pd.DataFrame) -> pd.DataFrame:
    """Compute season-to-date averages through the previous game for each team."""
    metrics = ["rush_att", "pass_att", "rush_yds", "pass_yds", "total_tds"]
    for m in metrics:
        if m not in team_game.columns:
            team_game[m] = 0

    def _per_team(df: pd.DataFrame) -> pd.DataFrame:
        df = df.sort_values("game_datetime").copy()
        for m in metrics:
            df[f"avg_{m}_to_date"] = df[m].expanding().mean().shift(1)
        return df

    return (
        team_game.groupby("team", group_keys=False)
                 .apply(_per_team)
                 .reset_index(drop=True)
    )


# ---------- Main API ----------
def get_game_results_with_date_time(season: int) -> pd.DataFrame:
    """
    Returns a DataFrame with:
      - game_id, week, home_team, away_team, home_score, away_score, score_differential
      - game_datetime (UTC)
      - player_ids_home, player_ids_away (lists of player IDs who appeared in the game for that team)
      - to-date (pre-game) averages per team:
          avg_rush_att_to_date_*, avg_pass_att_to_date_*,
          avg_rush_yds_to_date_*, avg_pass_yds_to_date_*,
          avg_total_tds_to_date_*
    """
    games = _games_frame(season)

    # Try WEEKLY first; on 404 (season parquet not published), fall back to PBP
    try:
        weekly = import_weekly_data([season])
        team_game = _from_weekly(weekly, games)
    except Exception as e:
        if isinstance(e, HTTPError) and getattr(e, "code", None) == 404:
            pbp = import_pbp_data([season])
            team_game = _from_pbp(pbp, games)
        else:
            raise

    team_game = _to_date_avgs(team_game)

    # Merge into home/away views
    home = team_game.add_suffix("_home").rename(columns={
        "team_home": "home_team", "game_id_home": "game_id", "week_home": "week"
    })
    away = team_game.add_suffix("_away").rename(columns={
        "team_away": "away_team", "game_id_away": "game_id", "week_away": "week"
    })

    out = (
        games.merge(home, on=["game_id", "week", "home_team"], how="left")
             .merge(away, on=["game_id", "week", "away_team"], how="left")
    )

    final_cols = [
        "game_id", "week", "home_team", "away_team",
        "home_score", "away_score", "score_differential", "game_datetime",
        "player_ids_home", "player_ids_away",
        "avg_rush_att_to_date_home", "avg_rush_att_to_date_away",
        "avg_pass_att_to_date_home", "avg_pass_att_to_date_away",
        "avg_rush_yds_to_date_home", "avg_rush_yds_to_date_away",
        "avg_pass_yds_to_date_home", "avg_pass_yds_to_date_away",
        "avg_total_tds_to_date_home", "avg_total_tds_to_date_away",
    ]
    return out[final_cols].sort_values(["week", "game_datetime"]).reset_index(drop=True)





In [113]:
if __name__ == "__main__":
    current_year = datetime.now(timezone.utc).year
    df = get_game_results_with_date_time(current_year)
    print(df.tail(5))  # 👈 prints the LAST 5 game

2025 done.
Downcasting floats.
             game_id  week home_team away_team  home_score  away_score  \
103   2025_07_GB_ARI     7       ARI        GB        23.0        27.0   
104  2025_07_WAS_DAL     7       DAL       WAS        44.0        22.0   
105   2025_07_ATL_SF     7        SF       ATL        20.0        10.0   
106   2025_07_TB_DET     7       DET        TB        24.0         9.0   
107  2025_07_HOU_SEA     7       SEA       HOU        27.0        19.0   

     score_differential             game_datetime  \
103                -4.0 2025-10-19 04:00:00+00:00   
104                22.0 2025-10-19 04:00:00+00:00   
105                10.0 2025-10-19 04:00:00+00:00   
106                15.0 2025-10-20 04:00:00+00:00   
107                 8.0 2025-10-20 04:00:00+00:00   

                                       player_ids_home  \
103  [00-0033119, 00-0037157, 00-0036924, 00-003562...   
104  [00-0033077, 00-0036997, 00-0040586, 00-003635...   
105  [00-0036972, 00-0033280, 0

In [114]:
df

Unnamed: 0,game_id,week,home_team,away_team,home_score,away_score,score_differential,game_datetime,player_ids_home,player_ids_away,avg_rush_att_to_date_home,avg_rush_att_to_date_away,avg_pass_att_to_date_home,avg_pass_att_to_date_away,avg_rush_yds_to_date_home,avg_rush_yds_to_date_away,avg_pass_yds_to_date_home,avg_pass_yds_to_date_away,avg_total_tds_to_date_home,avg_total_tds_to_date_away
0,2025_01_DAL_PHI,1,PHI,DAL,24.0,20.0,4.0,2025-09-04 04:00:00+00:00,"[00-0036389, 00-0034844, 00-0039746, 00-003626...","[00-0033077, 00-0036997, 00-0037801, 00-003524...",,,,,,,,,,
1,2025_01_KC_LAC,1,LAC,KC,27.0,21.0,6.0,2025-09-05 04:00:00+00:00,"[00-0036355, 00-0040666, 00-0038573, 00-003689...","[00-0033873, 00-0037197, 00-0039894, 00-003392...",,,,,,,,,,
2,2025_01_TB_ATL,1,ATL,TB,20.0,23.0,-3.0,2025-09-07 04:00:00+00:00,"[00-0039917, 00-0037263, 00-0038542, 00-003939...","[00-0034855, 00-0039361, 00-0038951, 00-003725...",,,,,,,,,,
3,2025_01_CIN_CLE,1,CLE,CIN,16.0,17.0,-1.0,2025-09-07 04:00:00+00:00,"[00-0026158, 00-0040162, 00-0037267, 00-004046...","[00-0036442, 00-0038597, 00-0035644, 00-003641...",,,,,,,,,,
4,2025_01_MIA_IND,1,IND,MIA,33.0,8.0,25.0,2025-09-07 04:00:00+00:00,"[00-0035710, 00-0036223, 00-0040128, 00-004017...","[00-0036212, 00-0037013, 00-0039040, 00-004019...",,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,2025_07_GB_ARI,7,ARI,GB,23.0,27.0,-4.0,2025-10-19 04:00:00+00:00,"[00-0033119, 00-0037157, 00-0036924, 00-003562...","[00-0036264, 00-0035700, 00-0038797, 00-004066...",24.500000,30.800000,37.333333,31.200000,113.166667,122.200000,193.833333,237.400000,2.333333,3.0
104,2025_07_WAS_DAL,7,DAL,WAS,44.0,22.0,22.0,2025-10-19 04:00:00+00:00,"[00-0033077, 00-0036997, 00-0040586, 00-003635...","[00-0039910, 00-0032268, 00-0040242, 00-003395...",24.000000,27.333333,40.333333,31.333333,117.166667,151.000000,270.666667,195.333333,3.333333,3.0
105,2025_07_ATL_SF,7,SF,ATL,20.0,10.0,10.0,2025-10-19 04:00:00+00:00,"[00-0036972, 00-0033280, 00-0037746, 00-002989...","[00-0039917, 00-0038542, 00-0037263, 00-003697...",26.833333,31.800000,42.500000,34.200000,82.166667,151.200000,291.833333,227.600000,1.833333,2.0
106,2025_07_TB_DET,7,DET,TB,24.0,9.0,15.0,2025-10-20 04:00:00+00:00,"[00-0033106, 00-0039139, 00-0035685, 00-003696...","[00-0034855, 00-0037256, 00-0038951, 00-003123...",29.166667,27.333333,30.666667,34.500000,128.666667,109.333333,225.000000,244.500000,4.000000,3.0
