# NFL scoring plays
> This notebook fetches all scoring plays from the ESPN API, from the 2004 season (`FIRST_SEASON`) through the most recent season (`MOST_RECENT_SEASON`). Other API endpoints are detailed in [this gist](https://gist.github.com/nntrn/ee26cb2a0716de0947a0a4e9a157bc1c).

---

#### Import Python tools and Jupyter config

In [1]:
import datetime
import json
import time
from pathlib import Path

import jupyter_black
import pandas as pd
import requests
from tqdm.notebook import tqdm

In [2]:
# Turn on jupyter_black for this notebook session.
jupyter_black.load()

# Relax pandas display limits: up to 100 columns / 100 rows, and no cap on
# the width of an individual column's text.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_colwidth", None)

---

In [3]:
def weeks_for_season(s):
    """Return the week numbers to query for season ``s``.

    Seasons from 2021 onward get weeks 1-18; earlier seasons get weeks 1-17.
    The result is a ``range`` starting at 1.
    """
    final_week = 18 if s >= 2021 else 17
    # range() excludes its stop value, hence the +1.
    return range(1, final_week + 1)

In [4]:
# Inclusive bounds of the seasons to download.
FIRST_SEASON, MOST_RECENT_SEASON = 2004, 2024

# range() stops before its end value, so push the stop one past the last season.
seasons = range(FIRST_SEASON, 1 + MOST_RECENT_SEASON)

In [5]:
def get_games(season, week, timeout=30):
    """Yield one metadata dict per game listed for a season/week.

    Calls the ESPN scoreboard endpoint with ``dates=<season>``,
    ``seasontype=2`` and ``week=<week>`` and yields, for every entry in the
    response's ``events`` list, a dict with the keys ``season``, ``week``,
    ``game_id`` (the event's ``id``) and ``game_date`` (the event's ``date``).

    NOTE(review): ``seasontype=2`` presumably selects the regular season --
    confirm against the ESPN API.

    This is a generator, so the HTTP request is only issued once iteration
    starts.

    Parameters
    ----------
    season : int
        Season year, passed as the ``dates`` query parameter.
    week : int
        Week number within the season.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled connection cannot hang the
        crawl indefinitely. Defaults to 30 seconds.

    Raises
    ------
    requests.HTTPError
        If the scoreboard endpoint answers with an error status.
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    url = (
        "https://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard"
        f"?dates={season}&seasontype=2&week={week}"
    )
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    # "or []" also covers an explicit null "events" value, not just a missing key.
    for e in r.json().get("events") or []:
        yield {
            "season": season,
            "week": week,
            "game_id": e["id"],
            "game_date": e["date"],
        }

In [6]:
def fetch_all_scoring_plays(game_id, timeout=30):
    """Return one row dict per scoring play of a single game.

    Downloads the ESPN play-by-play payload for ``game_id``, reads the two
    competitors from ``header.competitions[0].competitors`` and walks every
    play under ``drives.previous[*].plays``.

    A play is kept when all of the following hold:

    * it is not explicitly flagged ``scoringPlay: False``;
    * at least one of ``homeScore`` / ``awayScore`` is present;
    * its ``team`` (or, failing that, ``start.team``) id matches one of the
      two competitors.

    Parameters
    ----------
    game_id : str or int
        ESPN game/event id, interpolated into the request URL and copied into
        every returned row.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled connection cannot hang the
        crawl indefinitely. Defaults to 30 seconds.

    Returns
    -------
    list of dict
        Rows with the keys ``game_id``, ``home_team``, ``away_team``,
        ``scoring_team``, ``home_away``, ``quarter``, ``clock``, ``type``,
        ``description``, ``score_home``, ``score_away`` and ``yard_line``.
        Empty when the payload has no competitor information or no
        qualifying plays.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with an error status.
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    url = f"https://cdn.espn.com/core/nfl/playbyplay?xhr=1&gameId={game_id}"
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    # "or {}" (rather than a .get default) also covers explicit JSON nulls.
    js = resp.json().get("gamepackageJSON") or {}

    # Build the home/away lookup; without competitors no play can be
    # attributed to a team, so there is nothing to return.
    competitions = (js.get("header") or {}).get("competitions") or []
    comps = (competitions[0].get("competitors") or []) if competitions else []
    if not comps:
        return []

    # Keys are normalised to str because play-level ids are looked up via str().
    teams = {
        str(c["team"]["id"]): {"name": c["team"]["displayName"], "ha": c["homeAway"]}
        for c in comps
    }
    # Default to None instead of letting next() raise StopIteration when one
    # side is missing from the payload.
    home = next(
        (c["team"]["displayName"] for c in comps if c["homeAway"] == "home"), None
    )
    away = next(
        (c["team"]["displayName"] for c in comps if c["homeAway"] == "away"), None
    )

    rows = []
    for drv in (js.get("drives") or {}).get("previous") or []:
        for p in drv.get("plays") or []:
            # NOTE(review): ESPN play objects appear to carry a boolean
            # "scoringPlay" flag, and the running score seems to be present on
            # ordinary plays too -- confirm against a live payload. Plays
            # explicitly flagged False are dropped; when the flag is absent
            # only the score-presence check below applies.
            if p.get("scoringPlay") is False:
                continue
            if p.get("homeScore") is None and p.get("awayScore") is None:
                continue

            # Grab the team object, falling back to start.team.
            # NOTE(review): start.team is presumably the side in possession
            # when the play began, which may differ from the side that scored
            # (e.g. defensive scores) -- verify before relying on scoring_team.
            start = p.get("start") or {}
            team_obj = p.get("team") or start.get("team")
            if not team_obj or str(team_obj.get("id")) not in teams:
                # Nothing we can do if there's no team linkage.
                continue

            tid = str(team_obj["id"])
            ptype = p.get("type") or {}
            desc = ptype.get("text") or ptype.get("abbreviation") or ""

            rows.append(
                {
                    "game_id": game_id,
                    "home_team": home,
                    "away_team": away,
                    "scoring_team": teams[tid]["name"],
                    "home_away": teams[tid]["ha"],
                    "quarter": (p.get("period") or {}).get("number"),
                    "clock": (p.get("clock") or {}).get("displayValue"),
                    "type": desc,
                    "description": p.get("text"),
                    "score_home": p.get("homeScore"),
                    "score_away": p.get("awayScore"),
                    "yard_line": start.get("possessionText"),
                }
            )

    return rows

In [7]:
# Walk every configured season and each of its weeks, gathering the
# per-game metadata dicts produced by get_games into one flat list.
game_records = []
for season in tqdm(seasons):
    for week in weeks_for_season(season):
        game_records.extend(get_games(season, week))

# One row per game: season, week, game_id, game_date.
df_games = pd.DataFrame(game_records)

  0%|          | 0/21 [00:00<?, ?it/s]

In [8]:
# Download the play rows for every collected game, with a tqdm progress bar.
# A plain loop (not a comprehension) keeps the rows gathered so far in
# `all_plays` if the run is interrupted part-way.
all_plays = []
for gid in tqdm(df_games["game_id"], desc="Fetching scoring plays"):
    all_plays += fetch_all_scoring_plays(gid)

# One row per play returned by fetch_all_scoring_plays.
df_scoring = pd.DataFrame(all_plays)

Fetching scoring plays:   0%|          | 0/5442 [00:00<?, ?it/s]

In [9]:
# Attach each game's date, season and week to its play rows. The left join
# on game_id keeps every play row.
df_scoring_merge = df_scoring.merge(
    df_games.loc[:, ["game_id", "game_date", "season", "week"]],
    how="left",
    on="game_id",
)

In [10]:
# Write the merged table as JSON and CSV. The output directory is created
# first so a fresh checkout does not fail at the very last cell, after the
# long download has already completed.
output_dir = Path("data/processed")
output_dir.mkdir(parents=True, exist_ok=True)

# Shared path stem; only the file extension differs between the two exports.
output_stem = output_dir / f"nfl_scoring_{FIRST_SEASON}-{MOST_RECENT_SEASON}"

df_scoring_merge.to_json(
    f"{output_stem}.json",
    indent=4,
    orient="records",
)
df_scoring_merge.to_csv(f"{output_stem}.csv", index=False)