# NFL scoring plays
> This notebook fetches all scoring plays from the ESPN API from 2004 to the most recent season. Other API endpoints are detailed in [this gist](https://gist.github.com/nntrn/ee26cb2a0716de0947a0a4e9a157bc1c).

---

#### Import Python tools and Jupyter config

In [30]:
import json
import time
import requests
import datetime
import pandas as pd
import jupyter_black
from tqdm.notebook import tqdm

In [31]:
# Activate the jupyter_black extension, then raise pandas' display limits so
# wide frames and long text columns are shown without truncation.
jupyter_black.load()
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_colwidth", None)

In [32]:
# Season bounds for the analysis; they match the range in the processed data
# file name loaded further down.
FIRST_SEASON: int = 2004
MOST_RECENT_SEASON: int = 2024

---

In [33]:
# Load the processed plays file. The season range in the file name is built
# from FIRST_SEASON / MOST_RECENT_SEASON so the path cannot drift out of sync
# with the config cell when the range is updated.
df_scoring = pd.read_json(
    f"data/processed/nfl_scoring_{FIRST_SEASON}-{MOST_RECENT_SEASON}.json"
)

In [34]:
# Preview the first rows to sanity-check the loaded columns.
# NOTE(review): the preview shows non-scoring play types (e.g. "Pass
# Completion", "Pass Incompletion"), so this frame appears to hold every play
# rather than only scoring plays - confirm before reading the shares computed
# below as "share of scoring plays".
df_scoring.head()

Unnamed: 0,game_id,home_team,away_team,scoring_team,home_away,quarter,clock,type,description,score_home,score_away,yard_line,game_date,season,week
0,240909017,New England Patriots,Indianapolis Colts,New England Patriots,home,1,14:51,Pass Completion,Tom Brady (NWE) pass across the middle complete to David Givens (NWE) for 19 yards.,0,0,NE 37,2004-09-10T01:00Z,2004,1
1,240909017,New England Patriots,Indianapolis Colts,New England Patriots,home,1,14:21,Pass Completion,Tom Brady (NWE) pass across the middle complete to Deion Branch (NWE) for 14 yards.,0,0,IND 44,2004-09-10T01:00Z,2004,1
2,240909017,New England Patriots,Indianapolis Colts,New England Patriots,home,1,13:36,Pass Incompletion,Tom Brady (NWE) pass incomplete to the left side.,0,0,IND 30,2004-09-10T01:00Z,2004,1
3,240909017,New England Patriots,Indianapolis Colts,New England Patriots,home,1,13:23,Pass Completion,Tom Brady (NWE) pass right side complete to Ben Watson (NWE) for 2 yards.,0,0,IND 30,2004-09-10T01:00Z,2004,1
4,240909017,New England Patriots,Indianapolis Colts,New England Patriots,home,1,12:54,Pass Completion,Tom Brady (NWE) pass right side complete to Ben Watson (NWE) for 14 yards.,0,0,IND 28,2004-09-10T01:00Z,2004,1


In [35]:
# 1) Parse the game_date strings into pandas datetimes (safe to re-run:
#    converting an already-converted column is a no-op).
df_scoring["game_date"] = df_scoring["game_date"].pipe(pd.to_datetime)

In [36]:
# 2) Q4 share by season: fraction of rows in df_scoring whose quarter is 4.
#    Rows with a missing quarter count as "not Q4".
q4_share = (
    df_scoring.assign(in_q4=df_scoring["quarter"] == 4)
    .groupby("season")["in_q4"]
    .mean()
    .rename("q4_share")
)

In [37]:
# 3) Field-goal share by season: fraction of rows in df_scoring whose `type`
#    contains "Field Goal" (case-insensitive substring match, so every play
#    type mentioning the phrase is counted).
#
#    na=False makes rows with a missing `type` count as "not a field goal".
#    Without it, str.contains returns NaN for those rows, the mask becomes
#    object-dtype, and mean() silently drops them from the denominator -
#    inconsistent with q4_share, where missing values count as False.
#
#    NOTE(review): the 2004 value in the summary output (0.000308) is far below
#    every other season (~0.02); the `type` labels for 2004 may be spelled
#    differently - verify before drawing conclusions from that season.
fg_share = (
    df_scoring["type"]
    .str.contains("Field Goal", case=False, na=False)
    .groupby(df_scoring["season"])
    .mean()
    .rename("fg_share")
)

In [38]:
# 4) Game-level table: one row per (game_id, season) with the final score of
#    each side, the absolute point differential, and a home-win flag.
#    The final score is taken as the max of score_home / score_away within the
#    game - assumes those columns are running totals (TODO confirm).
#    A tied game yields home_win == False because the comparison is strict.
game_final = (
    df_scoring.groupby(["game_id", "season"], as_index=False)
    .agg(
        home_score=("score_home", "max"),
        away_score=("score_away", "max"),
    )
    .assign(
        pt_diff=lambda g: g["home_score"].sub(g["away_score"]).abs(),
        home_win=lambda g: g["home_score"].gt(g["away_score"]),
    )
)

In [39]:
# 5) Per-season means over games: average absolute point differential, and the
#    fraction of games flagged as a home win.
avg_diff = (
    game_final["pt_diff"].groupby(game_final["season"]).mean().rename("avg_pt_diff")
)
home_win_pct = (
    game_final["home_win"].groupby(game_final["season"]).mean().rename("home_win_pct")
)

In [40]:
# 6) Line the four per-season series up side by side (aligned on the season
#    index), then turn season back into a regular column for display.
summary = pd.concat(
    [q4_share, fg_share, avg_diff, home_win_pct],
    axis="columns",
).reset_index()
summary

Unnamed: 0,season,q4_share,fg_share,avg_pt_diff,home_win_pct
0,2004,0.276992,0.000308,11.128906,0.558594
1,2005,0.274392,0.021932,11.666667,0.592157
2,2006,0.273746,0.021632,11.429688,0.535156
3,2007,0.278693,0.021937,12.371094,0.574219
4,2008,0.270713,0.023082,12.222656,0.570312
5,2009,0.271113,0.021113,12.957031,0.570312
6,2010,0.2766,0.022254,11.746094,0.558594
7,2011,0.276803,0.023145,12.023438,0.566406
8,2012,0.273967,0.022579,12.152344,0.570312
9,2013,0.274008,0.021958,11.292969,0.597656
