In [259]:
# I am going to use an API from ESPN to pull regular season scores and results for the NFL.
# I inspect and clean the data to pull a couple insights from the data

In [238]:
import requests
import json
from pprint import pprint
import pandas as pd

In [239]:
response = requests.get("https://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard")
print(response.status_code)

200


In [240]:
print(response.json())

{'leagues': [{'id': '28', 'uid': 's:20~l:28', 'name': 'National Football League', 'abbreviation': 'NFL', 'slug': 'nfl', 'season': {'year': 2025, 'startDate': '2025-07-31T07:00Z', 'endDate': '2026-02-12T07:59Z', 'displayName': '2025', 'type': {'id': '3', 'type': 3, 'name': 'Postseason', 'abbreviation': 'post'}}, 'logos': [{'href': 'https://a.espncdn.com/i/teamlogos/leagues/500/nfl.png', 'width': 500, 'height': 500, 'alt': '', 'rel': ['full', 'default'], 'lastUpdated': '2018-06-05T12:07Z'}, {'href': 'https://a.espncdn.com/i/teamlogos/leagues/500-dark/nfl.png', 'width': 500, 'height': 500, 'alt': '', 'rel': ['full', 'dark'], 'lastUpdated': '2024-07-22T16:53Z'}], 'calendarType': 'list', 'calendarIsWhitelist': True, 'calendarStartDate': '2025-07-31T07:00Z', 'calendarEndDate': '2026-02-12T07:59Z', 'calendar': [{'label': 'Preseason', 'value': '1', 'startDate': '2025-07-31T07:00Z', 'endDate': '2025-09-04T06:59Z', 'entries': [{'label': 'Hall of Fame Weekend', 'alternateLabel': 'HOF', 'detail': 

In [241]:
pprint(response.json())

{'events': [{'competitions': [{'attendance': 0,
                               'broadcast': 'ESPN/DISNEY XD',
                               'broadcasts': [{'market': 'national',
                                               'names': ['ESPN', 'DISNEY XD']}],
                               'competitors': [{'homeAway': 'home',
                                                'id': '31',
                                                'order': 0,
                                                'score': '0',
                                                'statistics': [],
                                                'team': {'abbreviation': 'AFC',
                                                         'displayName': 'AFC',
                                                         'id': '31',
                                                         'isActive': False,
                                                         'links': [],
                                                  

In [242]:
import datetime

current_year = datetime.datetime.now().year
start_year = 2021

In [243]:
# I am pulling all regular season games from 2021-2026 (current season) bc that is the earliest season with 17 regular games. 
# I only want to see specific categories of data to simplify the df.

In [244]:
all_games = []

for year in range(start_year, current_year + 1):
    for week in range(1, 19):
        url = (
            "https://site.api.espn.com/apis/site/v2/"
            "sports/football/nfl/scoreboard"
            f"?seasontype=2&week={week}&year={year}"
        )

        response = requests.get(url)
        if response.status_code != 200:
            continue

        data = response.json()

        for event in data.get("events", []):
            if event["season"]["type"] != 2:
                continue

            competition = event["competitions"][0]
            competitors = competition["competitors"]

            home = next(t for t in competitors if t["homeAway"] == "home")
            away = next(t for t in competitors if t["homeAway"] == "away")

            all_games.append({
                "season": year,
                "week": week,
                "date": event["date"],
                "home_team": home["team"]["displayName"],
                "away_team": away["team"]["displayName"],
                "home_score": int(home.get("score", 0)),
                "away_score": int(away.get("score", 0)),
                "status": competition["status"]["type"]["name"]
            })

In [245]:
NFL_Regular_Season_Games_21to25_df = pd.DataFrame(all_games)
NFL_Regular_Season_Games_21to25_df.head()

Unnamed: 0,season,week,date,home_team,away_team,home_score,away_score,status
0,2021,1,2025-09-05T00:20Z,Philadelphia Eagles,Dallas Cowboys,24,20,STATUS_FINAL
1,2021,1,2025-09-06T00:00Z,Los Angeles Chargers,Kansas City Chiefs,27,21,STATUS_FINAL
2,2021,1,2025-09-07T17:00Z,Atlanta Falcons,Tampa Bay Buccaneers,20,23,STATUS_FINAL
3,2021,1,2025-09-07T17:00Z,Cleveland Browns,Cincinnati Bengals,16,17,STATUS_FINAL
4,2021,1,2025-09-07T17:00Z,Indianapolis Colts,Miami Dolphins,33,8,STATUS_FINAL


In [246]:
NFL_Regular_Season_Games_21to25_df = NFL_Regular_Season_Games_21to25_df[NFL_Regular_Season_Games_21to25_df["status"] == "STATUS_FINAL"]

In [247]:
# I only want final game scores, so I took out any games that were not final

In [248]:
NFL_Regular_Season_Games_21to25_df["status"].value_counts()

status
STATUS_FINAL    1632
Name: count, dtype: int64

In [249]:
# To double check my data, I checked the total number of games, correct if 272 (17 games x 32 teams / 2 teams) = 272 games per season

In [250]:
NFL_Regular_Season_Games_21to25_df["season"].value_counts().sort_index()

season
2021    272
2022    272
2023    272
2024    272
2025    272
2026    272
Name: count, dtype: int64

In [251]:
NFL_Regular_Season_Games_21to25_df.sample(5)

Unnamed: 0,season,week,date,home_team,away_team,home_score,away_score,status
1175,2025,6,2025-10-12T20:05Z,Las Vegas Raiders,Tennessee Titans,20,10,STATUS_FINAL
566,2023,2,2025-09-14T17:00Z,New Orleans Saints,San Francisco 49ers,21,26,STATUS_FINAL
1422,2026,4,2025-09-29T23:15Z,Miami Dolphins,New York Jets,27,21,STATUS_FINAL
852,2024,3,2025-09-21T17:00Z,New England Patriots,Pittsburgh Steelers,14,21,STATUS_FINAL
284,2022,1,2025-09-07T20:25Z,Green Bay Packers,Detroit Lions,27,13,STATUS_FINAL


In [252]:
# Going to eliminate "status" column because I know every game is final

In [253]:
NFL_Regular_Season_Games_21to25_df = NFL_Regular_Season_Games_21to25_df.drop(columns=["status"])

In [254]:
NFL_Regular_Season_Games_21to25_df.sample(5)

Unnamed: 0,season,week,date,home_team,away_team,home_score,away_score
884,2024,5,2025-10-05T17:00Z,New York Jets,Dallas Cowboys,22,37
142,2021,10,2025-11-09T18:00Z,Carolina Panthers,New Orleans Saints,7,17
736,2023,13,2025-12-01T01:20Z,Washington Commanders,Denver Broncos,26,27
343,2022,5,2025-10-05T17:00Z,Baltimore Ravens,Houston Texans,10,44
1347,2025,18,2026-01-04T18:00Z,Cincinnati Bengals,Cleveland Browns,18,20


In [255]:
# Now I am going to change "date" to the format dd-mmm-yy so that it is easier to read

In [256]:
NFL_Regular_Season_Games_21to25_df["date"] = pd.to_datetime(
    NFL_Regular_Season_Games_21to25_df["date"],
    utc=True
)

In [257]:
NFL_Regular_Season_Games_21to25_df["date"] = (
    NFL_Regular_Season_Games_21to25_df["date"]
        .dt.strftime("%d-%b-%y")
        .str.upper()
)

In [262]:
NFL_Regular_Season_Games_21to25_df.sample(5)

Unnamed: 0,season,week,date,home_team,away_team,home_score,away_score
169,2021,12,23-NOV-25,Tennessee Titans,Seattle Seahawks,24,30
855,2024,3,21-SEP-25,Washington Commanders,Las Vegas Raiders,41,24
149,2021,11,14-NOV-25,New England Patriots,New York Jets,27,14
87,2021,6,12-OCT-25,Las Vegas Raiders,Tennessee Titans,20,10
1060,2024,17,28-DEC-25,Green Bay Packers,Baltimore Ravens,24,41


In [263]:
# The data is now clean and in the format that I like, so I am going to convert it to a CSV

In [264]:
NFL_Regular_Season_Games_21to25_df.to_csv(
    "nfl_regular_season_games_2021_2025.csv",
    index=False
)

In [267]:
Clean_NFL_Score_Data_21to25_df = pd.read_csv("nfl_regular_season_games_2021_2025.csv")

In [268]:
Clean_NFL_Score_Data_21to25_df.head()

Unnamed: 0,season,week,date,home_team,away_team,home_score,away_score
0,2021,1,05-SEP-25,Philadelphia Eagles,Dallas Cowboys,24,20
1,2021,1,06-SEP-25,Los Angeles Chargers,Kansas City Chiefs,27,21
2,2021,1,07-SEP-25,Atlanta Falcons,Tampa Bay Buccaneers,20,23
3,2021,1,07-SEP-25,Cleveland Browns,Cincinnati Bengals,16,17
4,2021,1,07-SEP-25,Indianapolis Colts,Miami Dolphins,33,8


In [282]:
Clean_NFL_Score_Data_21to25_df["total_points"] = (
    Clean_NFL_Score_Data_21to25_df["home_score"]
    + Clean_NFL_Score_Data_21to25_df["away_score"]
)

In [283]:
Clean_NFL_Score_Data_21to25_df.head()

Unnamed: 0,season,week,date,home_team,away_team,home_score,away_score,total_points
0,2021,1,05-SEP-25,Philadelphia Eagles,Dallas Cowboys,24,20,44
1,2021,1,06-SEP-25,Los Angeles Chargers,Kansas City Chiefs,27,21,48
2,2021,1,07-SEP-25,Atlanta Falcons,Tampa Bay Buccaneers,20,23,43
3,2021,1,07-SEP-25,Cleveland Browns,Cincinnati Bengals,16,17,33
4,2021,1,07-SEP-25,Indianapolis Colts,Miami Dolphins,33,8,41


In [285]:
# I used Python to download NFL game data from ESPNâ€™s API
# I selected regular season games from 2021 to 2025 and kept only the final game results
# I cleaned the data by removing unnecessary columns and I calculated the total number of points scored in each game
# After checking that the data looked correct, I saved the cleaned dataset as a CSV file so it can be used for analysis later