In [12]:
%pip install tenacity tqdm

Note: you may need to restart the kernel to use updated packages.


In [13]:
from nba_api.stats.endpoints import BoxScoreAdvancedV2, LeagueDashTeamStats, LeagueGameFinder, LeagueDashPlayerStats, PlayByPlayV2, LeagueLineupViz, TeamPlayerOnOffSummary
from tenacity import retry, stop_after_attempt, wait_exponential
import pandas as pd
from tqdm import tqdm

pd.set_option('display.max_columns', None)


In [14]:
season = "2023-24"

In [15]:
def fetch_team_stats(season):
    team_stats = LeagueDashTeamStats(season=season).get_data_frames()[0]
    team_stats = team_stats[['TEAM_ID', 'TEAM_NAME', 'GP', 'W', 'L']]
    return team_stats

def fetch_game_stats(season, team=None):
    if team is None:
        game_stats = LeagueGameFinder(season_nullable=season).get_data_frames()[0]
    else:
        game_stats = LeagueGameFinder(season_nullable=season, team_id_nullable=team).get_data_frames()[0]
    return game_stats

def fetch_player_stats(season):
    player_stats = LeagueDashPlayerStats(season=season).get_data_frames()[0]
    player_stats = player_stats[['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'GP', 'MIN', 'PTS', 'AST', 'REB', 'STL', 'BLK', 'TOV', 'PLUS_MINUS']]
    return player_stats

def fetch_lineup_data(season, minutes_min, team=None):
    if team is None:
        lineup_data = LeagueLineupViz(season=season, measure_type_detailed_defense="Advanced", minutes_min=minutes_min).get_data_frames()[0]
    else:
        lineup_data = LeagueLineupViz(season=season, measure_type_detailed_defense="Advanced", minutes_min=minutes_min, team_id_nullable=team).get_data_frames()[0]
    lineup_data = lineup_data[['GROUP_ID', 'GROUP_NAME', 'TEAM_ID' , 'OFF_RATING', 'DEF_RATING', 'NET_RATING']]
    return lineup_data

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=30))
def fetch_events(game_id):
    play_by_play = PlayByPlayV2(game_id=game_id).get_data_frames()[0]
    play_by_play = play_by_play[[
        "GAME_ID",
        "EVENTNUM",
        "EVENTMSGTYPE",
        "PERIOD",
        "PCTIMESTRING",
        "PLAYER1_ID",
        "PLAYER1_NAME",
        "PLAYER1_TEAM_ID",
        "PLAYER1_TEAM_ABBREVIATION",
        "PLAYER2_ID",
        "PLAYER2_NAME",
        "PLAYER2_TEAM_ID",
        "PLAYER2_TEAM_ABBREVIATION"
    ]]
    return play_by_play

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=2, min=5, max=40))
def fetch_team_on_off_summary(team_id, season):
    team_on_off_summary = TeamPlayerOnOffSummary(team_id=team_id, season=season).get_data_frames()[0]
    team_on_off_summary = team_on_off_summary[[
        "GROUP_VALUE",  # Identifies the lineup or player combination
        "TEAM_ID",
        "TEAM_ABBREVIATION",
        "TEAM_NAME",
        "GP",  # Games played
        "W",  # Wins
        "L",  # Losses
        "W_PCT",  # Win percentage
        "MIN",  # Minutes played
        "PLUS_MINUS",  # Net impact of lineup
        "OFF_RATING",  # Offensive rating (points per 100 possessions)
        "DEF_RATING",  # Defensive rating (points allowed per 100 possessions)
        "NET_RATING",  # Net rating (OFF_RATING - DEF_RATING)
        "PACE",  # Pace (possessions per 48 minutes)
        "AST",  # Assists
        "TOV",  # Turnovers
        "STL",  # Steals
        "BLK",  # Blocks
        "REB",  # Total rebounds
        "OREB",  # Offensive rebounds
        "DREB",  # Defensive rebounds
        "FG_PCT",  # Field goal percentage
        "FG3_PCT",  # Three-point percentage
        "FT_PCT",  # Free throw percentage
        "PTS"  # Total points
    ]]
    return team_on_off_summary

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=2, min=5, max=40))
def fetch_player_on_summary(team_id, season):
    player_on_summary = TeamPlayerOnOffSummary(team_id=team_id, season=season).get_data_frames()[1]
    return player_on_summary

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=2, min=5, max=40))
def fetch_player_off_summary(team_id, season):
    player_off_summary = TeamPlayerOnOffSummary(team_id=team_id, season=season).get_data_frames()[2]
    return player_off_summary

In [16]:
team_stats = fetch_team_stats(season=season)
team_stats = team_stats[0:1]

player_stats = fetch_player_stats(season=season)
player_stats = player_stats[player_stats.TEAM_ID == team_stats.TEAM_ID.values[0]]

lineup_data = fetch_lineup_data(season=season, minutes_min=5, team=team_stats.TEAM_ID.values[0])
lineup_data = lineup_data[lineup_data.TEAM_ID == team_stats.TEAM_ID.values[0]]
game_stats = fetch_game_stats(season=season, team=team_stats.TEAM_ID.values[0])

team_stats.to_csv("../data/team_stats.csv", index=False)
player_stats.to_csv("../data/player_stats.csv", index=False)
lineup_data.to_csv("../data/lineup_data.csv", index=False)
game_stats.to_csv("../data/game_stats.csv", index=False)

In [17]:
all_team_on_off_summary = pd.DataFrame()
all_player_on_summary = pd.DataFrame()
all_player_off_summary = pd.DataFrame()
n = 0
for team in tqdm(team_stats.iterrows()):
  if n == 100:
    break
  n += 1
  try:
    team_on_off_summary = fetch_team_on_off_summary(team_id=team[1]['TEAM_ID'], season=season)
    all_team_on_off_summary = pd.concat([all_team_on_off_summary, team_on_off_summary])

    player_on_summary = fetch_player_on_summary(team_id=team[1]['TEAM_ID'], season=season)
    all_player_on_summary = pd.concat([all_player_on_summary, player_on_summary])

    player_off_summary = fetch_player_off_summary(team_id=team[1]['TEAM_ID'], season=season)
    all_player_off_summary = pd.concat([all_player_off_summary, player_off_summary])

  except Exception as e:
    print(f"Skipping team {team[1]['TEAM_ID']} after retries: {e}")

all_team_on_off_summary.to_csv("../data/team_on_off_summary.csv")
all_player_on_summary.to_csv("../data/player_on_summary.csv")
all_player_off_summary.to_csv("../data/player_off_summary.csv")

1it [00:10, 10.31s/it]

Skipping team 1610612737 after retries: RetryError[<Future at 0x22714a546b0 state=finished raised KeyError>]





In [8]:
def create_player_on_of_summary(all_player_off_summary, all_player_on_summary, team_stats):
    all_player_on_off_summary = pd.DataFrame()
    ratings = ['NET_RATING', 'OFF_RATING', 'DEF_RATING']
    for _, team in team_stats.iterrows():
        for rating in ratings:
            players_on_summary = all_player_on_summary.loc[all_player_on_summary['TEAM_ID'] == team['TEAM_ID']]
            player_off_summary = all_player_off_summary.loc[all_player_off_summary['TEAM_ID'] == team['TEAM_ID']]

            # Merge the dataframes
            player_on_off_summary = pd.merge(players_on_summary, player_off_summary, on='VS_PLAYER_ID', suffixes=('_on', '_off'))
            player_on_off_summary[f'{rating}_DIFF'] = player_on_off_summary[f'{rating}_on'] - player_on_off_summary[f'{rating}_off']
            all_player_on_off_summary = pd.concat([all_player_on_off_summary, player_on_off_summary])
    return all_player_on_off_summary


In [9]:
all_player_on_off_summary = create_player_on_of_summary(all_player_off_summary, all_player_on_summary, team_stats)
all_player_on_off_summary.to_csv("../data/player_on_off_summary.csv")

KeyError: 'TEAM_ID'

In [61]:
all_events = pd.DataFrame()
n = 0
for game in tqdm(game_stats.iterrows()):
  if n == 100:
    break
  n += 1
  try:
    events = fetch_events(game_id=game[1]['GAME_ID'])
    all_events = pd.concat([all_events, events])
  except Exception as e:
    print(f"Skipping game {game[1]['GAME_ID']} after retries: {e}")

all_events.to_csv("../data/events.csv")

35it [02:14, 28.42s/it]

Skipping game 0022300699 after retries: RetryError[<Future at 0x257bec91e20 state=finished raised ReadTimeout>]


36it [03:48, 48.14s/it]

Skipping game 0022300690 after retries: RetryError[<Future at 0x257bec93710 state=finished raised ReadTimeout>]


37it [05:22, 61.93s/it]

Skipping game 0022300669 after retries: RetryError[<Future at 0x257bec91af0 state=finished raised ReadTimeout>]


38it [06:56, 71.59s/it]

Skipping game 0022300653 after retries: RetryError[<Future at 0x257bec93170 state=finished raised ReadTimeout>]


39it [08:30, 78.35s/it]

Skipping game 0022300634 after retries: RetryError[<Future at 0x257bec92510 state=finished raised ReadTimeout>]


40it [10:04, 83.08s/it]

Skipping game 0022300626 after retries: RetryError[<Future at 0x257bec92240 state=finished raised ReadTimeout>]


71it [12:49, 28.41s/it]

Skipping game 0022300039 after retries: RetryError[<Future at 0x257bcbc22a0 state=finished raised ReadTimeout>]


71it [14:23, 12.16s/it]


KeyboardInterrupt: 