In [None]:
# Install the retry (tenacity) and progress-bar (tqdm) packages that this notebook imports.
%pip install tenacity tqdm

Note: you may need to restart the kernel to use updated packages.


In [1]:
from nba_api.stats.endpoints import BoxScoreAdvancedV2, LeagueDashTeamStats, LeagueGameFinder, LeagueDashPlayerStats, PlayByPlayV2, LeagueLineupViz, TeamPlayerOnOffSummary
from tenacity import retry, stop_after_attempt, wait_exponential
import pandas as pd
from tqdm import tqdm

# Lift the column display limit so wide DataFrames are rendered in full.
pd.set_option('display.max_columns', None)


In [2]:
# Season string handed to the season-scoped fetch_* helpers called in the cells below.
season = "2023-24"

In [10]:
def fetch_team_stats(season):
    """Fetch the team table for ``season`` from ``LeagueDashTeamStats``.

    Takes the first data frame the endpoint returns and keeps only the
    ``TEAM_ID``, ``TEAM_NAME``, ``GP``, ``W`` and ``L`` columns.

    Args:
        season: Season value forwarded to the endpoint's ``season`` argument.

    Returns:
        A DataFrame holding the five columns listed above.
    """
    kept_columns = ['TEAM_ID', 'TEAM_NAME', 'GP', 'W', 'L']
    frames = LeagueDashTeamStats(season=season).get_data_frames()
    return frames[0][kept_columns]

def fetch_game_stats(season, team=None):
    """Fetch game rows for ``season`` from ``LeagueGameFinder``.

    Args:
        season: Forwarded to the endpoint as ``season_nullable``.
        team: Optional team id. When given it is forwarded as
            ``team_id_nullable``; when ``None`` the argument is omitted.

    Returns:
        The first data frame returned by the endpoint, unmodified.
    """
    finder_kwargs = {"season_nullable": season}
    if team is not None:
        finder_kwargs["team_id_nullable"] = team
    return LeagueGameFinder(**finder_kwargs).get_data_frames()[0]

def fetch_player_stats(season):
    """Fetch the player table for ``season`` from ``LeagueDashPlayerStats``.

    Takes the first data frame the endpoint returns and keeps the player and
    team identifiers plus the GP, MIN, PTS, AST, REB, STL, BLK, TOV and
    PLUS_MINUS columns.

    Args:
        season: Season value forwarded to the endpoint's ``season`` argument.

    Returns:
        A DataFrame restricted to the columns listed above.
    """
    kept_columns = [
        'PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID',
        'GP', 'MIN', 'PTS', 'AST', 'REB', 'STL', 'BLK', 'TOV', 'PLUS_MINUS',
    ]
    frames = LeagueDashPlayerStats(season=season).get_data_frames()
    return frames[0][kept_columns]

def fetch_lineup_data(season, minutes_min, team=None):
    """Fetch lineup ratings for ``season`` from ``LeagueLineupViz``.

    The endpoint is always queried with
    ``measure_type_detailed_defense="Advanced"``.

    Args:
        season: Season value forwarded to the endpoint's ``season`` argument.
        minutes_min: Forwarded to the endpoint's ``minutes_min`` argument.
        team: Optional team id. When given it is forwarded as
            ``team_id_nullable``; when ``None`` the argument is omitted.

    Returns:
        The first data frame returned by the endpoint, reduced to the
        ``GROUP_ID``, ``GROUP_NAME``, ``TEAM_ID``, ``OFF_RATING``,
        ``DEF_RATING`` and ``NET_RATING`` columns.
    """
    query = {
        "season": season,
        "measure_type_detailed_defense": "Advanced",
        "minutes_min": minutes_min,
    }
    if team is not None:
        query["team_id_nullable"] = team

    kept_columns = ['GROUP_ID', 'GROUP_NAME', 'TEAM_ID', 'OFF_RATING', 'DEF_RATING', 'NET_RATING']
    frames = LeagueLineupViz(**query).get_data_frames()
    return frames[0][kept_columns]

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=30),
)
def fetch_events(game_id):
    """Fetch the play-by-play rows of one game from ``PlayByPlayV2``.

    The call is wrapped in a tenacity retry: at most three attempts, with an
    exponential wait configured with ``min=2`` and ``max=30``.

    Args:
        game_id: Game identifier forwarded to the endpoint's ``game_id`` argument.

    Returns:
        The first data frame returned by the endpoint, reduced to the event
        identification columns and the PLAYER1 / PLAYER2 id, name and team
        columns.
    """
    event_columns = [
        "GAME_ID", "EVENTNUM", "EVENTMSGTYPE", "PERIOD", "PCTIMESTRING",
        "PLAYER1_ID", "PLAYER1_NAME", "PLAYER1_TEAM_ID", "PLAYER1_TEAM_ABBREVIATION",
        "PLAYER2_ID", "PLAYER2_NAME", "PLAYER2_TEAM_ID", "PLAYER2_TEAM_ABBREVIATION",
    ]
    frames = PlayByPlayV2(game_id=game_id).get_data_frames()
    return frames[0][event_columns]

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=2, min=5, max=40),
)
def fetch_team_on_off_summary(team_id, season):
    """Return the first data frame of ``TeamPlayerOnOffSummary`` for a team.

    The call is wrapped in a tenacity retry: at most three attempts, with an
    exponential wait configured with ``min=5`` and ``max=40``.

    Args:
        team_id: Team identifier forwarded to the endpoint.
        season: Season value forwarded to the endpoint.

    Returns:
        The frame at index 0 of the endpoint's data frames, unmodified.
    """
    endpoint = TeamPlayerOnOffSummary(team_id=team_id, season=season)
    frames = endpoint.get_data_frames()
    return frames[0]

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=2, min=5, max=40),
)
def fetch_player_on_summary(team_id, season):
    """Return the second data frame of ``TeamPlayerOnOffSummary`` for a team.

    NOTE(review): presumably the per-player "on court" table - confirm
    against the endpoint documentation.

    The call is wrapped in a tenacity retry: at most three attempts, with an
    exponential wait configured with ``min=5`` and ``max=40``.

    Args:
        team_id: Team identifier forwarded to the endpoint.
        season: Season value forwarded to the endpoint.

    Returns:
        The frame at index 1 of the endpoint's data frames, unmodified.
    """
    endpoint = TeamPlayerOnOffSummary(team_id=team_id, season=season)
    frames = endpoint.get_data_frames()
    return frames[1]

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=2, min=5, max=40),
)
def fetch_player_off_summary(team_id, season):
    """Return the third data frame of ``TeamPlayerOnOffSummary`` for a team.

    NOTE(review): presumably the per-player "off court" table - confirm
    against the endpoint documentation.

    The call is wrapped in a tenacity retry: at most three attempts, with an
    exponential wait configured with ``min=5`` and ``max=40``.

    Args:
        team_id: Team identifier forwarded to the endpoint.
        season: Season value forwarded to the endpoint.

    Returns:
        The frame at index 2 of the endpoint's data frames, unmodified.
    """
    endpoint = TeamPlayerOnOffSummary(team_id=team_id, season=season)
    frames = endpoint.get_data_frames()
    return frames[2]

In [11]:
# Keep only the first team of the league table; everything below is scoped to it.
team_stats = fetch_team_stats(season=season).iloc[:1]
selected_team_id = team_stats["TEAM_ID"].values[0]

# Player rows belonging to the selected team.
player_stats = fetch_player_stats(season=season)
player_stats = player_stats.loc[player_stats["TEAM_ID"] == selected_team_id]

# Lineups are requested for the team and then filtered on TEAM_ID again.
lineup_data = fetch_lineup_data(season=season, minutes_min=5, team=selected_team_id)
lineup_data = lineup_data.loc[lineup_data["TEAM_ID"] == selected_team_id]

game_stats = fetch_game_stats(season=season, team=selected_team_id)

# Persist the four tables without their index column.
team_stats.to_csv("../data/team_stats.csv", index=False)
player_stats.to_csv("../data/player_stats.csv", index=False)
lineup_data.to_csv("../data/lineup_data.csv", index=False)
game_stats.to_csv("../data/game_stats.csv", index=False)

In [12]:
# Preview the team-level on/off summary for the first team in team_stats.
team_on_of = fetch_team_on_off_summary(
    season=season,
    team_id=team_stats["TEAM_ID"].values[0],
)
team_on_of

Unnamed: 0,GROUP_SET,GROUP_VALUE,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GP,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,TOV,STL,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,FGM_RANK,FGA_RANK,FG_PCT_RANK,FG3M_RANK,FG3A_RANK,FG3_PCT_RANK,FTM_RANK,FTA_RANK,FT_PCT_RANK,OREB_RANK,DREB_RANK,REB_RANK,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK
0,Overall,2023-24,1610612737,ATL,Atlanta Hawks,82,36,46,0.439,3971.0,3529,7584,0.465,1125,3092,0.364,1520,1906,0.797,1024,2639,3663,2180,1110.0,615,369,461,1522,1594,9703,-179.0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


In [13]:
# Download the three on/off tables for every team in team_stats and export them.
#
# Frames are collected in lists and concatenated once after the loop. Growing a
# DataFrame with pd.concat on every iteration copies all previous rows each
# time and concatenates onto an empty frame, which recent pandas deprecates.
max_teams = 100  # upper bound on the number of teams fetched in one run
team_frames, player_on_frames, player_off_frames = [], [], []

team_ids = team_stats['TEAM_ID'].head(max_teams)
for team_id in tqdm(team_ids):
    try:
        # Fetch all three tables before recording any of them, so a failure
        # half-way through does not leave a team present in one table only.
        team_on_off_summary = fetch_team_on_off_summary(team_id=team_id, season=season)
        player_on_summary = fetch_player_on_summary(team_id=team_id, season=season)
        player_off_summary = fetch_player_off_summary(team_id=team_id, season=season)
    except Exception as e:
        # Best effort: report the team and carry on with the next one.
        print(f"Skipping team {team_id} after retries: {e}")
        continue

    team_frames.append(team_on_off_summary)
    player_on_frames.append(player_on_summary)
    player_off_frames.append(player_off_summary)

# pd.concat raises on an empty list, so fall back to an empty frame when every
# team was skipped (or team_stats had no rows).
all_team_on_off_summary = pd.concat(team_frames) if team_frames else pd.DataFrame()
all_player_on_summary = pd.concat(player_on_frames) if player_on_frames else pd.DataFrame()
all_player_off_summary = pd.concat(player_off_frames) if player_off_frames else pd.DataFrame()

all_team_on_off_summary.to_csv("../data/team_on_off_summary.csv")
all_player_on_summary.to_csv("../data/player_on_summary.csv")
all_player_off_summary.to_csv("../data/player_off_summary.csv")

0it [00:00, ?it/s]

1it [00:00,  6.52it/s]


In [18]:
def create_player_on_of_summary(all_player_off_summary, all_player_on_summary, team_stats):
    """Join the per-player "on" and "off" tables and add on-minus-off rating columns.

    For every ``TEAM_ID`` in ``team_stats`` the matching rows of both inputs are
    merged on ``VS_PLAYER_ID`` (overlapping columns get ``_on`` / ``_off``
    suffixes), and ``NET_RATING_DIFF``, ``OFF_RATING_DIFF`` and
    ``DEF_RATING_DIFF`` are computed as ``<rating>_on - <rating>_off``.

    Each team is merged exactly once and all three differences are written to
    that single merged frame, giving one row per player with every ``*_DIFF``
    column populated. Merging inside the per-rating loop would instead append
    each player three times, every copy carrying only one of the differences.

    The duplicated descriptive columns (``GROUP_SET``, ``TEAM_ID``,
    ``TEAM_ABBREVIATION``, ``TEAM_NAME``, ``VS_PLAYER_NAME``) are kept once,
    taken from the "off" side and stripped of their suffix.

    Args:
        all_player_off_summary: DataFrame of "off" rows; must contain
            ``TEAM_ID``, ``VS_PLAYER_ID`` and the three rating columns.
        all_player_on_summary: DataFrame of "on" rows with the same columns.
        team_stats: DataFrame whose ``TEAM_ID`` column lists the teams to process.

    Returns:
        A DataFrame with one row per matched player, or an empty DataFrame when
        either input is empty or ``team_stats`` has no rows.
    """
    ratings = ['NET_RATING', 'OFF_RATING', 'DEF_RATING']

    # Nothing to join (e.g. every download was skipped): return an empty frame
    # rather than failing on the missing TEAM_ID column.
    if all_player_on_summary.empty or all_player_off_summary.empty:
        return pd.DataFrame()

    merged_per_team = []
    for team_id in team_stats['TEAM_ID']:
        on_rows = all_player_on_summary.loc[all_player_on_summary['TEAM_ID'] == team_id]
        off_rows = all_player_off_summary.loc[all_player_off_summary['TEAM_ID'] == team_id]

        # One merge per team; the rating loop only adds columns to it.
        merged = pd.merge(on_rows, off_rows, on='VS_PLAYER_ID', suffixes=('_on', '_off'))
        for rating in ratings:
            merged[f'{rating}_DIFF'] = merged[f'{rating}_on'] - merged[f'{rating}_off']
        merged_per_team.append(merged)

    if not merged_per_team:
        return pd.DataFrame()

    all_player_on_off_summary = pd.concat(merged_per_team)
    all_player_on_off_summary = all_player_on_off_summary.drop(columns=[
        'GROUP_SET_on',
        'TEAM_ID_on',
        'TEAM_ABBREVIATION_on',
        'TEAM_NAME_on',
        'VS_PLAYER_NAME_on',
    ])
    all_player_on_off_summary = all_player_on_off_summary.rename(columns={
        'GROUP_SET_off': 'GROUP_SET',
        'TEAM_ID_off': 'TEAM_ID',
        'TEAM_ABBREVIATION_off': 'TEAM_ABBREVIATION',
        'TEAM_NAME_off': 'TEAM_NAME',
        'VS_PLAYER_NAME_off': 'VS_PLAYER_NAME',
    })
    return all_player_on_off_summary


In [19]:
# Combine the player on/off tables for the teams in team_stats and export the result.
all_player_on_off_summary = create_player_on_of_summary(
    all_player_off_summary=all_player_off_summary,
    all_player_on_summary=all_player_on_summary,
    team_stats=team_stats,
)
all_player_on_off_summary.to_csv(path_or_buf="../data/player_on_off_summary.csv")

In [20]:
# Show the column labels of the combined on/off table.
all_player_on_off_summary.columns

Index(['VS_PLAYER_ID', 'COURT_STATUS_on', 'GP_on', 'MIN_on', 'PLUS_MINUS_on',
       'OFF_RATING_on', 'DEF_RATING_on', 'NET_RATING_on', 'GROUP_SET',
       'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'VS_PLAYER_NAME',
       'COURT_STATUS_off', 'GP_off', 'MIN_off', 'PLUS_MINUS_off',
       'OFF_RATING_off', 'DEF_RATING_off', 'NET_RATING_off', 'NET_RATING_DIFF',
       'OFF_RATING_DIFF', 'DEF_RATING_DIFF'],
      dtype='object')

In [21]:
# Download play-by-play events for the games listed in game_stats and export them.
#
# Frames are collected in a list and concatenated once after the loop; growing
# a DataFrame with pd.concat on every iteration copies all previous rows each
# time and concatenates onto an empty frame, which recent pandas deprecates.
max_games = 100  # upper bound on the number of games fetched in one run

# Each game is requested once even if game_stats lists it on several rows.
game_ids = game_stats['GAME_ID'].drop_duplicates().head(max_games)

event_frames = []
for game_id in tqdm(game_ids):
    try:
        events = fetch_events(game_id=game_id)
    except Exception as e:
        # Best effort: report the game and carry on with the next one.
        print(f"Skipping game {game_id} after retries: {e}")
        continue
    event_frames.append(events)

# pd.concat raises on an empty list, so fall back to an empty frame when every
# game was skipped (or game_stats had no rows).
all_events = pd.concat(event_frames) if event_frames else pd.DataFrame()

all_events.to_csv("../data/events.csv")

88it [00:49,  1.79it/s]
