In [32]:
from nba_api.stats.endpoints import ShotChartDetail, LeagueGameLog, BoxScorePlayerTrackV3, BoxScoreAdvancedV3, playbyplayv3, SynergyPlayTypes
import pandas as pd
import time
seasons = ['2020-21','2021-22','2022-23','2023-24']


# Data Fetching
Data gathered through the NBA API with the basic pattern for most endpoints being get the games in a season and query the endpoint for each game. Play type data is not done on a game by game basis so is retrived sepratly. 

In [2]:
def get_game_ids(season, season_type='Regular Season'):
    game_log = LeagueGameLog(season=season, season_type_all_star=season_type).get_data_frames()[0]
    game_ids = game_log['GAME_ID'].unique().tolist()
    return game_ids

def get_data(game_ids):
    play_by_play_data = []
    shotchart_data = []
    playertracking_data = []
    advanced_data = []
    
    for game_id in game_ids:
        try:
            #play by play
            try:
                play_by_play = playbyplayv3.PlayByPlayV3(game_id=game_id).get_data_frames()[0]
                play_by_play['GAME_ID'] = game_id
                play_by_play_data.append(play_by_play)
            except Exception as e:
                print(f"Failed PlayByPlayV2 for {game_id}: {e}")
                time.sleep(1)
                
            #ShotChartDetail
            try:
                shotchart = ShotChartDetail(game_id_nullable=game_id, team_id=0, player_id=0,context_measure_simple='FGA').get_data_frames()[0]
                shotchart['GAME_ID'] = game_id
                shotchart_data.append(shotchart)
            except Exception as e:
                print(f"Failed ShotChartDetail for {game_id}: {e}")
                time.sleep(1) 
            
            #BoxScorePlayerTrackV3
            try:
                player_tracking = BoxScorePlayerTrackV3(game_id=game_id).get_data_frames()[0]
                player_tracking['GAME_ID'] = game_id
                playertracking_data.append(player_tracking)
            except Exception as e:
                print(f"Failed BoxScorePlayerTrackV3 for {game_id}: {e}")
                time.sleep(1)
            
            #BoxScoreAdvancedV3
            try:
                advanced_boxscore = BoxScoreAdvancedV3(game_id=game_id).get_data_frames()[0]
                advanced_boxscore['GAME_ID'] = game_id
                advanced_data.append(advanced_boxscore)
            except Exception as e:
                print(f"Failed BoxScoreAdvancedV3 for {game_id}: {e}")
                time.sleep(1)
            
            # rate limiting
            time.sleep(0.6)
        
        except Exception as e:
            print(f"Unexpected error for game {game_id}: {e}")
            time.sleep(10) 
    
    # Concat dataframes only if data exists
    shotchart_df = pd.concat(shotchart_data, ignore_index=True) if shotchart_data else pd.DataFrame()
    playertracking_df = pd.concat(playertracking_data, ignore_index=True) if playertracking_data else pd.DataFrame()
    advanced_df = pd.concat(advanced_data, ignore_index=True) if advanced_data else pd.DataFrame()
    play_by_play_df = pd.concat(play_by_play_data, ignore_index=True) if play_by_play_data else pd.DataFrame()
    
    return shotchart_df, playertracking_df, advanced_df,play_by_play_df




This takes a while NBA api rate limiting is brutal.

In [3]:

for season in seasons:
    game_ids = get_game_ids(season = season)


    shotchart_df, playertracking_df,advanced_box_df,play_by_play_df = get_data(game_ids)
    
    #names
    shotchart_file = f"{season}_shotchart_data.csv"
    playertracking_file = f"{season}_playertracking_data.csv"
    advanced_box_file = f"{season}_advanced_box_data.csv"
    play_by_play_file = f'{season}_play_by_play.csv'
    
    shotchart_df.to_csv(shotchart_file, index=False)
    playertracking_df.to_csv(playertracking_file, index=False)
    advanced_box_df.to_csv(advanced_box_file,index=False)
    play_by_play_df.to_csv(play_by_play_file, index=False)
    

Failed PlayByPlayV2 for 0022100301: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out.
Failed PlayByPlayV2 for 0022100506: Expecting value: line 1 column 1 (char 0)
Failed PlayByPlayV2 for 0022200707: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


## To fix
Failed PlayByPlayV2 for 0022100301: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out.
Failed PlayByPlayV2 for 0022100506: Expecting value: line 1 column 1 (char 0)
Failed PlayByPlayV2 for 0022200707: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


### Get playType data

In [35]:
league_id = '00'  # NBA
season_type = 'Regular Season'
per_mode = 'PerGame'
player_or_team = 'P'
play_types = ['Cut', 'Handoff', 'Isolation', 'Misc', 'OffScreen', 'Postup', 'PRBallHandler', 'PRRollman', 'OffRebound', 'Spotup', 'Transition']
type_groupings = ['offensive', 'defensive']

for season in seasons:
    synergy_data = [] 
    
    for type_grouping in type_groupings:
        for play_type in play_types:
            try:
                # Fetch the synergy data
                synergy = SynergyPlayTypes(
                    league_id=league_id,
                    per_mode_simple=per_mode,
                    player_or_team_abbreviation=player_or_team,
                    season_type_all_star=season_type,
                    season=season,
                    play_type_nullable=play_type,
                    type_grouping_nullable=type_grouping
                ).get_data_frames()[0]
                
                synergy_data.append(synergy)
                time.sleep(0.6)  # Delay for rate limiting

            except Exception as e:
                print(f"Failed Synergy for season: {season}, play type: {play_type}, type grouping: {type_grouping}: {e}")
                time.sleep(1)  # Delay before retrying on error

    
    synergy_df = pd.concat(synergy_data, ignore_index=True) if synergy_data else pd.DataFrame()

    # Save
    play_type_file = f'{season}_play_type.csv'
    synergy_df.to_csv(play_type_file, index=False)




        

  synergy_df = pd.concat(synergy_data, ignore_index=True) if synergy_data else pd.DataFrame()
  synergy_df = pd.concat(synergy_data, ignore_index=True) if synergy_data else pd.DataFrame()
  synergy_df = pd.concat(synergy_data, ignore_index=True) if synergy_data else pd.DataFrame()
  synergy_df = pd.concat(synergy_data, ignore_index=True) if synergy_data else pd.DataFrame()
