In [1]:
from nba_api.stats.endpoints import ShotChartDetail, LeagueGameLog, BoxScorePlayerTrackV3, BoxScoreAdvancedV3
import pandas as pd
import time


In [2]:
def get_game_ids(season, season_type='Regular Season'):
    """Return the distinct game ids found in the league game log.

    Parameters
    ----------
    season :
        Season identifier passed straight through to ``LeagueGameLog``
        (the notebook uses strings such as ``'2022-23'``).
    season_type :
        Forwarded as ``season_type_all_star``; defaults to
        ``'Regular Season'``.

    Returns
    -------
    list
        Unique ``GAME_ID`` values, in order of first appearance in the log.
    """
    log_request = LeagueGameLog(season=season, season_type_all_star=season_type)
    # Only the first frame returned by the endpoint is used.
    league_log = log_request.get_data_frames()[0]
    unique_ids = league_log['GAME_ID'].unique()
    return unique_ids.tolist()

def _fetch_game_frame(endpoint_name, build_request, game_id, max_retries, retry_delay):
    """Fetch the first data frame of one endpoint for one game, retrying on failure.

    Parameters
    ----------
    endpoint_name : str
        Label used in the log messages.
    build_request : callable
        Called as ``build_request(game_id)``; must return an object exposing
        ``get_data_frames()``.
    game_id :
        Game identifier; also written into a ``GAME_ID`` column of the result.
    max_retries : int
        Number of additional attempts after the first failure (negative
        values are treated as 0).
    retry_delay : float
        Base pause in seconds; the pause after the n-th failed attempt is
        ``retry_delay * n``.

    Returns
    -------
    pandas.DataFrame or None
        The tagged frame, or ``None`` when every attempt failed.
    """
    total_attempts = max(int(max_retries), 0) + 1
    for attempt in range(1, total_attempts + 1):
        try:
            frame = build_request(game_id).get_data_frames()[0]
            frame['GAME_ID'] = game_id
            return frame
        except Exception as e:
            print(f"Failed {endpoint_name} for {game_id}: {e}")
            # Back off a little longer after each consecutive failure so a
            # struggling server is not hit again immediately.
            time.sleep(retry_delay * attempt)
    print(f"Giving up on {endpoint_name} for {game_id} after {total_attempts} attempts")
    return None


def get_data(game_ids, max_retries=2, retry_delay=1, request_delay=0.6):
    """Download shot chart, player tracking and advanced box score data per game.

    For every id in ``game_ids`` the three endpoints ``ShotChartDetail``,
    ``BoxScorePlayerTrackV3`` and ``BoxScoreAdvancedV3`` are queried. Each
    request is retried on failure, so a transient error (for example a read
    timeout) no longer drops that game's rows. A request that still fails
    after all retries is logged and skipped; it never aborts the whole run.

    Parameters
    ----------
    game_ids : iterable
        Game identifiers to fetch.
    max_retries : int, optional
        Extra attempts per request after the first failure (default 2).
        ``0`` means a single attempt per request.
    retry_delay : float, optional
        Base pause in seconds between attempts (default 1).
    request_delay : float, optional
        Pause in seconds after each game, used as rate limiting (default 0.6).

    Returns
    -------
    tuple of pandas.DataFrame
        ``(shotchart_df, playertracking_df, advanced_df)``. A frame is empty
        when no request for that endpoint succeeded (including when
        ``game_ids`` is empty).
    """
    # NOTE(review): ShotChartDetail is called with its default context measure.
    # In nba_api that default is believed to return made shots only; pass
    # context_measure_simple='FGA' here if missed shots are needed. TODO confirm.
    endpoints = (
        ('ShotChartDetail',
         lambda gid: ShotChartDetail(game_id_nullable=gid, team_id=0, player_id=0)),
        ('BoxScorePlayerTrackV3',
         lambda gid: BoxScorePlayerTrackV3(game_id=gid)),
        ('BoxScoreAdvancedV3',
         lambda gid: BoxScoreAdvancedV3(game_id=gid)),
    )
    collected = {endpoint_name: [] for endpoint_name, _ in endpoints}

    for game_id in game_ids:
        for endpoint_name, build_request in endpoints:
            frame = _fetch_game_frame(
                endpoint_name, build_request, game_id, max_retries, retry_delay
            )
            if frame is not None:
                collected[endpoint_name].append(frame)

        # rate limiting
        time.sleep(request_delay)

    def _combine(frames):
        # pd.concat raises on an empty list, so fall back to an empty frame.
        return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    shotchart_df = _combine(collected['ShotChartDetail'])
    playertracking_df = _combine(collected['BoxScorePlayerTrackV3'])
    advanced_df = _combine(collected['BoxScoreAdvancedV3'])

    return shotchart_df, playertracking_df, advanced_df


In [4]:
seasons = ['2022-23', '2023-24']

for season in seasons:
    # Collect the season's game ids, then pull the three per-game datasets.
    game_ids = get_game_ids(season=season)
    shotchart_df, playertracking_df, advanced_box_df = get_data(game_ids)

    # Output files are written to the current working directory, one per dataset.
    shotchart_file = f"{season}_shotchart_data.csv"
    playertracking_file = f"{season}_playertracking_data.csv"
    advanced_box_file = f"{season}_advanced_box_data.csv"

    season_outputs = (
        (shotchart_df, shotchart_file),
        (playertracking_df, playertracking_file),
        (advanced_box_df, advanced_box_file),
    )
    for frame, csv_path in season_outputs:
        frame.to_csv(csv_path, index=False)
    

Failed BoxScoreAdvancedV3 for game 0022300238: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


In [None]:
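# Known issue from the run above: BoxScoreAdvancedV3 failed for game 0022300238 with
# "HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)".
# That game's advanced box score was likely not saved and may need to be re-fetched.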
