# Coby White Analysis - FINAL (DATA COLLECTION)

# DATA SET-UP

In [1]:
pip install nba_api pandas

Note: you may need to restart the kernel to use updated packages.


In [53]:
# Import packages
import pandas as pd
import os
import matplotlib as plt
import plotly.express as px
import numpy as np
import time
import random
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from scipy.stats import percentileofscore

# COLLECT DATA

## I. General Stats

### ALL PLAYERS:

#### *LeagueDashPlayerStats*

In [None]:
from nba_api.stats.endpoints import leaguedashplayerstats

##### 2024-25 Per Game Averages

In [None]:
# Get all player stats for 2024-25 season
league_24_25_stats = leaguedashplayerstats.LeagueDashPlayerStats(season='2024-25', per_mode_detailed='PerGame').get_data_frames()[0]
league_24_25_stats

In [None]:
league_24_25_stats.columns

In [None]:
# Condense table to relevant columns
league_24_25_general_stats_per_game = league_24_25_stats[['PLAYER_NAME','MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS']].sort_values(by='PTS', ascending=False)
league_24_25_general_stats_per_game

In [None]:
# Export to CSV
league_24_25_general_stats_per_game.to_csv("league_24_25_general_stats_per_game.csv", index=False)

##### 2023-24 Per Game Averages

In [None]:
# Get all player stats for 2023-24 season
league_23_24_stats = leaguedashplayerstats.LeagueDashPlayerStats(season='2023-24', per_mode_detailed='PerGame').get_data_frames()[0]
league_23_24_stats

In [None]:
# Condense table to relevant columns
league_23_24_general_stats_per_game = league_23_24_stats[['PLAYER_NAME','MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS']].sort_values(by='PTS', ascending=False)
league_23_24_general_stats_per_game

In [None]:
# Export to CSV
league_23_24_general_stats_per_game.to_csv("league_23_24_general_stats_per_game.csv", index=False)

##### Coby White General Stats

In [None]:
Coby_White_23_24_stats = league_23_24_stats[league_23_24_stats['PLAYER_NAME'] == 'Coby White']
Coby_White_23_24_stats.columns

In [None]:
Coby_White_23_24_general_stats_per_game = league_23_24_general_stats_per_game[league_23_24_general_stats_per_game['PLAYER_NAME'] == 'Coby White']
Coby_White_23_24_general_stats_per_game

#### *LeagueDashPlayerStats - Advanced Stats*

##### 2024-25 Per Game Averages

In [None]:
# Get all player stats for 2024-25 season
league_24_25_advanced_stats = leaguedashplayerstats.LeagueDashPlayerStats(
    season='2024-25', per_mode_detailed='PerGame', measure_type_detailed_defense='Advanced'
).get_data_frames()[0]

league_24_25_advanced_stats

In [None]:
league_24_25_advanced_stats.columns

In [None]:
# Condense table to relevant columns
league_24_25_advanced_stats_per_game = league_24_25_advanced_stats[['PLAYER_NAME','EFG_PCT', 'TS_PCT', 'USG_PCT', 'E_USG_PCT', 'PACE', 'E_PACE','PIE'
       ]]
league_24_25_advanced_stats_per_game

In [None]:
# Export to CSV
league_24_25_advanced_stats_per_game.to_csv("league_24_25_advanced_stats_per_game.csv", index=False)

##### Coby White Advanced Stats

In [None]:
Coby_White_23_24_advanced_stats_per_game = league_24_25_advanced_stats_per_game[league_24_25_advanced_stats_per_game['PLAYER_NAME'] == 'Coby White']
Coby_White_23_24_advanced_stats_per_game

## II. Game Logs

### ALL PLAYER LOGS:

In [5]:
from nba_api.stats.endpoints import PlayerGameLog
from nba_api.stats.static import players

In [9]:
# Get all active player IDs
all_players = players.get_active_players()
player_ids = [p['id'] for p in all_players]
player_dict = {p['id']: p['full_name'] for p in all_players}

##### 2024-25 Totals

In [9]:
# *Took 1 hour+ to run*
# Objective: Loop through game logs to get data for all players in 24-25 season
# Method: Loop through each player_id using parallel threading, smart rate-limiting, and a retry mechanism to avoid NBA rate limit errors

player_info = {player["id"]: player["full_name"] for player in all_players}

# Function to fetch and process data for a single player with up to 5 retries
def fetch_player_game_logs(player_id, retries=5):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving data for player ID {player_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call game logs for each player with increased timeout (from default 30 to 60)
            response = PlayerGameLog(
                player_id=player_id,
                season="2024-25",
                timeout=60
            )

            data_frames = response.get_data_frames()
            
            # Ensure game log data exists and return data frame per player or stop retrying if no valid data
            if not data_frames or data_frames[0].empty:
                print(f"‚ùå No game log data for player ID {player_id}")
                return None

            game_log_df = data_frames[0].copy()
            game_log_df["PLAYER_ID"] = player_id
            game_log_df["FULL_NAME"] = player_info.get(player_id, "Unknown")
            return game_log_df

        # Return error message
        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
            print(f"‚ö†Ô∏è Connection issue for player ID {player_id}: {e}")
        except Exception as e:
            print(f"‚ö†Ô∏è Unexpected error retrieving data for player ID {player_id}: {e}")

        # Exponential backoff with random jitter to avoid API blocking before next attempt
        attempt += 1
        sleep_time = (2 ** attempt) + random.uniform(0, 1)
        print(f"üîÑ Retrying player ID {player_id} in {sleep_time:.2f} seconds...")
        time.sleep(sleep_time)

    print(f"üö® Failed to retrieve data for player ID {player_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 1

# Store game log data for all players in a list
all_players_game_log_24_25 = []

# Run multiple requests in parallel and submit future objects into player dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_player = {executor.submit(fetch_player_game_logs, pid): pid for pid in player_ids} 

    # Process results as soon as they're completed and add results from future_to_player function to player dictionary
    for future in as_completed(future_to_player):
        result = future.result()
        if result is not None:
            all_players_game_log_24_25.append(result)

# Combine results into a single DF
if all_players_game_log_24_25:
    all_players_game_log_24_25 = pd.concat(all_players_game_log_24_25, ignore_index=True)

print("‚úÖ Data retrieval complete!")

Retrieving data for player ID 1630173 (Attempt 1) at 1744686746.2573068...
Retrieving data for player ID 203500 (Attempt 1) at 1744686746.478438...
Retrieving data for player ID 1628389 (Attempt 1) at 1744686746.5586207...
Retrieving data for player ID 1630534 (Attempt 1) at 1744686746.638138...
Retrieving data for player ID 1630583 (Attempt 1) at 1744686746.726418...
Retrieving data for player ID 1641725 (Attempt 1) at 1744686746.8135595...
Retrieving data for player ID 1629638 (Attempt 1) at 1744686746.8896322...
Retrieving data for player ID 1628960 (Attempt 1) at 1744686746.976582...
Retrieving data for player ID 1628386 (Attempt 1) at 1744686747.0473127...
Retrieving data for player ID 1630631 (Attempt 1) at 1744686747.1371229...
Retrieving data for player ID 203937 (Attempt 1) at 1744686747.2031941...
Retrieving data for player ID 203507 (Attempt 1) at 1744686747.2985094...
Retrieving data for player ID 1630175 (Attempt 1) at 1744686747.371562...
Retrieving data for player ID 162

In [None]:
all_players_game_log_24_25.columns

In [None]:
all_players_game_log_24_25.shape[0]

In [None]:
all_players_game_log_24_25.head()

In [11]:
# Export to CSV
all_players_game_log_24_25.to_csv("all_players_game_log_24_25.csv", index=False)

##### 2023-24 Totals

In [11]:
# *Took 1 hour+ to run*
# Objective: Loop through game logs to get data for all players in 23-24 season
# Method: Loop through each player_id using parallel threading, smart rate-limiting, and a retry mechanism to avoid NBA rate limit errors

player_info = {player["id"]: player["full_name"] for player in all_players}

# Function to fetch and process data for a single player with up to 5 retries
def fetch_player_game_logs(player_id, retries=5):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving data for player ID {player_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call game logs for each player with increased timeout (from default 30 to 60)
            response = PlayerGameLog(
                player_id=player_id,
                season="2023-24",
                timeout=60
            )

            data_frames = response.get_data_frames()
            
            # Ensure game log data exists and return data frame per player or stop retrying if no valid data
            if not data_frames or data_frames[0].empty:
                print(f"‚ùå No game log data for player ID {player_id}")
                return None

            game_log_df = data_frames[0].copy()
            game_log_df["PLAYER_ID"] = player_id
            game_log_df["FULL_NAME"] = player_info.get(player_id, "Unknown")
            return game_log_df

        # Return error message
        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
            print(f"‚ö†Ô∏è Connection issue for player ID {player_id}: {e}")
        except Exception as e:
            print(f"‚ö†Ô∏è Unexpected error retrieving data for player ID {player_id}: {e}")

        # Exponential backoff with random jitter to avoid API blocking before next attempt
        attempt += 1
        sleep_time = (2 ** attempt) + random.uniform(0, 1)
        print(f"üîÑ Retrying player ID {player_id} in {sleep_time:.2f} seconds...")
        time.sleep(sleep_time)

    print(f"üö® Failed to retrieve data for player ID {player_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 1

# Store game log data for all players in a list
all_players_game_log_23_24 = []

# Run multiple requests in parallel and submit future objects into player dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_player = {executor.submit(fetch_player_game_logs, pid): pid for pid in player_ids} 

    # Process results as soon as they're completed and add results from future_to_player function to player dictionary
    for future in as_completed(future_to_player):
        result = future.result()
        if result is not None:
            all_players_game_log_23_24.append(result)

# Combine results into a single DF
if all_players_game_log_23_24:
    all_players_game_log_23_24 = pd.concat(all_players_game_log_23_24, ignore_index=True)

print("‚úÖ Data retrieval complete!")

Retrieving data for player ID 1630173 (Attempt 1) at 1744726023.6007245...
Retrieving data for player ID 203500 (Attempt 1) at 1744726023.718408...
‚ùå No game log data for player ID 203500
Retrieving data for player ID 1628389 (Attempt 1) at 1744726023.7802987...
Retrieving data for player ID 1630534 (Attempt 1) at 1744726023.8416462...
Retrieving data for player ID 1630583 (Attempt 1) at 1744726023.902791...
Retrieving data for player ID 1641725 (Attempt 1) at 1744726024.004433...
‚ùå No game log data for player ID 1641725
Retrieving data for player ID 1629638 (Attempt 1) at 1744726024.0684462...
Retrieving data for player ID 1628960 (Attempt 1) at 1744726024.1397297...
Retrieving data for player ID 1628386 (Attempt 1) at 1744726024.2010083...
Retrieving data for player ID 1630631 (Attempt 1) at 1744726024.2567947...
Retrieving data for player ID 203937 (Attempt 1) at 1744726024.3039143...
Retrieving data for player ID 203507 (Attempt 1) at 1744726024.4349782...
Retrieving data for p

In [13]:
all_players_game_log_23_24.shape[0]

22295

In [15]:
all_players_game_log_23_24.head()

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,PLAYER_ID,FULL_NAME
0,22023,1630173,22301190,"APR 14, 2024",NYK vs. CHI,W,19,2,3,0.667,...,2,0,1,2,3,4,-2,1,1630173,Precious Achiuwa
1,22023,1630173,22301175,"APR 12, 2024",NYK vs. BKN,W,8,2,2,1.0,...,0,0,0,0,0,5,3,1,1630173,Precious Achiuwa
2,22023,1630173,22301167,"APR 11, 2024",NYK @ BOS,W,16,1,6,0.167,...,0,0,1,1,0,2,-9,1,1630173,Precious Achiuwa
3,22023,1630173,22301139,"APR 07, 2024",NYK @ MIL,W,5,0,1,0.0,...,0,0,0,0,1,0,5,1,1630173,Precious Achiuwa
4,22023,1630173,22301119,"APR 05, 2024",NYK @ CHI,L,19,0,2,0.0,...,1,1,0,1,4,0,-2,1,1630173,Precious Achiuwa


In [17]:
# Export to CSV
all_players_game_log_23_24.to_csv("all_players_game_log_23_24.csv", index=False)

## III. Role Stats

### USAGE:

#### LeagueGameLog: *Get a list of all game IDs from every season*

In [7]:
from nba_api.stats.endpoints import LeagueGameLog

##### 2024-25 Game IDs

In [9]:
league_game_log_24_25 = LeagueGameLog(season='2024-25', season_type_all_star='Regular Season')
league_game_log_24_25_df = league_game_log_24_25.get_data_frames()[0]
league_game_log_24_25_df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,22024,1610612752,NYK,New York Knicks,0022400061,2024-10-22,NYK @ BOS,L,240,43,...,29,34,20,2,3,12,12,109,-23,1
1,22024,1610612738,BOS,Boston Celtics,0022400061,2024-10-22,BOS vs. NYK,W,240,48,...,29,40,33,6,3,4,15,132,23,1
2,22024,1610612750,MIN,Minnesota Timberwolves,0022400062,2024-10-22,MIN @ LAL,L,240,35,...,35,47,17,4,1,16,22,103,-7,1
3,22024,1610612747,LAL,Los Angeles Lakers,0022400062,2024-10-22,LAL vs. MIN,W,240,42,...,31,46,22,7,8,7,22,110,7,1
4,22024,1610612746,LAC,LA Clippers,0022400071,2024-10-23,LAC vs. PHX,L,265,42,...,38,51,27,9,1,22,29,113,-3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,22024,1610612762,UTA,Utah Jazz,0022401195,2025-04-13,UTA @ MIN,L,240,40,...,31,40,24,6,5,14,15,105,-11,1
2456,22024,1610612739,CLE,Cleveland Cavaliers,0022401189,2025-04-13,CLE vs. IND,L,290,44,...,47,65,21,6,9,22,23,118,-8,1
2457,22024,1610612749,MIL,Milwaukee Bucks,0022401192,2025-04-13,MIL vs. DET,W,265,50,...,32,38,30,5,7,21,17,140,7,1
2458,22024,1610612765,DET,Detroit Pistons,0022401192,2025-04-13,DET @ MIL,L,265,51,...,28,42,35,9,3,13,19,133,-7,1


In [11]:
league_game_log_24_25_df['GAME_DATE'].dtype

dtype('O')

In [13]:
# Convert game_date into datetime object
league_game_log_24_25_df['GAME_DATE'] = pd.to_datetime(league_game_log_24_25_df['GAME_DATE'])

In [15]:
game_ids_24_25 = league_game_log_24_25_df[['GAME_ID', 'GAME_DATE']]
game_ids_24_25

Unnamed: 0,GAME_ID,GAME_DATE
0,0022400061,2024-10-22
1,0022400061,2024-10-22
2,0022400062,2024-10-22
3,0022400062,2024-10-22
4,0022400071,2024-10-23
...,...,...
2455,0022401195,2025-04-13
2456,0022401189,2025-04-13
2457,0022401192,2025-04-13
2458,0022401192,2025-04-13


In [17]:
all_star_date = pd.to_datetime("2025-02-14")

In [19]:
game_ids_24_25_1H = game_ids_24_25[game_ids_24_25['GAME_DATE'] < all_star_date]
game_ids_24_25_1H

Unnamed: 0,GAME_ID,GAME_DATE
0,0022400061,2024-10-22
1,0022400061,2024-10-22
2,0022400062,2024-10-22
3,0022400062,2024-10-22
4,0022400071,2024-10-23
...,...,...
1623,0022400785,2025-02-13
1624,0022400786,2025-02-13
1625,0022400785,2025-02-13
1626,0022400784,2025-02-13


In [21]:
game_ids_24_25_2H = game_ids_24_25[game_ids_24_25['GAME_DATE'] > all_star_date]
game_ids_24_25_2H

Unnamed: 0,GAME_ID,GAME_DATE
1628,0022400524,2025-02-19
1629,0022400524,2025-02-19
1630,0022400795,2025-02-20
1631,0022400796,2025-02-20
1632,0022400796,2025-02-20
...,...,...
2455,0022401195,2025-04-13
2456,0022401189,2025-04-13
2457,0022401192,2025-04-13
2458,0022401192,2025-04-13


##### 2023-24 Game IDs

In [25]:
league_game_log_23_24 = LeagueGameLog(season='2023-24', season_type_all_star='Regular Season')
league_game_log_23_24_df = league_game_log_23_24.get_data_frames()[0]
league_game_log_23_24_df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,22023,1610612744,GSW,Golden State Warriors,0022300062,2023-10-24,GSW vs. PHX,L,240,36,...,31,49,19,11,6,11,23,104,-4,1
1,22023,1610612756,PHX,Phoenix Suns,0022300062,2023-10-24,PHX @ GSW,W,240,42,...,43,60,23,5,7,19,22,108,4,1
2,22023,1610612747,LAL,Los Angeles Lakers,0022300061,2023-10-24,LAL @ DEN,L,240,41,...,31,44,23,5,4,12,18,107,-12,1
3,22023,1610612743,DEN,Denver Nuggets,0022300061,2023-10-24,DEN vs. LAL,W,240,48,...,33,42,29,9,6,12,15,119,12,1
4,22023,1610612748,MIA,Miami Heat,0022300068,2023-10-25,MIA vs. DET,W,240,37,...,32,48,22,11,3,7,18,103,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,22023,1610612757,POR,Portland Trail Blazers,0022301200,2024-04-14,POR @ SAC,L,240,31,...,31,54,18,11,2,18,19,82,-39,1
2456,22023,1610612754,IND,Indiana Pacers,0022301188,2024-04-14,IND vs. ATL,W,240,65,...,36,48,41,10,6,16,24,157,42,1
2457,22023,1610612738,BOS,Boston Celtics,0022301186,2024-04-14,BOS vs. WAS,W,240,51,...,38,48,29,10,15,17,13,132,10,1
2458,22023,1610612750,MIN,Minnesota Timberwolves,0022301194,2024-04-14,MIN vs. PHX,L,240,36,...,24,36,22,7,5,24,20,106,-19,1


In [27]:
# Convert game_date into datetime object
league_game_log_23_24_df['GAME_DATE'] = pd.to_datetime(league_game_log_23_24_df['GAME_DATE'])

In [29]:
game_ids_23_24 = league_game_log_23_24_df[['GAME_ID', 'GAME_DATE']]
game_ids_23_24

Unnamed: 0,GAME_ID,GAME_DATE
0,0022300062,2023-10-24
1,0022300062,2023-10-24
2,0022300061,2023-10-24
3,0022300061,2023-10-24
4,0022300068,2023-10-25
...,...,...
2455,0022301200,2024-04-14
2456,0022301188,2024-04-14
2457,0022301186,2024-04-14
2458,0022301194,2024-04-14


#### BoxScoreUsageV2

In [114]:
from nba_api.stats.endpoints import BoxScoreUsageV2

##### 2024-25 Pre All-Star

In [124]:
# Takes 4+ hours to run
# Function to fetch and process data for each box score with up to 5 retries
def fetch_box_scores(game_id, retries=5):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving box scores for game ID {game_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call box scores for each game with increased timeout (from default 30 to 60)
            response = BoxScoreUsageV2(game_id=game_id, timeout=60)
            data_frames = response.get_data_frames()

            # Ensure box score data exists and return data frame per player or stop retrying if no valid data
            if not data_frames or data_frames[0].empty:
                print(f"‚ùå No box score data for game ID {game_id}")
                return None

            box_score_df = data_frames[0].copy()
            box_score_df["GAME_ID"] = game_id
            return box_score_df

        # Return error message
        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
            print(f"‚ö†Ô∏è Connection issue for game ID {game_id}: {e}")
        except Exception as e:
            print(f"‚ö†Ô∏è Unexpected error retrieving data for game ID {game_id}: {e}")

        # Exponential backoff with random jitter to avoid API blocking before next attempt
        attempt += 1
        sleep_time = (2 ** attempt) + random.uniform(0, 1)
        print(f"üîÑ Retrying game ID {game_id} in {sleep_time:.2f} seconds...")
        time.sleep(sleep_time)

    print(f"üö® Failed to retrieve data for game ID {game_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 1

# Store box score data for all games in a list
box_score_usage_24_25_1H = []

# Run multiple requests in parallel and submit future objects into game dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_game = {
        executor.submit(fetch_box_scores, gid): gid
        for gid in game_ids_24_25_1H["GAME_ID"]
    }

    # Process results as soon as they're completed and add results from future_to_game function to box score list
    for future in as_completed(future_to_game):
        result = future.result()
        if result is not None:
            box_score_usage_24_25_1H.append(result)

# Combine results into a DF
if box_score_usage_24_25_1H:
    usage_24_25_1H = pd.concat(box_score_usage_24_25_1H, ignore_index=True)
    
    print("‚úÖ Box Score Usage data retrieval complete!")

Retrieving box scores for game ID 0022400062 (Attempt 1) at 1744761099.0816658...
Retrieving box scores for game ID 0022400062 (Attempt 1) at 1744761099.2687564...
Retrieving box scores for game ID 0022400061 (Attempt 1) at 1744761099.3076308...
Retrieving box scores for game ID 0022400061 (Attempt 1) at 1744761099.3819194...
Retrieving box scores for game ID 0022400071 (Attempt 1) at 1744761099.41148...
Retrieving box scores for game ID 0022400068 (Attempt 1) at 1744761099.470867...
Retrieving box scores for game ID 0022400069 (Attempt 1) at 1744761099.5438414...
Retrieving box scores for game ID 0022400072 (Attempt 1) at 1744761099.603207...
Retrieving box scores for game ID 0022400065 (Attempt 1) at 1744761099.6644485...
Retrieving box scores for game ID 0022400070 (Attempt 1) at 1744761099.7283573...
Retrieving box scores for game ID 0022400067 (Attempt 1) at 1744761099.7961078...
Retrieving box scores for game ID 0022400072 (Attempt 1) at 1744761099.8640554...
Retrieving box score

In [127]:
usage_24_25_1H.columns

Index(['GAME_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'PLAYER_ID',
       'PLAYER_NAME', 'NICKNAME', 'START_POSITION', 'COMMENT', 'MIN',
       'USG_PCT', 'PCT_FGM', 'PCT_FGA', 'PCT_FG3M', 'PCT_FG3A', 'PCT_FTM',
       'PCT_FTA', 'PCT_OREB', 'PCT_DREB', 'PCT_REB', 'PCT_AST', 'PCT_TOV',
       'PCT_STL', 'PCT_BLK', 'PCT_BLKA', 'PCT_PF', 'PCT_PFD', 'PCT_PTS'],
      dtype='object')

In [129]:
usage_24_25_1H.head()

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,MIN,...,PCT_DREB,PCT_REB,PCT_AST,PCT_TOV,PCT_STL,PCT_BLK,PCT_BLKA,PCT_PF,PCT_PFD,PCT_PTS
0,22400062,1610612750,MIN,Minnesota,1630183,Jaden McDaniels,Jaden,F,,16.000000:00,...,0.154,0.125,0.143,0.1,1.0,0.0,0.5,0.625,0.25,0.222
1,22400062,1610612750,MIN,Minnesota,203944,Julius Randle,Julius,F,,34.000000:19,...,0.24,0.257,0.4,0.2,0.0,0.0,0.167,0.25,0.313,0.222
2,22400062,1610612750,MIN,Minnesota,203497,Rudy Gobert,Rudy,C,,35.000000:20,...,0.379,0.412,0.133,0.077,0.0,1.0,0.5,0.235,0.278,0.178
3,22400062,1610612750,MIN,Minnesota,1630162,Anthony Edwards,Anthony,G,,41.000000:04,...,0.182,0.143,0.214,0.308,0.0,0.0,0.0,0.176,0.167,0.3
4,22400062,1610612750,MIN,Minnesota,201144,Mike Conley,Mike,G,,20.000000:13,...,0.154,0.235,0.286,0.25,0.5,0.0,0.2,0.083,0.1,0.156


In [131]:
usage_24_25_1H.shape[0]

41797

In [133]:
# Export to CSV
usage_24_25_1H.to_csv("usage_24_25_1H.csv", index=False)

##### 2024-25 Post All-Star

In [4]:
# Takes 4+ hours to run
# Function to fetch and process data for each box score with up to 5 retries
def fetch_box_scores(game_id, retries=5):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving box scores for game ID {game_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call box scores for each game with increased timeout (from default 30 to 60)
            response = BoxScoreUsageV2(game_id=game_id, timeout=60)
            data_frames = response.get_data_frames()

            # Ensure box score data exists and return data frame per player or stop retrying if no valid data
            if not data_frames or data_frames[0].empty:
                print(f"‚ùå No box score data for game ID {game_id}")
                return None

            box_score_df = data_frames[0].copy()
            box_score_df["GAME_ID"] = game_id
            return box_score_df

        # Return error message
        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
            print(f"‚ö†Ô∏è Connection issue for game ID {game_id}: {e}")
        except Exception as e:
            print(f"‚ö†Ô∏è Unexpected error retrieving data for game ID {game_id}: {e}")

        # Exponential backoff with random jitter to avoid API blocking before next attempt
        attempt += 1
        sleep_time = (2 ** attempt) + random.uniform(0, 1)
        print(f"üîÑ Retrying game ID {game_id} in {sleep_time:.2f} seconds...")
        time.sleep(sleep_time)

    print(f"üö® Failed to retrieve data for game ID {game_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 1

# Store box score data for all games in a list
box_score_usage_24_25_2H = []

# Run multiple requests in parallel and submit future objects into game dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_game = {
        executor.submit(fetch_box_scores, gid): gid
        for gid in game_ids_24_25_2H["GAME_ID"]
    }

    # Process results as soon as they're completed and add results from future_to_game function to box score list
    for future in as_completed(future_to_game):
        result = future.result()
        if result is not None:
            box_score_usage_24_25_2H.append(result)

# Combine results into a DF
if box_score_usage_24_25_2H:
    usage_24_25_2H = pd.concat(box_score_usage_24_25_2H, ignore_index=True)
    
    print("‚úÖ Box Score Usage data retrieval complete!")

NameError: name 'ThreadPoolExecutor' is not defined

In [137]:
usage_24_25_2H.shape[0]

20719

In [139]:
# Export to CSV
usage_24_25_2H.to_csv("usage_24_25_2H.csv", index=False)

##### 2023-24

In [141]:
# Takes 4+ hours to run
# Function to fetch and process data for each box score with up to 5 retries
def fetch_box_scores(game_id, retries=5):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving box scores for game ID {game_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call box scores for each game with increased timeout (from default 30 to 60)
            response = BoxScoreUsageV2(game_id=game_id, timeout=60)
            data_frames = response.get_data_frames()

            # Ensure box score data exists and return data frame per player or stop retrying if no valid data
            if not data_frames or data_frames[0].empty:
                print(f"‚ùå No box score data for game ID {game_id}")
                return None

            box_score_df = data_frames[0].copy()
            box_score_df["GAME_ID"] = game_id
            return box_score_df

        # Return error message
        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
            print(f"‚ö†Ô∏è Connection issue for game ID {game_id}: {e}")
        except Exception as e:
            print(f"‚ö†Ô∏è Unexpected error retrieving data for game ID {game_id}: {e}")

        # Exponential backoff with random jitter to avoid API blocking before next attempt
        attempt += 1
        sleep_time = (2 ** attempt) + random.uniform(0, 1)
        print(f"üîÑ Retrying game ID {game_id} in {sleep_time:.2f} seconds...")
        time.sleep(sleep_time)

    print(f"üö® Failed to retrieve data for game ID {game_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 1

# Store box score data for all games in a list
box_score_usage_23_24 = []

# Run multiple requests in parallel and submit future objects into game dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_game = {
        executor.submit(fetch_box_scores, gid): gid
        for gid in game_ids_23_24["GAME_ID"]
    }

    # Process results as soon as they're completed and add results from future_to_game function to box score list
    for future in as_completed(future_to_game):
        result = future.result()
        if result is not None:
            box_score_usage_23_24.append(result)

# Combine results into a DF
if box_score_usage_23_24:
    usage_23_24 = pd.concat(box_score_usage_23_24, ignore_index=True)
    
    print("‚úÖ Box Score Usage data retrieval complete!")

Retrieving box scores for game ID 0022300061 (Attempt 1) at 1744813056.7492018...
Retrieving box scores for game ID 0022300061 (Attempt 1) at 1744813057.4520018...
Retrieving box scores for game ID 0022300062 (Attempt 1) at 1744813057.4712052...
Retrieving box scores for game ID 0022300062 (Attempt 1) at 1744813058.0499191...
Retrieving box scores for game ID 0022300068 (Attempt 1) at 1744813058.0725517...
Retrieving box scores for game ID 0022300069 (Attempt 1) at 1744813058.798308...
Retrieving box scores for game ID 0022300066 (Attempt 1) at 1744813059.53334...
Retrieving box scores for game ID 0022300067 (Attempt 1) at 1744813060.3212082...
Retrieving box scores for game ID 0022300064 (Attempt 1) at 1744813061.0653522...
Retrieving box scores for game ID 0022300070 (Attempt 1) at 1744813061.624855...
Retrieving box scores for game ID 0022300072 (Attempt 1) at 1744813062.250747...
Retrieving box scores for game ID 0022300066 (Attempt 1) at 1744813062.9999719...
Retrieving box scores

In [47]:
usage_23_24.shape[0]

NameError: name 'usage_23_24' is not defined

In [145]:
# Export to CSV
usage_23_24.to_csv("usage_23_24.csv", index=False)

### AGGRESSION

#### BoxScoreMiscV2

In [31]:
from nba_api.stats.endpoints import BoxScoreMiscV2

##### 2024-25 Pre All-Star

In [31]:
# Takes ~6 hours to run
# Function to fetch and process data for each box score with up to 5 retries
def fetch_box_scores(game_id, retries=5):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving box scores for game ID {game_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call box scores for each game with increased timeout (from default 30 to 60)
            response = BoxScoreMiscV2(game_id=game_id, timeout=60)
            data_frames = response.get_data_frames()

            # Ensure box score data exists and return data frame per player or stop retrying if no valid data
            if not data_frames or data_frames[0].empty:
                print(f"‚ùå No box score data for game ID {game_id}")
                return None

            box_score_df = data_frames[0].copy()
            box_score_df["GAME_ID"] = game_id
            return box_score_df

        # Return error message
        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
            print(f"‚ö†Ô∏è Connection issue for game ID {game_id}: {e}")
        except Exception as e:
            print(f"‚ö†Ô∏è Unexpected error retrieving data for game ID {game_id}: {e}")

        # Exponential backoff with random jitter to avoid API blocking before next attempt
        attempt += 1
        sleep_time = (2 ** attempt) + random.uniform(0, 1)
        print(f"üîÑ Retrying game ID {game_id} in {sleep_time:.2f} seconds...")
        time.sleep(sleep_time)

    print(f"üö® Failed to retrieve data for game ID {game_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 1

# Store box score data for all games in a list
box_score_misc_24_25_1H = []

# Run multiple requests in parallel and submit future objects into game dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_game = {
        executor.submit(fetch_box_scores, gid): gid
        for gid in game_ids_24_25_1H["GAME_ID"]
    }

    # Process results as soon as they're completed and add results from future_to_game function to box score list
    for future in as_completed(future_to_game):
        result = future.result()
        if result is not None:
            box_score_misc_24_25_1H.append(result)

# Combine results into a DF
if box_score_misc_24_25_1H:
    misc_24_25_1H = pd.concat(box_score_misc_24_25_1H, ignore_index=True)
    
    print("‚úÖ Box Score Miscellaneous data retrieval complete!")

Retrieving box scores for game ID 0022400062 (Attempt 1) at 1744901477.2888703...
Retrieving box scores for game ID 0022400061 (Attempt 1) at 1744901477.8309216...
Retrieving box scores for game ID 0022400062 (Attempt 1) at 1744901478.3561754...
Retrieving box scores for game ID 0022400061 (Attempt 1) at 1744901478.3727503...
Retrieving box scores for game ID 0022400066 (Attempt 1) at 1744901478.3854754...
Retrieving box scores for game ID 0022400063 (Attempt 1) at 1744901479.086013...
Retrieving box scores for game ID 0022400070 (Attempt 1) at 1744901479.7135398...
Retrieving box scores for game ID 0022400064 (Attempt 1) at 1744901480.2111654...
Retrieving box scores for game ID 0022400071 (Attempt 1) at 1744901480.9040437...
Retrieving box scores for game ID 0022400068 (Attempt 1) at 1744901481.4699926...
Retrieving box scores for game ID 0022400069 (Attempt 1) at 1744901482.1691353...
Retrieving box scores for game ID 0022400064 (Attempt 1) at 1744901482.7579467...
Retrieving box sc

In [49]:
misc_24_25_1H.shape[0]

41730

In [51]:
misc_24_25_1H.columns

Index(['GAME_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'PLAYER_ID',
       'PLAYER_NAME', 'NICKNAME', 'START_POSITION', 'COMMENT', 'MIN',
       'PTS_OFF_TOV', 'PTS_2ND_CHANCE', 'PTS_FB', 'PTS_PAINT',
       'OPP_PTS_OFF_TOV', 'OPP_PTS_2ND_CHANCE', 'OPP_PTS_FB', 'OPP_PTS_PAINT',
       'BLK', 'BLKA', 'PF', 'PFD'],
      dtype='object')

In [55]:
# Combine list of dataframes into a single DF

box_score_misc_24_25_1H_separate_DFs = box_score_misc_24_25_1H.copy()
box_score_misc_24_25_1H = pd.concat(box_score_misc_24_25_1H_separate_DFs, ignore_index=True)

In [57]:
# Export to CSV
box_score_misc_24_25_1H.to_csv("box_score_misc_24_25_1H.csv", index=False)

##### 2024-25 Post All-Star

In [None]:
# Takes ~6 hours to run
# Function to fetch and process data for each box score with up to 5 retries
def fetch_box_scores(game_id, retries=5):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving box scores for game ID {game_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call box scores for each game with increased timeout (from default 30 to 60)
            response = BoxScoreMiscV2(game_id=game_id, timeout=60)
            data_frames = response.get_data_frames()

            # Ensure box score data exists and return data frame per player or stop retrying if no valid data
            if not data_frames or data_frames[0].empty:
                print(f"‚ùå No box score data for game ID {game_id}")
                return None

            box_score_df = data_frames[0].copy()
            box_score_df["GAME_ID"] = game_id
            return box_score_df

        # Return error message
        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
            print(f"‚ö†Ô∏è Connection issue for game ID {game_id}: {e}")
        except Exception as e:
            print(f"‚ö†Ô∏è Unexpected error retrieving data for game ID {game_id}: {e}")

        # Exponential backoff with random jitter to avoid API blocking before next attempt
        attempt += 1
        sleep_time = (2 ** attempt) + random.uniform(0, 1)
        print(f"üîÑ Retrying game ID {game_id} in {sleep_time:.2f} seconds...")
        time.sleep(sleep_time)

    print(f"üö® Failed to retrieve data for game ID {game_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 1

# Store box score data for all games in a list
box_score_misc_24_25_2H = []

# Run multiple requests in parallel and submit future objects into game dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_game = {
        executor.submit(fetch_box_scores, gid): gid
        for gid in game_ids_24_25_2H["GAME_ID"]
    }

    # Process results as soon as they're completed and add results from future_to_game function to box score list
    for future in as_completed(future_to_game):
        result = future.result()
        if result is not None:
            box_score_misc_24_25_2H.append(result)

# Combine results into a DF
if box_score_misc_24_25_2H:
    misc_24_25_2H = pd.concat(box_score_misc_24_25_2H, ignore_index=True)
    
    print("‚úÖ Box Score Miscellaneous data retrieval complete!")

Retrieving box scores for game ID 0022400524 (Attempt 1) at 1745422557.4941049...
Retrieving box scores for game ID 0022400524 (Attempt 1) at 1745422558.068802...
Retrieving box scores for game ID 0022400795 (Attempt 1) at 1745422558.0901506...
Retrieving box scores for game ID 0022400796 (Attempt 1) at 1745422558.8632352...
Retrieving box scores for game ID 0022400796 (Attempt 1) at 1745422559.4707825...
Retrieving box scores for game ID 0022400794 (Attempt 1) at 1745422559.485424...
Retrieving box scores for game ID 0022400795 (Attempt 1) at 1745422560.0154922...
Retrieving box scores for game ID 0022400790 (Attempt 1) at 1745422560.0371013...
Retrieving box scores for game ID 0022400794 (Attempt 1) at 1745422560.610433...
Retrieving box scores for game ID 0022400793 (Attempt 1) at 1745422560.6323597...
Retrieving box scores for game ID 0022400793 (Attempt 1) at 1745422561.2476292...
Retrieving box scores for game ID 0022400792 (Attempt 1) at 1745422561.267888...
Retrieving box score

In [62]:
misc_24_25_2H.shape[0]

21364

In [64]:
# Combine list of dataframes into a single DF

box_score_misc_24_25_2H_separate_DFs = box_score_misc_24_25_2H.copy()
box_score_misc_24_25_2H = pd.concat(box_score_misc_24_25_2H_separate_DFs, ignore_index=True)

In [66]:
# Export to CSV
box_score_misc_24_25_2H.to_csv("box_score_misc_24_25_2H.csv", index=False)

##### 2023-24

In [33]:
# Takes ~6 hours to run
# Function to fetch and process data for each box score with up to 5 retries
def fetch_box_scores(game_id, retries=5):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving box scores for game ID {game_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call box scores for each game with increased timeout (from default 30 to 60)
            response = BoxScoreMiscV2(game_id=game_id, timeout=60)
            data_frames = response.get_data_frames()

            # Ensure box score data exists and return data frame per player or stop retrying if no valid data
            if not data_frames or data_frames[0].empty:
                print(f"‚ùå No box score data for game ID {game_id}")
                return None

            box_score_df = data_frames[0].copy()
            box_score_df["GAME_ID"] = game_id
            return box_score_df

        # Return error message
        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
            print(f"‚ö†Ô∏è Connection issue for game ID {game_id}: {e}")
        except Exception as e:
            print(f"‚ö†Ô∏è Unexpected error retrieving data for game ID {game_id}: {e}")

        # Exponential backoff with random jitter to avoid API blocking before next attempt
        attempt += 1
        sleep_time = (2 ** attempt) + random.uniform(0, 1)
        print(f"üîÑ Retrying game ID {game_id} in {sleep_time:.2f} seconds...")
        time.sleep(sleep_time)

    print(f"üö® Failed to retrieve data for game ID {game_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 1

# Store box score data for all games in a list
box_score_misc_23_24 = []

# Run multiple requests in parallel and submit future objects into game dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_game = {
        executor.submit(fetch_box_scores, gid): gid
        for gid in game_ids_23_24["GAME_ID"]
    }

    # Process results as soon as they're completed and add results from future_to_game function to box score list
    for future in as_completed(future_to_game):
        result = future.result()
        if result is not None:
            box_score_misc_23_24.append(result)

# Combine results into a DF
if box_score_misc_23_24:
    misc_23_24 = pd.concat(box_score_misc_23_24, ignore_index=True)
    
    print("‚úÖ Box Score Miscellaneous data retrieval complete!")

Retrieving box scores for game ID 0022300062 (Attempt 1) at 1745422661.5460682...
Retrieving box scores for game ID 0022300062 (Attempt 1) at 1745422662.4282205...
Retrieving box scores for game ID 0022300061 (Attempt 1) at 1745422662.4492664...
Retrieving box scores for game ID 0022300061 (Attempt 1) at 1745422663.0075839...
Retrieving box scores for game ID 0022300068 (Attempt 1) at 1745422663.0283763...
Retrieving box scores for game ID 0022300074 (Attempt 1) at 1745422663.5913587...
Retrieving box scores for game ID 0022300067 (Attempt 1) at 1745422664.1267...
Retrieving box scores for game ID 0022300069 (Attempt 1) at 1745422664.660675...
Retrieving box scores for game ID 0022300063 (Attempt 1) at 1745422665.3922725...
Retrieving box scores for game ID 0022300074 (Attempt 1) at 1745422665.9303305...
Retrieving box scores for game ID 0022300064 (Attempt 1) at 1745422665.9508538...
Retrieving box scores for game ID 0022300064 (Attempt 1) at 1745422666.686425...
Retrieving box scores

In [45]:
# Combine list of dataframes into a single DF

box_score_misc_23_24_separate_DFs = box_score_misc_23_24.copy()
box_score_misc_23_24 = pd.concat(box_score_misc_23_24_separate_DFs, ignore_index=True)

In [47]:
# Export to CSV
box_score_misc_23_24.to_csv("box_score_misc_23_24.csv", index=False)

In [51]:
box_score_misc_23_24.shape[0]

62508

## IV. Shot Profile

### SHOT LOCATION:

In [19]:
from nba_api.stats.endpoints import PlayerDashboardByShootingSplits
from nba_api.stats.static import players

In [21]:
# Get all active player IDs
all_players = players.get_active_players()
player_ids = [p['id'] for p in all_players]
player_dict = {p['id']: p['full_name'] for p in all_players}

##### 2024-25 Pre All-Star Totals

In [23]:
# Loop through shooting splits to get shot location data for all players in 24-25 season pre all-star
# Takes ~10 minutes to run
# Method: loop through each player_id using parallel threading, smart rate-limiting, and a retry mechanism to avoid NBA rate limit errors

player_info = {player["id"]: player["full_name"] for player in all_players}

# Function to fetch and process data for a single player with up to 3 retries
def fetch_shooting_splits(player_id, retries=3):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving data for player ID {player_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call shooting splits for each player
            response = PlayerDashboardByShootingSplits(
                player_id=player_id,
                season="2024-25",
                season_segment_nullable="Pre All-Star"
            )

            data_frames = response.get_data_frames()

            # Ensure shot location data (data frame 3) exists and return data frame per player
            if len(data_frames) > 3 and not data_frames[3].empty:
                shooting_splits_df = data_frames[3].copy()
                shooting_splits_df["PLAYER_ID"] = player_id
                shooting_splits_df["FULL_NAME"] = player_info.get(player_id, "Unknown")
                return shooting_splits_df

            # Stop retrying if no valid data
            print(f"‚ùå No shot location data for player ID {player_id}")
            return None 

        # Return error message and add exponential backoff + jitter (time.sleep) to avoid API blocking before next attempt
        except Exception as e:
            print(f"‚ö†Ô∏è Error retrieving data for player ID {player_id}: {e}")
            attempt += 1
            time.sleep(2 ** attempt + random.uniform(0, 1))
    
    # Give up after 3 retries
    print(f"üö® Failed to retrieve data for player ID {player_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 3 

# Store shot location data for all players in a list
all_players_shot_location_24_25_1H = []

# Run multiple requests in parallel and submit future objects into player dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_player = {executor.submit(fetch_shooting_splits, pid): pid for pid in player_ids} 

    # Process results as soon as they're completed and add results from future_to_player function to shot location list
    for future in as_completed(future_to_player):
        result = future.result()
        if result is not None:
            all_players_shot_location_24_25_1H.append(result)

# Combine results into a single DF
if all_players_shot_location_24_25_1H:
    all_players_shot_location_24_25_1H = pd.concat(all_players_shot_location_24_25_1H, ignore_index=True)

print("‚úÖ Data retrieval complete!")

Retrieving data for player ID 1630173 (Attempt 1) at 1744739456.0542543...
Retrieving data for player ID 203500 (Attempt 1) at 1744739456.0607...
Retrieving data for player ID 1628389 (Attempt 1) at 1744739456.0640726...
Retrieving data for player ID 1630534 (Attempt 1) at 1744739458.2520313...
Retrieving data for player ID 1630583 (Attempt 1) at 1744739458.7898266...
Retrieving data for player ID 1641725 (Attempt 1) at 1744739458.8222542...
Retrieving data for player ID 1629638 (Attempt 1) at 1744739460.7118053...
Retrieving data for player ID 1628960 (Attempt 1) at 1744739460.716769...
Retrieving data for player ID 1628386 (Attempt 1) at 1744739460.7703385...
Retrieving data for player ID 1630631 (Attempt 1) at 1744739462.5685358...
Retrieving data for player ID 203937 (Attempt 1) at 1744739462.852729...
Retrieving data for player ID 203507 (Attempt 1) at 1744739463.1611009...
Retrieving data for player ID 1630175 (Attempt 1) at 1744739465.0233183...
Retrieving data for player ID 162

In [40]:
all_players_shot_location_24_25_1H.head()

Unnamed: 0,GROUP_SET,GROUP_VALUE,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,EFG_PCT,BLKA,...,EFG_PCT_RANK,BLKA_RANK,PCT_AST_2PM_RANK,PCT_UAST_2PM_RANK,PCT_AST_3PM_RANK,PCT_UAST_3PM_RANK,PCT_AST_FGM_RANK,PCT_UAST_FGM_RANK,PLAYER_ID,FULL_NAME
0,Shot Area,Restricted Area,76,115,0.661,0,0,0.0,0.661,10,...,1,7,1,1,4,1,1,1,1630173,Precious Achiuwa
1,Shot Area,In The Paint (Non-RA),11,36,0.306,0,0,0.0,0.306,3,...,2,6,2,2,4,1,2,2,1630173,Precious Achiuwa
2,Shot Area,Mid-Range,4,5,0.8,0,0,0.0,0.8,0,...,4,1,3,3,4,1,4,3,1630173,Precious Achiuwa
3,Shot Area,Left Corner 3,6,16,0.375,6,16,0.375,0.563,0,...,3,1,4,3,1,1,3,3,1630173,Precious Achiuwa
4,Shot Area,Right Corner 3,1,5,0.2,1,5,0.2,0.3,0,...,5,1,4,3,2,1,5,3,1630173,Precious Achiuwa


In [38]:
all_players_shot_location_24_25_1H.shape[0]

3694

In [42]:
# Export to CSV
all_players_shot_location_24_25_1H.to_csv("all_players_shot_location_24_25_1H.csv", index=False)

##### 2024-25 Post All-Star Totals

In [44]:
# Loop through shooting splits to get shot location data for all players in 24-25 season post all-star
# Takes ~7 minutes to run
# Method: loop through each player_id using parallel threading, smart rate-limiting, and a retry mechanism to avoid NBA rate limit errors

player_info = {player["id"]: player["full_name"] for player in all_players}

# Function to fetch and process data for a single player with up to 3 retries
def fetch_shooting_splits(player_id, retries=3):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving data for player ID {player_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call shooting splits for each player
            response = PlayerDashboardByShootingSplits(
                player_id=player_id,
                season="2024-25",
                season_segment_nullable="Post All-Star"
            )

            data_frames = response.get_data_frames()

            # Ensure shot location data (data frame 3) exists and return data frame per player
            if len(data_frames) > 3 and not data_frames[3].empty:
                shooting_splits_df = data_frames[3].copy()
                shooting_splits_df["PLAYER_ID"] = player_id
                shooting_splits_df["FULL_NAME"] = player_info.get(player_id, "Unknown")
                return shooting_splits_df

            # Stop retrying if no valid data
            print(f"‚ùå No shot location data for player ID {player_id}")
            return None 

        # Return error message and add exponential backoff + jitter (time.sleep) to avoid API blocking before next attempt
        except Exception as e:
            print(f"‚ö†Ô∏è Error retrieving data for player ID {player_id}: {e}")
            attempt += 1
            time.sleep(2 ** attempt + random.uniform(0, 1))
    
    # Give up after 3 retries
    print(f"üö® Failed to retrieve data for player ID {player_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 3 

# Store shot location data for all players in a list
all_players_shot_location_24_25_2H = []

# Run multiple requests in parallel and submit future objects into player dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_player = {executor.submit(fetch_shooting_splits, pid): pid for pid in player_ids} 

    # Process results as soon as they're completed and add results from future_to_player function to shot location list
    for future in as_completed(future_to_player):
        result = future.result()
        if result is not None:
            all_players_shot_location_24_25_2H.append(result)

# Combine results into a single DF
if all_players_shot_location_24_25_2H:
    all_players_shot_location_24_25_2H = pd.concat(all_players_shot_location_24_25_2H, ignore_index=True)

print("‚úÖ Data retrieval complete!")

Retrieving data for player ID 1630173 (Attempt 1) at 1744740243.2000315...
Retrieving data for player ID 203500 (Attempt 1) at 1744740243.2029645...
Retrieving data for player ID 1628389 (Attempt 1) at 1744740243.2050908...
Retrieving data for player ID 1630534 (Attempt 1) at 1744740245.1912205...
Retrieving data for player ID 1630583 (Attempt 1) at 1744740245.8137631...
Retrieving data for player ID 1641725 (Attempt 1) at 1744740245.8222263...
Retrieving data for player ID 1629638 (Attempt 1) at 1744740247.6569426...
Retrieving data for player ID 1628960 (Attempt 1) at 1744740247.9585614...
Retrieving data for player ID 1628386 (Attempt 1) at 1744740248.2721627...
Retrieving data for player ID 1630631 (Attempt 1) at 1744740249.9258451...
Retrieving data for player ID 203937 (Attempt 1) at 1744740250.1119313...
Retrieving data for player ID 203507 (Attempt 1) at 1744740250.8333714...
Retrieving data for player ID 1630175 (Attempt 1) at 1744740252.056192...
Retrieving data for player ID

In [47]:
all_players_shot_location_24_25_2H.shape[0]

3540

In [None]:
all_players_shot_location_24_25_2H.head()

In [49]:
# Export to CSV
all_players_shot_location_24_25_2H.to_csv("all_players_shot_location_24_25_2H.csv", index=False)

##### 2023-24 Totals

In [51]:
# Loop through shooting splits to get shot location data for all players in 23-24 season
# Takes 7 minutes to run
# Method: loop through each player_id using parallel threading, smart rate-limiting, and a retry mechanism to avoid NBA rate limit errors

player_info = {player["id"]: player["full_name"] for player in all_players}

# Function to fetch and process data for a single player with up to 3 retries
def fetch_shooting_splits(player_id, retries=3):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving data for player ID {player_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call shooting splits for each player
            response = PlayerDashboardByShootingSplits(
                player_id=player_id,
                season="2023-24"
            )

            data_frames = response.get_data_frames()

            # Ensure shot location data (data frame 3) exists and return data frame per player
            if len(data_frames) > 3 and not data_frames[3].empty:
                shooting_splits_df = data_frames[3].copy()
                shooting_splits_df["PLAYER_ID"] = player_id
                shooting_splits_df["FULL_NAME"] = player_info.get(player_id, "Unknown")
                return shooting_splits_df

            # Stop retrying if no valid data
            print(f"‚ùå No shot location data for player ID {player_id}")
            return None 

        # Return error message and add exponential backoff + jitter (time.sleep) to avoid API blocking before next attempt
        except Exception as e:
            print(f"‚ö†Ô∏è Error retrieving data for player ID {player_id}: {e}")
            attempt += 1
            time.sleep(2 ** attempt + random.uniform(0, 1))
    
    # Give up after 3 retries
    print(f"üö® Failed to retrieve data for player ID {player_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 3 

# Store shot location data for all players in a list
all_players_shot_location_23_24 = []

# Run multiple requests in parallel and submit future objects into player dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_player = {executor.submit(fetch_shooting_splits, pid): pid for pid in player_ids} 

    # Process results as soon as they're completed and add results from future_to_player function to shot location list
    for future in as_completed(future_to_player):
        result = future.result()
        if result is not None:
            all_players_shot_location_23_24.append(result)

# Combine results into a single DF
if all_players_shot_location_23_24:
    all_players_shot_location_23_24 = pd.concat(all_players_shot_location_23_24, ignore_index=True)

print("‚úÖ Data retrieval complete!")

Retrieving data for player ID 1630173 (Attempt 1) at 1744740735.311879...
Retrieving data for player ID 203500 (Attempt 1) at 1744740735.315544...
Retrieving data for player ID 1628389 (Attempt 1) at 1744740735.318097...
‚ùå No shot location data for player ID 203500
Retrieving data for player ID 1630534 (Attempt 1) at 1744740737.1338625...
Retrieving data for player ID 1630583 (Attempt 1) at 1744740737.279925...
Retrieving data for player ID 1641725 (Attempt 1) at 1744740737.7504406...
Retrieving data for player ID 1629638 (Attempt 1) at 1744740739.3918989...
Retrieving data for player ID 1628960 (Attempt 1) at 1744740739.782729...
‚ùå No shot location data for player ID 1641725
Retrieving data for player ID 1628386 (Attempt 1) at 1744740739.891688...
Retrieving data for player ID 1630631 (Attempt 1) at 1744740741.1097534...
Retrieving data for player ID 203937 (Attempt 1) at 1744740741.713038...
Retrieving data for player ID 203507 (Attempt 1) at 1744740741.7238889...
Retrieving data

In [53]:
all_players_shot_location_23_24.shape[0]

3110

In [None]:
all_players_shot_location_23_24.head()

In [55]:
# Export to CSV
all_players_shot_location_23_24.to_csv("all_players_shot_location_23_24.csv", index=False)

### UNASSISTED SHOTS

In [55]:
from nba_api.stats.endpoints import PlayerDashboardByShootingSplits
from nba_api.stats.static import players

In [57]:
# Get all active player IDs
all_players = players.get_active_players()
player_ids = [p['id'] for p in all_players]
player_dict = {p['id']: p['full_name'] for p in all_players}

##### *Test Coby White Stats*

In [78]:
Coby_White_unassisted_shots_24_25 = PlayerDashboardByShootingSplits(
                player_id=1629632,
                season="2024-25"
            ).get_data_frames()[4]
Coby_White_unassisted_shots_24_25

Unnamed: 0,GROUP_SET,GROUP_VALUE,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,EFG_PCT,BLKA,...,FG3A_RANK,FG3_PCT_RANK,EFG_PCT_RANK,BLKA_RANK,PCT_AST_2PM_RANK,PCT_UAST_2PM_RANK,PCT_AST_3PM_RANK,PCT_UAST_3PM_RANK,PCT_AST_FGM_RANK,PCT_UAST_FGM_RANK
0,Assisted Shot,Assisted,262,262,1.0,164,164,1.0,1.313,0,...,1,1,1,1,1,2,1,2,1,2
1,Assisted Shot,Unassisted,246,246,1.0,52,52,1.0,1.106,0,...,2,1,2,1,2,1,2,1,2,1


In [72]:
Coby_White_unassisted_shots_24_25_1H = PlayerDashboardByShootingSplits(
                player_id=1629632,
                season="2024-25",
                season_segment_nullable="Pre All-Star"
            ).get_data_frames()[4]
Coby_White_unassisted_shots_24_25_1H

Unnamed: 0,GROUP_SET,GROUP_VALUE,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,EFG_PCT,BLKA,...,FG3A_RANK,FG3_PCT_RANK,EFG_PCT_RANK,BLKA_RANK,PCT_AST_2PM_RANK,PCT_UAST_2PM_RANK,PCT_AST_3PM_RANK,PCT_UAST_3PM_RANK,PCT_AST_FGM_RANK,PCT_UAST_FGM_RANK
0,Assisted Shot,Assisted,170,170,1.0,116,116,1.0,1.341,0,...,1,1,1,1,1,2,1,2,1,2
1,Assisted Shot,Unassisted,122,122,1.0,30,30,1.0,1.123,0,...,2,1,2,1,2,1,2,1,2,1


##### 2024-25 Pre All-Star Totals

In [61]:
# Loop through shooting splits to get unassisted shots data for all players in 24-25 season pre all-star
# Takes ~1.75 hours to run
# Method: loop through each player_id using parallel threading, smart rate-limiting, and a retry mechanism to avoid NBA rate limit errors

player_info = {player["id"]: player["full_name"] for player in all_players}

# Function to fetch and process data for a single player with up to 3 retries
def fetch_shooting_splits(player_id, retries=3):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving data for player ID {player_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call shooting splits for each player
            response = PlayerDashboardByShootingSplits(
                player_id=player_id,
                season="2024-25",
                season_segment_nullable="Pre All-Star"
            )

            data_frames = response.get_data_frames()

            # Ensure unassisted shots data (data frame 4) exists and return data frame per player
            if len(data_frames) > 3 and not data_frames[4].empty:
                shooting_splits_df = data_frames[4].copy()
                shooting_splits_df["PLAYER_ID"] = player_id
                shooting_splits_df["FULL_NAME"] = player_info.get(player_id, "Unknown")
                return shooting_splits_df

            # Stop retrying if no valid data
            print(f"‚ùå No unassited shots data for player ID {player_id}")
            return None 

        # Return error message and add exponential backoff + jitter (time.sleep) to avoid API blocking before next attempt
        except Exception as e:
            print(f"‚ö†Ô∏è Error retrieving data for player ID {player_id}: {e}")
            attempt += 1
            time.sleep(2 ** attempt + random.uniform(0, 1))
    
    # Give up after 3 retries
    print(f"üö® Failed to retrieve data for player ID {player_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 3 

# Store unassisted shots data for all players in a list
all_players_unassisted_shots_24_25_1H = []

# Run multiple requests in parallel and submit future objects into player dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_player = {executor.submit(fetch_shooting_splits, pid): pid for pid in player_ids} 

    # Process results as soon as they're completed and add results from future_to_player function to unassisted shots data list
    for future in as_completed(future_to_player):
        result = future.result()
        if result is not None:
            all_players_unassisted_shots_24_25_1H.append(result)

# Combine results into a single DF
if all_players_unassisted_shots_24_25_1H:
    all_players_unassisted_shots_24_25_1H = pd.concat(all_players_unassisted_shots_24_25_1H, ignore_index=True)

print("‚úÖ Data retrieval complete!")

Retrieving data for player ID 1630173 (Attempt 1) at 1744741306.2746496...
Retrieving data for player ID 203500 (Attempt 1) at 1744741306.287649...
Retrieving data for player ID 1628389 (Attempt 1) at 1744741306.2899039...
Retrieving data for player ID 1630534 (Attempt 1) at 1744741306.3545516...
Retrieving data for player ID 1630583 (Attempt 1) at 1744741306.3884075...
Retrieving data for player ID 1641725 (Attempt 1) at 1744741306.4190202...
Retrieving data for player ID 1629638 (Attempt 1) at 1744741306.4713597...
Retrieving data for player ID 1628960 (Attempt 1) at 1744741306.485579...
Retrieving data for player ID 1628386 (Attempt 1) at 1744741306.5196958...
Retrieving data for player ID 1630631 (Attempt 1) at 1744741306.5433474...
Retrieving data for player ID 203937 (Attempt 1) at 1744741306.5675282...
Retrieving data for player ID 203507 (Attempt 1) at 1744741306.5890012...
Retrieving data for player ID 1630175 (Attempt 1) at 1744741306.6089487...
Retrieving data for player ID 

In [65]:
all_players_unassisted_shots_24_25_1H.head()

Unnamed: 0,GROUP_SET,GROUP_VALUE,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,EFG_PCT,BLKA,...,EFG_PCT_RANK,BLKA_RANK,PCT_AST_2PM_RANK,PCT_UAST_2PM_RANK,PCT_AST_3PM_RANK,PCT_UAST_3PM_RANK,PCT_AST_FGM_RANK,PCT_UAST_FGM_RANK,PLAYER_ID,FULL_NAME
0,Assisted Shot,Assisted,72,72,1.0,8,8,1.0,1.056,0,...,1,1,1,2,1,1,1,2,1630173,Precious Achiuwa
1,Assisted Shot,Unassisted,27,27,1.0,0,0,0.0,1.0,0,...,2,1,2,1,2,1,2,1,1630173,Precious Achiuwa
2,Assisted Shot,Assisted,210,210,1.0,33,33,1.0,1.079,0,...,1,1,1,2,1,2,1,2,1628389,Bam Adebayo
3,Assisted Shot,Unassisted,120,120,1.0,7,7,1.0,1.029,0,...,2,1,2,1,2,1,2,1,1628389,Bam Adebayo
4,Assisted Shot,Assisted,176,176,1.0,73,73,1.0,1.207,0,...,1,1,1,2,1,2,1,2,1630534,Ochai Agbaji


In [67]:
all_players_unassisted_shots_24_25_1H.shape[0]

654

In [69]:
# Export to CSV
all_players_unassisted_shots_24_25_1H.to_csv("all_players_unassisted_shots_24_25_1H.csv", index=False)

##### 2024-25 Post All-Star Totals

In [72]:
# Loop through shooting splits to get unassisted shots data for all players in 24-25 season post all-star
# Takes ~1.75 hours to run
# Method: loop through each player_id using parallel threading, smart rate-limiting, and a retry mechanism to avoid NBA rate limit errors

player_info = {player["id"]: player["full_name"] for player in all_players}

# Function to fetch and process data for a single player with up to 3 retries
def fetch_shooting_splits(player_id, retries=3):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving data for player ID {player_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call shooting splits for each player
            response = PlayerDashboardByShootingSplits(
                player_id=player_id,
                season="2024-25",
                season_segment_nullable="Post All-Star"
            )

            data_frames = response.get_data_frames()

            # Ensure unassisted shots data (data frame 4) exists and return data frame per player
            if len(data_frames) > 3 and not data_frames[4].empty:
                shooting_splits_df = data_frames[4].copy()
                shooting_splits_df["PLAYER_ID"] = player_id
                shooting_splits_df["FULL_NAME"] = player_info.get(player_id, "Unknown")
                return shooting_splits_df

            # Stop retrying if no valid data
            print(f"‚ùå No unassited shots data for player ID {player_id}")
            return None 

        # Return error message and add exponential backoff + jitter (time.sleep) to avoid API blocking before next attempt
        except Exception as e:
            print(f"‚ö†Ô∏è Error retrieving data for player ID {player_id}: {e}")
            attempt += 1
            time.sleep(2 ** attempt + random.uniform(0, 1))
    
    # Give up after 3 retries
    print(f"üö® Failed to retrieve data for player ID {player_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 3 

# Store unassisted shots data for all players in a list
all_players_unassisted_shots_24_25_2H = []

# Run multiple requests in parallel and submit future objects into player dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_player = {executor.submit(fetch_shooting_splits, pid): pid for pid in player_ids} 

    # Process results as soon as they're completed and add results from future_to_player function to unassisted shots data list
    for future in as_completed(future_to_player):
        result = future.result()
        if result is not None:
            all_players_unassisted_shots_24_25_2H.append(result)

# Combine results into a single DF
if all_players_unassisted_shots_24_25_2H:
    all_players_unassisted_shots_24_25_2H = pd.concat(all_players_unassisted_shots_24_25_2H, ignore_index=True)

print("‚úÖ Data retrieval complete!")

Retrieving data for player ID 1630173 (Attempt 1) at 1744748457.2729292...
Retrieving data for player ID 203500 (Attempt 1) at 1744748457.2770772...
Retrieving data for player ID 1628389 (Attempt 1) at 1744748457.2795444...
Retrieving data for player ID 1630534 (Attempt 1) at 1744748457.3557003...
Retrieving data for player ID 1630583 (Attempt 1) at 1744748457.3678544...
Retrieving data for player ID 1641725 (Attempt 1) at 1744748457.3866177...
Retrieving data for player ID 1629638 (Attempt 1) at 1744748457.4020882...
Retrieving data for player ID 1628960 (Attempt 1) at 1744748457.417911...
Retrieving data for player ID 1628386 (Attempt 1) at 1744748457.438057...
Retrieving data for player ID 1630631 (Attempt 1) at 1744748457.460056...
Retrieving data for player ID 203937 (Attempt 1) at 1744748457.4680574...
Retrieving data for player ID 203507 (Attempt 1) at 1744748457.4999242...
Retrieving data for player ID 1630175 (Attempt 1) at 1744748457.521445...
Retrieving data for player ID 16

In [74]:
all_players_unassisted_shots_24_25_2H.shape[0]

712

In [76]:
all_players_unassisted_shots_24_25_2H.head()

Unnamed: 0,GROUP_SET,GROUP_VALUE,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,EFG_PCT,BLKA,...,EFG_PCT_RANK,BLKA_RANK,PCT_AST_2PM_RANK,PCT_UAST_2PM_RANK,PCT_AST_3PM_RANK,PCT_UAST_3PM_RANK,PCT_AST_FGM_RANK,PCT_UAST_FGM_RANK,PLAYER_ID,FULL_NAME
0,Assisted Shot,Assisted,12,12,1.0,0,0,0.0,1.0,0,...,1,1,1,2,1,1,1,2,203500,Steven Adams
1,Assisted Shot,Unassisted,24,24,1.0,0,0,0.0,1.0,0,...,1,1,2,1,1,1,2,1,203500,Steven Adams
2,Assisted Shot,Assisted,139,139,1.0,37,37,1.0,1.133,0,...,1,1,1,2,1,2,1,2,1628389,Bam Adebayo
3,Assisted Shot,Unassisted,71,71,1.0,2,2,1.0,1.014,0,...,2,1,2,1,2,1,2,1,1628389,Bam Adebayo
4,Assisted Shot,Assisted,45,45,1.0,2,2,1.0,1.022,0,...,1,1,1,2,1,1,1,2,1630173,Precious Achiuwa


In [78]:
# Export to CSV
all_players_unassisted_shots_24_25_2H.to_csv("all_players_unassisted_shots_24_25_2H.csv", index=False)

##### 2023-24 Totals

In [80]:
# Loop through shooting splits to get unassisted shots data for all players in 23-24 season
# Takes ~30 min to run
# Method: loop through each player_id using parallel threading, smart rate-limiting, and a retry mechanism to avoid NBA rate limit errors

player_info = {player["id"]: player["full_name"] for player in all_players}

# Function to fetch and process data for a single player with up to 3 retries
def fetch_shooting_splits(player_id, retries=3):
    attempt = 0
    while attempt < retries:
        try:
            print(f"Retrieving data for player ID {player_id} (Attempt {attempt+1}) at {time.time()}...")

            # Call shooting splits for each player
            response = PlayerDashboardByShootingSplits(
                player_id=player_id,
                season="2023-24"
            )

            data_frames = response.get_data_frames()

            # Ensure unassisted shots data (data frame 4) exists and return data frame per player
            if len(data_frames) > 3 and not data_frames[4].empty:
                shooting_splits_df = data_frames[4].copy()
                shooting_splits_df["PLAYER_ID"] = player_id
                shooting_splits_df["FULL_NAME"] = player_info.get(player_id, "Unknown")
                return shooting_splits_df

            # Stop retrying if no valid data
            print(f"‚ùå No unassited shots data for player ID {player_id}")
            return None 

        # Return error message and add exponential backoff + jitter (time.sleep) to avoid API blocking before next attempt
        except Exception as e:
            print(f"‚ö†Ô∏è Error retrieving data for player ID {player_id}: {e}")
            attempt += 1
            time.sleep(2 ** attempt + random.uniform(0, 1))
    
    # Give up after 3 retries
    print(f"üö® Failed to retrieve data for player ID {player_id} after {retries} attempts.")
    return None

# Limit concurrency to reduce rate-limit issues
max_workers = 3 

# Store unassisted shots data for all players in a list
all_players_unassisted_shots_23_24 = []

# Run multiple requests in parallel and submit future objects into player dictionary
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_player = {executor.submit(fetch_shooting_splits, pid): pid for pid in player_ids} 

    # Process results as soon as they're completed and add results from future_to_player function to unassisted shots data list
    for future in as_completed(future_to_player):
        result = future.result()
        if result is not None:
            all_players_unassisted_shots_23_24.append(result)

# Combine results into a single DF
if all_players_unassisted_shots_23_24:
    all_players_unassisted_shots_23_24 = pd.concat(all_players_unassisted_shots_23_24, ignore_index=True)

print("‚úÖ Data retrieval complete!")

Retrieving data for player ID 1630173 (Attempt 1) at 1744755834.289265...
Retrieving data for player ID 203500 (Attempt 1) at 1744755834.291905...
Retrieving data for player ID 1628389 (Attempt 1) at 1744755834.2939079...
‚ùå No unassited shots data for player ID 203500
Retrieving data for player ID 1630534 (Attempt 1) at 1744755836.8843508...
Retrieving data for player ID 1630583 (Attempt 1) at 1744755837.210584...
Retrieving data for player ID 1641725 (Attempt 1) at 1744755837.36335...
Retrieving data for player ID 1629638 (Attempt 1) at 1744755838.6777198...
Retrieving data for player ID 1628960 (Attempt 1) at 1744755838.9485543...
‚ùå No unassited shots data for player ID 1641725
Retrieving data for player ID 1628386 (Attempt 1) at 1744755839.0278554...
Retrieving data for player ID 1630631 (Attempt 1) at 1744755840.9149342...
Retrieving data for player ID 203937 (Attempt 1) at 1744755840.9714868...
Retrieving data for player ID 203507 (Attempt 1) at 1744755841.345797...
Retrieving

In [84]:
all_players_unassisted_shots_23_24.head()

Unnamed: 0,GROUP_SET,GROUP_VALUE,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,EFG_PCT,BLKA,...,EFG_PCT_RANK,BLKA_RANK,PCT_AST_2PM_RANK,PCT_UAST_2PM_RANK,PCT_AST_3PM_RANK,PCT_UAST_3PM_RANK,PCT_AST_FGM_RANK,PCT_UAST_FGM_RANK,PLAYER_ID,FULL_NAME
0,Assisted Shot,Assisted,149,149,1.0,25,25,1.0,1.084,0,...,1,1,1,2,1,2,1,2,1630173,Precious Achiuwa
1,Assisted Shot,Unassisted,86,86,1.0,1,1,1.0,1.006,0,...,2,1,2,1,2,1,2,1,1630173,Precious Achiuwa
2,Assisted Shot,Assisted,310,310,1.0,15,15,1.0,1.024,0,...,1,1,1,2,1,1,1,2,1628389,Bam Adebayo
3,Assisted Shot,Unassisted,220,220,1.0,0,0,0.0,1.0,0,...,2,1,2,1,2,1,2,1,1628389,Bam Adebayo
4,Assisted Shot,Assisted,148,148,1.0,61,61,1.0,1.206,0,...,1,1,1,2,1,2,1,2,1630534,Ochai Agbaji


In [86]:
all_players_unassisted_shots_23_24.shape[0]

870

In [88]:
# Export to CSV
all_players_unassisted_shots_23_24.to_csv("all_players_unassisted_shots_23_24.csv", index=False)

In [None]:
league_24_25_general_stats_per_game = pd.read_csv("C:\\Users\\mmillan\\OneDrive - United Center\\Coby White Analysis/league_24_25_general_stats_per_game.csv")
league_24_25_general_stats_per_game

In [None]:
# Totals for 2024-25 Pre All-Star

# Merge using an outer join to catch all player data and see where some data may be missing
shot_location_totals_24_25_1H = pd.merge(shot_location_totals_24_25_1H, player_game_counts_24_25_1H, on = 'FULL_NAME', how = 'outer')
shot_location_totals_24_25_1H

In [None]:
# Totals for 2024-25 Post All-Star
shot_location_totals_24_25_2H = pd.merge(shot_location_totals_24_25_2H, player_game_counts_24_25_2H, on = 'FULL_NAME', how = 'outer')
shot_location_totals_24_25_2H

In [None]:
# Totals for 2023-24
shot_location_totals_23_24 = pd.merge(shot_location_totals_23_24, player_game_counts_23_24, on = 'FULL_NAME', how = 'outer')
shot_location_totals_23_24

In [None]:
Coby_White_shot_location_totals_23_24 = shot_location_totals_23_24[shot_location_totals_23_24["FULL_NAME"] == "Coby White"]
Coby_White_shot_location_totals_23_24.T

In [None]:
shot_location_totals_23_24.columns

In [None]:
# Specify stats that need to be averaged
stat_columns = ['FGM', 'FGA', 'FG3M', 'FG3A','BLKA']

##### 2024-25 Pre All-Star:

In [None]:
shot_location_avg_24_25_1H = shot_location_totals_24_25_1H.copy().reset_index(drop=True)

for col in stat_columns:
    shot_location_avg_24_25_1H[col] = shot_location_avg_24_25_1H[col]/shot_location_avg_24_25_1H["num_games_24_25_1H"]

shot_location_avg_24_25_1H

In [None]:
# Drop rows with null values
shot_location_avg_24_25_1H_clean = shot_location_avg_24_25_1H.dropna(subset=stat_columns)
shot_location_avg_24_25_1H_clean

##### 2024-25 Post All-Star:

In [None]:
shot_location_avg_24_25_2H = shot_location_totals_24_25_2H.copy().reset_index(drop=True)

for col in stat_columns:
    shot_location_avg_24_25_2H[col] = shot_location_avg_24_25_2H[col]/shot_location_avg_24_25_2H["num_games_24_25_2H"]

shot_location_avg_24_25_2H

In [None]:
# Drop rows with null values
shot_location_avg_24_25_2H_clean = shot_location_avg_24_25_2H.dropna(subset=stat_columns)
shot_location_avg_24_25_2H_clean

##### 2023-24:

In [None]:
shot_location_avg_23_24 = shot_location_totals_23_24.copy().reset_index(drop=True)

for col in stat_columns:
    shot_location_avg_23_24[col] = shot_location_avg_23_24[col]/shot_location_avg_23_24['num_games_23_24']

shot_location_avg_23_24

In [None]:
# Drop rows with null values
shot_location_avg_23_24_clean = shot_location_avg_23_24.dropna(subset=stat_columns)
shot_location_avg_23_24_clean

##### *Test 23-24 avgs*

In [None]:
shot_location_avg_23_24 = shot_location_totals_23_24.copy().reset_index(drop=True)
shot_location_avg_23_24[shot_location_avg_23_24['FULL_NAME'] == 'Coby White']

In [None]:
for col in stat_columns:
    shot_location_avg_23_24[col] = shot_location_avg_23_24[col]/shot_location_avg_23_24['num_games_23_24']

shot_location_avg_23_24

In [None]:
shot_location_avg_23_24[shot_location_avg_23_24['FULL_NAME'] == 'Coby White']

##### *Troubleshoot 23-24 error*

In [None]:
print(stat_columns)

In [None]:
print(shot_location_avg_24_25_1H["num_games_24_25_1H"].dtype)
print(shot_location_avg_24_25_1H["num_games_24_25_1H"].isnull().sum())
print((shot_location_avg_24_25_1H["num_games_24_25_1H"] == 0).sum())

In [None]:
print(shot_location_avg_23_24_clean["num_games_23_24"].dtype)
print(shot_location_avg_23_24_clean["num_games_23_24"].isnull().sum())
print((shot_location_avg_23_24_clean["num_games_23_24"] == 0).sum())

In [None]:
shot_location_avg_23_24[stat_columns] = shot_location_avg_23_24[stat_columns].div(
    shot_location_avg_23_24["num_games_23_24"], axis=0
)

In [None]:
shot_location_avg_23_24 = shot_location_totals_23_24.copy().reset_index(drop=True)


##### *Test Coby averages*

In [None]:
shot_location_avg_23_24[shot_location_avg_23_24['FULL_NAME'] == 'Coby White']

In [None]:
test_totals = Coby_White_shot_location_totals_23_24[['FULL_NAME', 'GROUP_VALUE', 'FGM']]
test_totals

In [None]:
Coby_White_game_counts

In [None]:
test_totals_merged = pd.merge(test_totals, Coby_White_game_counts, on = 'FULL_NAME', how = 'outer')
test_totals_merged

In [None]:
test_stat_columns = ['FGM']

test_avg = test_totals_merged.copy()

for col in test_stat_columns:
    test_avg[col] = test_avg[col]/test_avg['num_games_23_24']

test_avg

In [None]:
# All player code (for reference)
shot_location_avg_23_24 = shot_location_totals_23_24.copy()

for col in stat_columns:
    shot_location_avg_23_24[col] = shot_location_avg_23_24[col]/shot_location_avg_23_24["num_games_23_24"]

shot_location_avg_23_24

In [None]:
Coby_White_shot_location_avg_24_25_1H = shot_location_avg_24_25_1H_clean[shot_location_avg_24_25_1H_clean["FULL_NAME"] == "Coby White"]
Coby_White_shot_location_avg_24_25_1H.T

In [None]:
Coby_White_shot_location_avg_24_25_2H = shot_location_avg_24_25_2H_clean[shot_location_avg_24_25_2H_clean["FULL_NAME"] == "Coby White"]
Coby_White_shot_location_avg_24_25_2H.T

In [None]:
Coby_White_shot_location_avg_23_24 = shot_location_avg_23_24_clean[shot_location_avg_23_24_clean["FULL_NAME"] == "Coby White"]
Coby_White_shot_location_avg_23_24.T

In [None]:
# Specify stats that need to find percentile ranks for:
stat_columns_percentiles = ['FGM', 'FGA', 'FG3M', 'FG3A','BLKA', 'FG_PCT', 'FG3_PCT', 'EFG_PCT', 
                'PCT_AST_2PM', 'PCT_UAST_2PM','PCT_AST_3PM', 'PCT_UAST_3PM', 'PCT_AST_FGM', 'PCT_UAST_FGM']

In [None]:
# Function to calculate percentrank for Coby's stats against all other players in 2024-25 season pre all-star
def get_percentile(value, series):
    return percentileofscore(series, value, kind='rank')

Coby_White_shot_location_summary_24_25_1H = Coby_White_shot_location_avg_24_25_1H.copy()

# Calculate percentile rank for each stat column
for col in stat_columns_percentiles:
    Coby_White_shot_location_summary_24_25_1H[f'{col}_percentile'] = Coby_White_shot_location_summary_24_25_1H[col].apply(
        lambda x: get_percentile(x, shot_location_avg_24_25_1H_clean[col])
    )

# Remove unnecessary columns
essential_columns = ['PLAYER_ID', 'FULL_NAME', 'GROUP_SET', 'GROUP_VALUE']
percentile_columns = [f"{col}_percentile" for col in stat_columns_percentiles]
final_columns = essential_columns + stat_columns_percentiles + percentile_columns

Coby_White_shot_location_summary_24_25_1H = Coby_White_shot_location_summary_24_25_1H[final_columns]
Coby_White_shot_location_summary_24_25_1H.T

In [None]:
# Function to calculate percentrank for Coby's stats against all other players in 2024-25 season post all-star
def get_percentile(value, series):
    return percentileofscore(series, value, kind='rank')

Coby_White_shot_location_summary_24_25_2H = Coby_White_shot_location_avg_24_25_2H.copy()

# Calculate percentile rank for each stat column
for col in stat_columns_percentiles:
    Coby_White_shot_location_summary_24_25_2H[f'{col}_percentile'] = Coby_White_shot_location_summary_24_25_2H[col].apply(
        lambda x: get_percentile(x, shot_location_avg_24_25_2H_clean[col])
    )

# Remove unnecessary columns
essential_columns = ['PLAYER_ID', 'FULL_NAME', 'GROUP_SET', 'GROUP_VALUE']
percentile_columns = [f"{col}_percentile" for col in stat_columns_percentiles]
final_columns = essential_columns + stat_columns_percentiles + percentile_columns

Coby_White_shot_location_summary_24_25_2H = Coby_White_shot_location_summary_24_25_2H[final_columns]
Coby_White_shot_location_summary_24_25_2H.T

In [None]:
# Function to calculate percentrank for Coby's stats against all other players in 2023-24 season
def get_percentile(value, series):
    return percentileofscore(series, value, kind='rank')

Coby_White_shot_location_summary_23_24 = Coby_White_shot_location_avg_23_24.copy()

# Calculate percentile rank for each stat column
for col in stat_columns_percentiles:
    Coby_White_shot_location_summary_23_24[f'{col}_percentile'] = Coby_White_shot_location_summary_23_24[col].apply(
        lambda x: get_percentile(x, shot_location_avg_23_24_clean[col])
    )

# Remove unnecessary columns
essential_columns = ['PLAYER_ID', 'FULL_NAME', 'GROUP_SET', 'GROUP_VALUE']
percentile_columns = [f"{col}_percentile" for col in stat_columns_percentiles]
final_columns = essential_columns + stat_columns_percentiles + percentile_columns

Coby_White_shot_location_summary_23_24 = Coby_White_shot_location_summary_23_24[final_columns]
Coby_White_shot_location_summary_23_24.T

In [None]:
# Ensure all stat_columns are numeric
shot_location_avg_23_24.loc[:, stat_columns] = shot_location_avg_23_24[stat_columns].apply(pd.to_numeric, errors='coerce')
Coby_White_shot_location_avg_23_24.loc[:, stat_columns] = Coby_White_shot_location_avg_23_24[stat_columns].apply(pd.to_numeric, errors='coerce')

In [None]:
print(Coby_White_shot_location_avg_23_24[stat_columns].isnull().sum())
print(shot_location_avg_23_24[stat_columns].isnull().sum())

In [None]:
for col in stat_columns:
    bad_vals = shot_location_avg_23_24[pd.to_numeric(shot_location_avg_23_24[col], errors='coerce').isna()][col]
    if not bad_vals.empty:
        print(f"Bad values in {col}:")
        print(bad_vals.unique())

In [None]:
# Ensure all stat_columns are numeric
shot_location_avg_23_24_clean.loc[:, stat_columns] = shot_location_avg_23_24_clean[stat_columns].apply(pd.to_numeric, errors='coerce')
print(shot_location_avg_23_24_clean[stat_columns].isnull().sum())