In [2]:
from nba_api.stats.endpoints import playergamelog, boxscoreadvancedv2
import pandas as pd
import numpy as np
import time
import random
import sys

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

In [5]:
class PlayerProfile:
    def __init__(self, name, player_id, seasons):
        self.name = name
        self.id = player_id
        self.seasons = seasons
        self.gamelog = self.collect_stats(player_id, seasons)
        self.advanced_stats = self

    def __str__(self):
        return self.name  # This controls what gets printed when using print(object)

    def __repr__(self):
        return f"PlayerProfile(name={self.name!r})"
    
    def collect_stats(self, player_id, seasons):
        gamelog = self.collect_gamelog(player_id, seasons)
        gamelog = self.collect_adv_stats(gamelog, player_id)

        return gamelog

    def collect_gamelog(self, player_id, seasons):
        gamelog = pd.DataFrame()
        for season in seasons:
            data = playergamelog.PlayerGameLog(player_id=player_id, season=season)
            data = data.get_dict()
            if gamelog.empty:
                cols = data['resultSets'][0]['headers']
                gamelog = pd.DataFrame(columns=cols)
            observations = data['resultSets'][0]['rowSet']
            gamelog = pd.concat([gamelog, pd.DataFrame(observations, columns=gamelog.columns)], ignore_index=True)

        return gamelog
    
    def collect_adv_stats(self, gamelog, player_id):
        '''manage advanced stats'''
        categories = ['E_USG_PCT', 'E_OFF_RATING', 'E_DEF_RATING', 'PACE_PER40', 'TS_PCT', 'AST_PCT']

        # Ensure gamelog has the columns for advanced stats
        for col in categories:
            gamelog[col] = None  # Initialize empty columns
        
        # Iterate over each game entry
        for idx, row in gamelog.iterrows():
            game_id = row['Game_ID']
            
            try:
                # Print dynamic status update (overwriting the previous game_id)
                sys.stdout.write(f"\rProcessing game: {game_id}... ")
                sys.stdout.flush()

                # Introduce a random delay before making the API request
                sleep_time = random.uniform(0,1)  # Sleep between 1 to 5 seconds
                time.sleep(sleep_time)
                stats = self.adv_stats_by_game(game_id, player_id)  # Retrieve stats
                for col in categories:
                    gamelog.at[idx, col] = stats[col]  # Update DataFrame
            except AssertionError as e:
                print(f"Skipping game {game_id} due to error: {e}")
            except Exception as e:
                print(f"Unexpected error retrieving stats for game {game_id}: {e}")

        return gamelog  # Return the updated DataFrame
        


    def adv_stats_by_game(self, game_id, player_id):
        categories = ['E_USG_PCT', 'E_OFF_RATING', 'E_DEF_RATING', 'PACE_PER40', 'TS_PCT', 'AST_PCT']
        result = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=str(game_id))
        result = result.get_dict()
        data = pd.DataFrame(
            data=result['resultSets'][0]['rowSet'],
            columns=result['resultSets'][0]['headers'])
        pl = data[data['PLAYER_ID'] == int(player_id)]
        assert len(pl) == 1, "Too many rows returned for game/player advanced stats pull."
        return pl[categories].iloc[0].to_dict()






In [6]:
SEASONS = [
    '2024-25',
    '2023-24',
    '2022-23',
    '2021-22',
    '2020-21'
]
PLAYER_ID = '1627759'
NAME = 'Jalen Brown'
player = PlayerProfile(
    name=NAME,
    player_id=PLAYER_ID,
    seasons=SEASONS
)
print(player)

  gamelog = pd.concat([gamelog, pd.DataFrame(observations, columns=gamelog.columns)], ignore_index=True)


Processing game: 0022400528... Unexpected error retrieving stats for game 0022400528: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)
Processing game: 0022000003... Jalen Brown


In [10]:
### Cleanup Code for missing values
missed_game = '0022400528'
categories = ['E_USG_PCT', 'E_OFF_RATING', 'E_DEF_RATING', 'PACE_PER40', 'TS_PCT', 'AST_PCT']
data = player.adv_stats_by_game(game_id=missed_game, player_id=player.id)
for cat, val in data.items():
    player.gamelog.loc[(player.gamelog['Game_ID'] == missed_game), cat] = val


{'E_USG_PCT': 0.321, 'E_OFF_RATING': 107.0, 'E_DEF_RATING': 117.2, 'PACE_PER40': 82.62, 'TS_PCT': 0.587, 'AST_PCT': 0.278}


In [11]:
player.gamelog

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,E_USG_PCT,E_OFF_RATING,E_DEF_RATING,PACE_PER40,TS_PCT,AST_PCT
0,22024,1627759,22400891,"MAR 05, 2025",BOS vs. POR,W,40,7,23,0.304,0,4,0.0,4,6,0.667,3,5,8,8,1,0,3,2,18,15,1,0.31,139.4,119.8,79.08,0.351,0.25
1,22024,1627759,22400866,"MAR 02, 2025",BOS vs. DEN,W,37,6,15,0.4,2,4,0.5,8,9,0.889,0,5,5,8,2,2,5,2,22,10,1,0.307,121.8,103.9,79.23,0.58,0.348
2,22024,1627759,22400852,"FEB 28, 2025",BOS vs. CLE,L,34,13,24,0.542,4,6,0.667,7,10,0.7,4,1,5,2,0,0,3,4,37,10,1,0.399,128.9,115.4,78.43,0.651,0.111
3,22024,1627759,22400829,"FEB 25, 2025",BOS @ TOR,W,33,10,18,0.556,0,5,0.0,4,4,1.0,1,4,5,4,1,0,3,1,24,5,1,0.287,110.4,110.7,80.2,0.607,0.19
4,22024,1627759,22400811,"FEB 23, 2025",BOS vs. NYK,W,37,10,21,0.476,2,7,0.286,2,2,1.0,1,7,8,2,2,0,2,2,24,14,1,0.293,121.4,100.5,81.38,0.548,0.083
5,22024,1627759,22400789,"FEB 20, 2025",BOS @ PHI,W,27,7,13,0.538,2,6,0.333,4,4,1.0,3,3,6,6,0,0,3,1,20,6,1,0.304,123.0,105.4,83.16,0.678,0.375
6,22024,1627759,22400748,"FEB 08, 2025",BOS @ NYK,W,32,5,14,0.357,0,3,0.0,2,2,1.0,1,2,3,5,3,1,4,3,12,9,1,0.266,122.9,102.0,81.17,0.403,0.217
7,22024,1627759,22400728,"FEB 06, 2025",BOS vs. DAL,L,33,9,16,0.563,3,4,0.75,4,4,1.0,0,5,5,2,1,0,3,1,25,-15,1,0.271,119.3,138.3,83.88,0.704,0.095
8,22024,1627759,22400710,"FEB 04, 2025",BOS @ CLE,W,28,7,12,0.583,0,0,0.0,2,2,1.0,2,7,9,6,0,0,5,5,16,-9,1,0.244,84.6,101.6,85.46,0.621,0.429
9,22024,1627759,22400698,"FEB 02, 2025",BOS @ PHI,W,42,9,17,0.529,2,7,0.286,1,2,0.5,3,7,10,6,1,0,2,4,21,17,1,0.232,139.0,117.9,79.11,0.587,0.188
