In [1]:
import os
import sys

# Put the parent of the current working directory on the import path;
# presumably this is what lets the `src` package imported below resolve
# when the notebook is run from a subfolder -- TODO confirm repo layout.
sys.path.append(os.path.dirname(os.getcwd())) 

import pandas as pd
import numpy as np

from datetime import datetime, timedelta

from src.constants import HEADERS

# Never truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [2]:
from nba_api.stats.endpoints.leaguegamelog import LeagueGameLog
from nba_api.stats.library.parameters import (
    Direction,
    LeagueID,
    PlayerOrTeamAbbreviation,
    Season,
    SeasonTypeAllStar,
    Sorter,
)

In [3]:
# Request parameters for the 2022-23 league game log. Everything except the
# season is either the library's `.default` value or an explicit neutral
# setting (no date filter, no proxy, no custom headers).
league_log_params = {
    "counter": 0,
    "direction": Direction.default,
    "league_id": LeagueID.default,
    "player_or_team_abbreviation": PlayerOrTeamAbbreviation.default,
    "season": '2022-23',
    "season_type_all_star": SeasonTypeAllStar.default,
    "sorter": Sorter.default,
    "date_from_nullable": "",
    "date_to_nullable": "",
    "proxy": None,
    "headers": None,
    "timeout": 30,
    "get_request": True,
}

lgl = LeagueGameLog(**league_log_params)

In [4]:
# Keep the first DataFrame returned by the 2022-23 game-log request.
df = lgl.get_data_frames()[0]

In [5]:
# Download the league game log for each season and stack them into `df_all`.
# Frames are collected in a list and concatenated once at the end: calling
# pd.concat inside the loop re-copies all previously loaded rows on every
# iteration, and seeding the result with an empty DataFrame is unnecessary.
seasons = ['2022-23','2023-24','2024-25']

season_frames = []

for season in seasons:
    lgl = LeagueGameLog(
        counter=0,
        direction=Direction.default,
        league_id=LeagueID.default,
        player_or_team_abbreviation=PlayerOrTeamAbbreviation.default,
        season=season,
        season_type_all_star=SeasonTypeAllStar.default,
        sorter=Sorter.default,
        date_from_nullable="",
        date_to_nullable="",
        proxy=None,
        headers=None,
        timeout=30,
        get_request=True,
    )
    # The first frame of the response is the game log itself.
    df_season = lgl.get_data_frames()[0]
    season_frames.append(df_season)

# Row labels are kept as returned per season (not reset), matching the
# result of concatenating season by season.
df_all = pd.concat(season_frames)

In [6]:
# Sanity check: (rows, cols), number of distinct games, latest and earliest
# game date. GAME_DATE is a 'YYYY-MM-DD' string, so max()/min() compare text.
df_all.shape, df_all['GAME_ID'].nunique(), df_all['GAME_DATE'].max(), df_all['GAME_DATE'].min()

((7380, 29), 3690, '2025-04-13', '2022-10-18')

In [7]:
# Extract Hustle Box Score and Four Factors
# NOTE(review): only the hustle box score endpoint is imported here; no
# Four Factors endpoint is used in the visible cells -- confirm whether
# that part is still planned.
from nba_api.stats.endpoints.hustlestatsboxscore import HustleStatsBoxScore

In [8]:
# Preview the combined game log; each game appears once per team
# (two rows share a GAME_ID).
df_all.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,22022,1610612738,BOS,Boston Celtics,22200001,2022-10-18,BOS vs. PHI,W,240,46,82,0.561,12,35,0.343,22,28,0.786,6,30,36,24,8,3,11,24,126,9,1
1,22022,1610612755,PHI,Philadelphia 76ers,22200001,2022-10-18,PHI @ BOS,L,240,40,80,0.5,13,34,0.382,24,28,0.857,4,27,31,16,8,3,14,25,117,-9,1
2,22022,1610612744,GSW,Golden State Warriors,22200002,2022-10-18,GSW vs. LAL,W,240,45,99,0.455,16,45,0.356,17,23,0.739,11,37,48,31,11,4,18,23,123,14,1
3,22022,1610612747,LAL,Los Angeles Lakers,22200002,2022-10-18,LAL @ GSW,L,240,40,94,0.426,10,40,0.25,19,25,0.76,9,39,48,23,12,4,22,18,109,-14,1
4,22022,1610612759,SAS,San Antonio Spurs,22200011,2022-10-19,SAS vs. CHA,L,240,40,99,0.404,7,34,0.206,15,21,0.714,14,30,44,22,8,8,15,19,102,-27,1


In [9]:
# Fetch the hustle box score for one known game to explore the response.
hs = HustleStatsBoxScore(game_id='0022200001')

In [10]:
# Displaying the endpoint object only shows its repr; the data is reached
# through get_data_frames().
hs

<nba_api.stats.endpoints.hustlestatsboxscore.HustleStatsBoxScore at 0x1142aa670>

In [11]:
# How many DataFrames does the hustle endpoint return?
len(hs.get_data_frames())

3

In [12]:
# Frame 0: one-row status table for the game (GAME_ID, HUSTLE_STATUS).
hs.get_data_frames()[0]

Unnamed: 0,GAME_ID,HUSTLE_STATUS
0,22200001,1


In [13]:
# Frame 1: player-level hustle stats (one row per player).
hs.get_data_frames()[1]

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MINUTES,PTS,CONTESTED_SHOTS,CONTESTED_SHOTS_2PT,CONTESTED_SHOTS_3PT,DEFLECTIONS,CHARGES_DRAWN,SCREEN_ASSISTS,SCREEN_AST_PTS,OFF_LOOSE_BALLS_RECOVERED,DEF_LOOSE_BALLS_RECOVERED,LOOSE_BALLS_RECOVERED,OFF_BOXOUTS,DEF_BOXOUTS,BOX_OUT_PLAYER_TEAM_REBS,BOX_OUT_PLAYER_REBS,BOX_OUTS
0,22200001,1610612755,PHI,Philadelphia,202699,Tobias Harris,F,,34:14,18,3,3,0,2,0,0,0,0,0,0,0,1,1,0,1
1,22200001,1610612755,PHI,Philadelphia,200782,P.J. Tucker,F,,33:01,6,3,1,2,5,0,2,5,0,0,0,0,0,0,0,0
2,22200001,1610612755,PHI,Philadelphia,203954,Joel Embiid,C,,37:16,26,8,6,2,1,0,7,14,0,0,0,0,4,4,3,4
3,22200001,1610612755,PHI,Philadelphia,1630178,Tyrese Maxey,G,,38:12,21,3,1,2,2,0,0,0,0,1,1,0,0,0,0,0
4,22200001,1610612755,PHI,Philadelphia,201935,James Harden,G,,37:16,35,3,1,2,2,0,0,0,0,0,0,0,1,1,1,1
5,22200001,1610612755,PHI,Philadelphia,1626149,Montrezl Harrell,,,10:44,2,4,3,1,0,0,0,0,0,0,0,0,0,0,0,0
6,22200001,1610612755,PHI,Philadelphia,1629001,De'Anthony Melton,,,20:33,5,1,0,1,3,0,0,0,0,0,0,0,0,0,0,0
7,22200001,1610612755,PHI,Philadelphia,1627863,Danuel House Jr.,,,16:12,1,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0
8,22200001,1610612755,PHI,Philadelphia,1627777,Georges Niang,,,12:08,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
9,22200001,1610612738,BOS,Boston,1627759,Jaylen Brown,F,,38:34,35,5,3,2,3,0,0,0,0,0,0,0,0,0,0,0


In [14]:
# Frame 2: team-level hustle stats (one row per team). This is the frame
# the collection cells further down keep for every game.
hs.get_data_frames()[2]

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MINUTES,PTS,CONTESTED_SHOTS,CONTESTED_SHOTS_2PT,CONTESTED_SHOTS_3PT,DEFLECTIONS,CHARGES_DRAWN,SCREEN_ASSISTS,SCREEN_AST_PTS,OFF_LOOSE_BALLS_RECOVERED,DEF_LOOSE_BALLS_RECOVERED,LOOSE_BALLS_RECOVERED,OFF_BOXOUTS,DEF_BOXOUTS,BOX_OUT_PLAYER_TEAM_REBS,BOX_OUT_PLAYER_REBS,BOX_OUTS
0,22200001,1610612755,76ers,PHI,Philadelphia,240.0:00,117,27,15,12,16,0,9,19,0,1,1,0,6,6,4,6
1,22200001,1610612738,Celtics,BOS,Boston,237.0:00,126,34,19,15,16,2,10,22,2,2,4,0,2,2,0,2


In [44]:
import logging
import random
import time


def fetch_hustle_stats_with_retry(game_id, max_retries=3):
    """Fetch the hustle box score for one game, retrying on failure.

    Each failed attempt (except the last) is followed by an exponentially
    growing pause with random jitter before the next attempt.

    Args:
        game_id: Game identifier passed unchanged to ``HustleStatsBoxScore``.
        max_retries: Total number of attempts to make; must be at least 1.

    Returns:
        The ``HustleStatsBoxScore`` object from the first successful attempt.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1 (previously the
            function fell through and returned ``None`` in that case).
        Exception: Whatever the final attempt raised, re-raised unchanged.
    """
    if max_retries < 1:
        raise ValueError(f"max_retries must be >= 1, got {max_retries}")

    for attempt in range(max_retries):
        try:
            return HustleStatsBoxScore(game_id)
        except Exception as e:
            if attempt == max_retries - 1:  # Last attempt: give up and propagate
                logging.error(f"Failed to fetch game {game_id} after {max_retries} attempts: {e}")
                raise

            # Exponential backoff (1s, 2s, 4s, ...) plus up to 1s of jitter
            delay = (2 ** attempt) + random.uniform(0, 1)
            logging.warning(f"Attempt {attempt + 1} failed for game {game_id}. Retrying in {delay:.1f}s...")
            time.sleep(delay)

In [51]:
import logging
import time

In [65]:
def fetch_hustle_stats_robust(game_ids, request_delay=1.5, retry_delay=5):
    """Fetch hustle box scores for many games with pacing and one retry pass.

    Every game is requested once, pausing ``request_delay`` seconds between
    requests. Games whose request raised are then retried a single time,
    pausing ``retry_delay`` seconds before each retry.

    Args:
        game_ids: Iterable of game identifiers (any iterable is accepted; it
            is materialized once so its length can be reported).
        request_delay: Seconds to sleep before every request after the first.
        retry_delay: Seconds to sleep before each retry of a failed game.

    Returns:
        List of ``HustleStatsBoxScore`` objects. First-pass successes come
        first, in input order, followed by games that succeeded on retry.
        Games that fail twice are reported on stdout and omitted.
    """
    game_ids = list(game_ids)
    total = len(game_ids)
    results = []
    failed_games = []

    for i, game_id in enumerate(game_ids):
        try:
            # Progress logging
            print(f"Processing game {i+1}/{total}: {game_id}")

            # Pace requests; no need to wait before the very first one
            if i > 0:
                time.sleep(request_delay)

            results.append(HustleStatsBoxScore(game_id))

        except Exception as e:
            print(f"Failed to fetch game {game_id}: {e}")
            failed_games.append(game_id)

    # Retry failed games with longer delays
    if failed_games:
        print(f"Retrying {len(failed_games)} failed games with longer delays...")
        for game_id in failed_games:
            try:
                time.sleep(retry_delay)
                results.append(HustleStatsBoxScore(game_id))
                print(f"Successfully retried game {game_id}")
            except Exception as e:
                print(f"Retry failed for game {game_id}: {e}")

    return results

In [47]:
# Peek at the first few distinct game IDs; they are zero-padded strings.
df_all['GAME_ID'].unique()[:3]

array(['0022200001', '0022200002', '0022200011'], dtype=object)

In [66]:
# Smoke-test the batch fetcher on the first 20 distinct game IDs only.
hustle_results = fetch_hustle_stats_robust(game_ids=df_all['GAME_ID'].unique()[:20])

Processing game 1/20: 0022200001
Processing game 2/20: 0022200002
Processing game 3/20: 0022200011
Processing game 4/20: 0022200012
Processing game 5/20: 0022200008
Processing game 6/20: 0022200014
Processing game 7/20: 0022200005
Processing game 8/20: 0022200007
Processing game 9/20: 0022200006
Processing game 10/20: 0022200013
Processing game 11/20: 0022200004
Processing game 12/20: 0022200010
Processing game 13/20: 0022200009
Processing game 14/20: 0022200003
Processing game 15/20: 0022200015
Processing game 16/20: 0022200016
Processing game 17/20: 0022200022
Processing game 18/20: 0022200024
Processing game 19/20: 0022200018
Processing game 20/20: 0022200019


In [67]:
# Keep only the team-level frame (index 2) from every fetched box score.
h_list = [box_score.get_data_frames()[2] for box_score in hustle_results]

In [82]:
# Print a 50-character rule followed by a same-width " df_logs " banner.
rule = "=" * 50
print(rule)
print(f"{'=' * 21} df_logs {'=' * 20}")



In [74]:
# Stack the per-game team-level frames. The index is not reset, so the
# 0/1 row labels repeat for every game.
pd.concat(h_list)

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MINUTES,PTS,CONTESTED_SHOTS,CONTESTED_SHOTS_2PT,CONTESTED_SHOTS_3PT,DEFLECTIONS,CHARGES_DRAWN,SCREEN_ASSISTS,SCREEN_AST_PTS,OFF_LOOSE_BALLS_RECOVERED,DEF_LOOSE_BALLS_RECOVERED,LOOSE_BALLS_RECOVERED,OFF_BOXOUTS,DEF_BOXOUTS,BOX_OUT_PLAYER_TEAM_REBS,BOX_OUT_PLAYER_REBS,BOX_OUTS
0,22200001,1610612755,76ers,PHI,Philadelphia,240.0:00,117,27,15,12,16,0,9,19,0,1,1,0,6,6,4,6
1,22200001,1610612738,Celtics,BOS,Boston,237.0:00,126,34,19,15,16,2,10,22,2,2,4,0,2,2,0,2
0,22200002,1610612747,Lakers,LAL,Los Angeles,238.0:00,106,71,35,36,18,1,3,8,5,2,7,2,11,13,7,13
1,22200002,1610612744,Warriors,GSW,Golden State,232.0:00,118,63,30,33,22,1,18,43,1,2,3,3,11,12,6,14
0,22200011,1610612766,Hornets,CHA,Charlotte,239.0:00,129,53,27,26,13,0,7,14,3,2,5,2,4,5,4,6
1,22200011,1610612759,Spurs,SAS,San Antonio,240.0:00,102,52,34,18,15,0,2,4,2,3,5,1,2,3,2,3
0,22200012,1610612743,Nuggets,DEN,Denver,237.0:00,102,57,29,28,19,1,6,13,4,6,10,0,6,4,0,6
1,22200012,1610612762,Jazz,UTA,Utah,238.0:00,123,46,34,12,18,1,8,20,3,3,6,2,4,6,3,6
0,22200008,1610612739,Cavaliers,CLE,Cleveland,228.0:00,105,52,33,19,8,0,7,15,3,2,5,0,7,3,1,7
1,22200008,1610612761,Raptors,TOR,Toronto,240.0:00,108,44,30,14,16,0,4,9,3,5,8,0,4,2,0,4


In [45]:
# Build `df_hustle_all`: the team-level hustle frame (index 2) of every game
# in `df_all`, stacked together.
#
# Frames are gathered in a list and concatenated once, instead of calling
# pd.concat inside the loop (which re-copies everything collected so far on
# each iteration). The `finally` clause still assembles whatever was fetched
# if the cell is interrupted, so partial results remain inspectable.
hustle_frames = []
failed_game_ids = []  # games that still failed after all retries

try:
    for idx, game_id in enumerate(df_all['GAME_ID'].unique()):

        print(f"Starting Game #: {idx}, Game ID: {game_id}")
        try:
            hsbs = fetch_hustle_stats_with_retry(game_id=game_id, max_retries=3)
            df_game_hustle = hsbs.get_data_frames()[2]
            hustle_frames.append(df_game_hustle)
            print(f"--> Appended Game ID: {game_id}")
        except Exception as e:
            # Report which game failed and why rather than a bare "issue",
            # and remember it so it can be re-fetched later.
            failed_game_ids.append(game_id)
            print(f"--> Failed Game ID: {game_id}: {e!r}")
finally:
    # pd.concat raises on an empty list, so fall back to an empty frame.
    df_hustle_all = pd.concat(hustle_frames) if hustle_frames else pd.DataFrame()



Starting Game #: 0, Game ID: 0022200001
--> Appended Game ID: 0022200001
Starting Game #: 1, Game ID: 0022200002
--> Appended Game ID: 0022200002
Starting Game #: 2, Game ID: 0022200011
--> Appended Game ID: 0022200011
Starting Game #: 3, Game ID: 0022200012
--> Appended Game ID: 0022200012
Starting Game #: 4, Game ID: 0022200008
--> Appended Game ID: 0022200008
Starting Game #: 5, Game ID: 0022200014
--> Appended Game ID: 0022200014
Starting Game #: 6, Game ID: 0022200005
--> Appended Game ID: 0022200005
Starting Game #: 7, Game ID: 0022200007
--> Appended Game ID: 0022200007
Starting Game #: 8, Game ID: 0022200006
--> Appended Game ID: 0022200006
Starting Game #: 9, Game ID: 0022200013
--> Appended Game ID: 0022200013
Starting Game #: 10, Game ID: 0022200004
--> Appended Game ID: 0022200004
Starting Game #: 11, Game ID: 0022200010
--> Appended Game ID: 0022200010
Starting Game #: 12, Game ID: 0022200009
--> Appended Game ID: 0022200009
Starting Game #: 13, Game ID: 0022200003
--> App

KeyboardInterrupt: 

In [None]:
# Size of whatever the collection loop managed to gather.
df_hustle_all.shape

In [None]:
# First rows of the collected team-level hustle stats.
df_hustle_all.head()

In [None]:
# Last rows of the collected team-level hustle stats (shows how far the loop got).
df_hustle_all.tail()

In [37]:
# Fetch a single game directly, without the retry wrapper. Note that this
# rebinds `hsbs`, the same name the collection loop uses.
hsbs = HustleStatsBoxScore(game_id='0022200016')

In [40]:
# Team-level hustle frame (index 2) for the game held in `hsbs`.
hsbs.get_data_frames()[2]

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MINUTES,PTS,CONTESTED_SHOTS,CONTESTED_SHOTS_2PT,CONTESTED_SHOTS_3PT,DEFLECTIONS,CHARGES_DRAWN,SCREEN_ASSISTS,SCREEN_AST_PTS,OFF_LOOSE_BALLS_RECOVERED,DEF_LOOSE_BALLS_RECOVERED,LOOSE_BALLS_RECOVERED,OFF_BOXOUTS,DEF_BOXOUTS,BOX_OUT_PLAYER_TEAM_REBS,BOX_OUT_PLAYER_REBS,BOX_OUTS
0,22200016,1610612746,Clippers,LAC,LA,240.0:00,103,55,28,27,16,1,10,22,1,1,2,2,1,3,2,3
1,22200016,1610612747,Lakers,LAL,Los Angeles,240.0:00,97,47,29,18,22,1,2,5,1,1,2,0,4,2,2,4


In [15]:
# `df` is the 2022-23-only game log from the first LeagueGameLog request,
# not the multi-season `df_all`.
df.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
2455,22022,1610612757,POR,Portland Trail Blazers,22201230,2023-04-09,POR vs. GSW,L,240,38,...,24,30,25,5,3,14,9,101,-56,1
2456,22022,1610612746,LAC,LA Clippers,22201229,2023-04-09,LAC @ PHX,W,240,49,...,39,53,22,3,5,10,18,119,5,1
2457,22022,1610612764,WAS,Washington Wizards,22201222,2023-04-09,WAS vs. HOU,L,240,41,...,37,51,28,11,5,13,21,109,-5,1
2458,22022,1610612747,LAL,Los Angeles Lakers,22201228,2023-04-09,LAL vs. UTA,W,240,47,...,39,49,28,4,7,11,9,128,11,1
2459,22022,1610612756,PHX,Phoenix Suns,22201229,2023-04-09,PHX vs. LAC,L,240,42,...,35,47,29,4,3,7,21,114,-5,1
