In [31]:
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.static import teams
from nba_api.stats.endpoints import boxscoretraditionalv2
import pandas as pd
import datetime
import time

In [32]:
import pandas as pd

# get all NBA teams
nba_teams = teams.get_teams()
teams_df = pd.DataFrame(nba_teams)

# save team IDs and names
team_ids = teams_df['id'].tolist()

teams_df.head()


Unnamed: 0,id,full_name,abbreviation,nickname,city,state,year_founded
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Georgia,1949
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946
2,1610612739,Cleveland Cavaliers,CLE,Cavaliers,Cleveland,Ohio,1970
3,1610612740,New Orleans Pelicans,NOP,Pelicans,New Orleans,Louisiana,2002
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966


In [33]:
# get current season
current_year = datetime.datetime.now().year
current_month = datetime.datetime.now().month
season_id = f"2{current_year - 1}" if current_month < 10 else f"2{current_year}"

# get schedules for all seasons
all_games = []

for team_id in team_ids:
    gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id)
    games = gamefinder.get_data_frames()[0]
    games_current_season = games[games['SEASON_ID'] == season_id]  # Filter for current season
    all_games.append(games_current_season)

# collect team schedules into single dataframe
games_df = pd.concat(all_games, ignore_index=True)

# save data and view
games_df.to_csv(f'games_{season_id}.csv', index=False)
games_df.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22024,1610612737,ATL,Atlanta Hawks,22400506,2025-01-07,ATL @ UTA,W,240,124,...,0.8,12.0,32.0,44.0,35,6.0,11,10,22,3.0
1,22024,1610612737,ATL,Atlanta Hawks,22400486,2025-01-04,ATL @ LAC,L,240,105,...,0.765,8.0,35.0,43.0,30,11.0,4,22,12,-26.0
2,22024,1610612737,ATL,Atlanta Hawks,22400477,2025-01-03,ATL @ LAL,L,240,102,...,0.778,13.0,26.0,39.0,22,10.0,3,8,22,-17.0
3,22024,1610612737,ATL,Atlanta Hawks,22400461,2025-01-01,ATL @ DEN,L,240,120,...,0.667,12.0,32.0,44.0,34,10.0,5,13,16,-19.0
4,22024,1610612737,ATL,Atlanta Hawks,22400438,2024-12-29,ATL @ TOR,W,240,136,...,0.743,14.0,26.0,40.0,30,22.0,8,14,19,29.0


In [34]:
# get player stats
all_player_stats = []

for index, row in games_df.iterrows():
    game_id = row['GAME_ID']
    try:
        # fetch box score for the game
        boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
        player_stats = boxscore.player_stats.get_data_frame()

        # add game context to player stats
        player_stats['GAME_ID'] = game_id
        player_stats['GAME_DATE'] = row['GAME_DATE']
        player_stats['TEAM_NAME'] = row['TEAM_NAME']
        all_player_stats.append(player_stats)

        # respect API rate limits
        time.sleep(1)

    except Exception as e:
        print(f"Error fetching player stats for game {game_id}: {e}")

# collect player stats into single dataframe
player_stats_df = pd.concat(all_player_stats, ignore_index=True)

# save data and view
player_stats_df.to_csv(f'player_stats_{season_id}.csv', index=False)
player_stats_df.head()

Error fetching player stats for game 0022400217: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)
Error fetching player stats for game 0022400179: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)
Error fetching player stats for game 0022400049: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)
Error fetching player stats for game 0022400232: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)
Error fetching player stats for game 0022400168: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)
Error fetching player stats for game 1522400073: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)
Error fetching player stats for game 0022400282: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)
Error fetching player stats for game 0022401221:

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,MIN,...,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS,GAME_DATE,TEAM_NAME
0,22400506,1610612737,ATL,Atlanta,1630249,Vít Krejčí,Vít,F,,22.000000:15,...,4.0,0.0,1.0,0.0,0.0,4.0,13.0,7.0,2025-01-07,Atlanta Hawks
1,22400506,1610612737,ATL,Atlanta,1642258,Zaccharie Risacher,Zaccharie,F,,23.000000:53,...,6.0,0.0,0.0,0.0,0.0,2.0,14.0,8.0,2025-01-07,Atlanta Hawks
2,22400506,1610612737,ATL,Atlanta,203991,Clint Capela,Clint,C,,25.000000:15,...,6.0,0.0,0.0,3.0,1.0,2.0,18.0,-1.0,2025-01-07,Atlanta Hawks
3,22400506,1610612737,ATL,Atlanta,1630700,Dyson Daniels,Dyson,G,,35.000000:11,...,6.0,7.0,2.0,2.0,1.0,4.0,16.0,2.0,2025-01-07,Atlanta Hawks
4,22400506,1610612737,ATL,Atlanta,1629027,Trae Young,Trae,G,,36.000000:34,...,2.0,20.0,0.0,1.0,2.0,1.0,24.0,4.0,2025-01-07,Atlanta Hawks


In [35]:
player_stats_df.columns

Index(['GAME_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'PLAYER_ID',
       'PLAYER_NAME', 'NICKNAME', 'START_POSITION', 'COMMENT', 'MIN', 'FGM',
       'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
       'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TO', 'PF', 'PTS',
       'PLUS_MINUS', 'GAME_DATE', 'TEAM_NAME'],
      dtype='object')

In [47]:
# merge datasets on GAME_ID
merged_df = pd.merge(player_stats_df, games_df, on=['GAME_ID','TEAM_ID'], suffixes=('', '_game'), how='left')

# save and view
merged_df.to_csv(f'merged_stats_{season_id}.csv', index=False)
merged_df.head(8)

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,MIN,...,FT_PCT_game,OREB_game,DREB_game,REB_game,AST_game,STL_game,BLK_game,TOV,PF_game,PLUS_MINUS_game
0,22400506,1610612737,ATL,Atlanta,1630249,Vít Krejčí,Vít,F,,22.000000:15,...,0.8,12.0,32.0,44.0,35.0,6.0,11.0,10.0,22.0,3.0
1,22400506,1610612737,ATL,Atlanta,1642258,Zaccharie Risacher,Zaccharie,F,,23.000000:53,...,0.8,12.0,32.0,44.0,35.0,6.0,11.0,10.0,22.0,3.0
2,22400506,1610612737,ATL,Atlanta,203991,Clint Capela,Clint,C,,25.000000:15,...,0.8,12.0,32.0,44.0,35.0,6.0,11.0,10.0,22.0,3.0
3,22400506,1610612737,ATL,Atlanta,1630700,Dyson Daniels,Dyson,G,,35.000000:11,...,0.8,12.0,32.0,44.0,35.0,6.0,11.0,10.0,22.0,3.0
4,22400506,1610612737,ATL,Atlanta,1629027,Trae Young,Trae,G,,36.000000:34,...,0.8,12.0,32.0,44.0,35.0,6.0,11.0,10.0,22.0,3.0
5,22400506,1610612737,ATL,Atlanta,1629631,De'Andre Hunter,De'Andre,,,27.000000:26,...,0.8,12.0,32.0,44.0,35.0,6.0,11.0,10.0,22.0,3.0
6,22400506,1610612737,ATL,Atlanta,1630168,Onyeka Okongwu,Onyeka,,,22.000000:44,...,0.8,12.0,32.0,44.0,35.0,6.0,11.0,10.0,22.0,3.0
7,22400506,1610612737,ATL,Atlanta,203992,Bogdan Bogdanović,Bogdan,,,28.000000:03,...,0.8,12.0,32.0,44.0,35.0,6.0,11.0,10.0,22.0,3.0


np.float64(13.0)