In [1]:
!pip install pandas nba_api requests tqdm

Collecting nba_api
  Downloading nba_api-1.11.3-py3-none-any.whl.metadata (5.8 kB)
Downloading nba_api-1.11.3-py3-none-any.whl (318 kB)
Installing collected packages: nba_api
Successfully installed nba_api-1.11.3


In [7]:
import pandas as pd
import time
from nba_api.stats.endpoints import playbyplayv3, leaguegamefinder
from nba_api.stats.static import teams
from tqdm import tqdm  # For progress bar

def get_game_ids(season='2023-24'):
    """
    Fetches a list of game IDs for a specific season.
    Returns a list of dataframes for each season.
    """
    print(f"Fetching games for season {season}...")
    game_finder = leaguegamefinder.LeagueGameFinder(
        season_nullable=season,
        league_id_nullable='00', # '00' is the NBA
        season_type_nullable='Regular Season'
    )
    games = game_finder.get_data_frames()

    return games

def fetch_raw_pbp_data(game_ids):
    """
    Iterates through game IDs and fetches raw PlayByPlayV3 data.
    Returns a list of DataFrames.
    """
    all_games_data = pd.DataFrame()

    print(f"Starting download for {len(game_ids)} games...")
    for game_id in tqdm(game_ids):
        try:
            # Call the PlayByPlayV3 endpoint
            # timeout is essential as the API can sometimes hang
            pbp = playbyplayv3.PlayByPlayV3(game_id=game_id, timeout=30)

            # The API returns a dictionary; we want the 'PlayByPlay' key
            # Note: Accessing the dictionary via get_dict() usually works best for V3
            data_dict = pbp.get_dict()
            game_actions = data_dict['game']['actions']

            df = pd.DataFrame(game_actions)
            df['gameId'] = game_id # Ensure ID is attached to the rows
            all_games_data.append(df)

            # POLITE RATE LIMITING: Sleep to avoid 429 errors
            time.sleep(0.600)

        except Exception as e:
            print(f"Error fetching game {game_id}: {e}")
            continue

    if not all_games_data:
        return pd.DataFrame()

    return pd.concat(all_games_data, ignore_index=True)

# --- EXECUTION BLOCK ---
# Example: Get all Lakers games from the 2023-24 season
target_game_ids = get_game_ids(season='2025-26')

# Test finding games with the first three rows
raw_pbp_df = fetch_raw_pbp_data(target_game_ids[0].head(3))
#
print(f"Extracted {len(raw_pbp_df)} total events.")
print(raw_pbp_df[['gameId', 'actionNumber', 'timeActual', 'scoreHome', 'scoreAway']].head())

Fetching games for season 2025-26...
Starting download for 3 games...


 33%|███▎      | 1/3 [00:00<00:00,  8.23it/s]

Error fetching game SEASON_ID: 'game'


 67%|██████▋   | 2/3 [00:00<00:00,  9.11it/s]

Error fetching game TEAM_ID: 'game'


100%|██████████| 3/3 [00:00<00:00,  7.16it/s]

Error fetching game TEAM_ABBREVIATION: 'game'


4it [00:00,  7.33it/s]                       

Error fetching game TEAM_NAME: 'game'


5it [00:00,  7.81it/s]

Error fetching game GAME_ID: 'game'


6it [00:00,  6.76it/s]

Error fetching game GAME_DATE: 'game'


7it [00:00,  7.53it/s]

Error fetching game MATCHUP: 'game'


8it [00:01,  7.79it/s]

Error fetching game WL: 'game'


9it [00:01,  7.96it/s]

Error fetching game MIN: 'game'


10it [00:01,  8.21it/s]

Error fetching game PTS: 'game'


11it [00:01,  8.52it/s]

Error fetching game FGM: 'game'


12it [00:01,  8.79it/s]

Error fetching game FGA: 'game'


13it [00:01,  9.00it/s]

Error fetching game FG_PCT: 'game'


14it [00:01,  7.40it/s]

Error fetching game FG3M: 'game'


15it [00:01,  7.92it/s]

Error fetching game FG3A: 'game'


16it [00:02,  7.14it/s]

Error fetching game FG3_PCT: 'game'


17it [00:02,  7.38it/s]

Error fetching game FTM: 'game'


18it [00:02,  7.01it/s]

Error fetching game FTA: 'game'


18it [00:04,  3.69it/s]


KeyboardInterrupt: 

In [8]:
target_game_ids[0].columns

Index(['SEASON_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT',
       'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB',
       'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS'],
      dtype='object')

In [11]:
game_id = target_game_ids[0].head(1)['GAME_ID']
pbp = playbyplayv3.PlayByPlayV3(game_id=game_id)

# The API returns a dictionary; we want the 'PlayByPlay' key
# Note: Accessing the dictionary via get_dict() usually works best for V3
data_dict = pbp.get_dict()
print(data_dict)

{'meta': {'version': 1, 'request': 'http://nba.cloud/games/0022501206/playbyplay?Format=json', 'time': '2025-12-11T23:00:36.036Z'}, 'game': {'gameId': '0022501206', 'videoAvailable': 1, 'actions': [{'actionNumber': 2, 'clock': 'PT12M00.00S', 'period': 1, 'teamId': 0, 'teamTricode': '', 'personId': 0, 'playerName': '', 'playerNameI': '', 'xLegacy': 0, 'yLegacy': 0, 'shotDistance': 0, 'shotResult': '', 'isFieldGoal': 0, 'scoreHome': '0', 'scoreAway': '0', 'pointsTotal': 0, 'location': '', 'description': 'Start of 1st Period (8:11 PM EST)', 'actionType': 'period', 'subType': 'start', 'videoAvailable': 1, 'shotValue': 0, 'actionId': 1}, {'actionNumber': 4, 'clock': 'PT12M00.00S', 'period': 1, 'teamId': 1610612749, 'teamTricode': 'MIL', 'personId': 1626167, 'playerName': 'Turner', 'playerNameI': 'M. Turner', 'xLegacy': 0, 'yLegacy': 0, 'shotDistance': 0, 'shotResult': '', 'isFieldGoal': 0, 'scoreHome': '', 'scoreAway': '', 'pointsTotal': 0, 'location': 'h', 'description': 'Jump Ball Turner 

In [15]:
data_dict['game'].keys()

dict_keys(['gameId', 'videoAvailable', 'actions'])

In [19]:
data_dict['game']['actions'][0]

{'actionNumber': 2,
 'clock': 'PT12M00.00S',
 'period': 1,
 'teamId': 0,
 'teamTricode': '',
 'personId': 0,
 'playerName': '',
 'playerNameI': '',
 'xLegacy': 0,
 'yLegacy': 0,
 'shotDistance': 0,
 'shotResult': '',
 'isFieldGoal': 0,
 'scoreHome': '0',
 'scoreAway': '0',
 'pointsTotal': 0,
 'location': '',
 'description': 'Start of 1st Period (8:11 PM EST)',
 'actionType': 'period',
 'subType': 'start',
 'videoAvailable': 1,
 'shotValue': 0,
 'actionId': 1}

In [28]:
from collections import Counter
actions = data_dict['game']['actions']
unique_distances = Counter(d.get("actionType") for d in actions)
print(list((x, unique_distances[x]) for x in sorted(unique_distances)))

[('', 18), ('Foul', 40), ('Free Throw', 39), ('Heave', 2), ('Instant Replay', 2), ('Jump Ball', 3), ('Made Shot', 80), ('Missed Shot', 87), ('Rebound', 98), ('Substitution', 45), ('Timeout', 10), ('Turnover', 30), ('Violation', 4), ('period', 8)]


In [17]:
seasons = ["2025-26", '2024-25']

dfs = []
for season in seasons:
    print(f"Fetching games for season {season}...")
    game_finder = leaguegamefinder.LeagueGameFinder(
        season_nullable=season,
        league_id_nullable='00', # '00' is the NBA
        season_type_nullable='Regular Season'
    )
    dfs.append(pd.DataFrame(game_finder.get_data_frames()[0]))

df = pd.concat(dfs)
print(df.describe())
df.head()


Fetching games for season 2025-26...
Fetching games for season 2024-25...
            TEAM_ID          MIN          PTS          FGM          FGA  \
count  3.116000e+03  3116.000000  3116.000000  3116.000000  3116.000000   
mean   1.610613e+09   241.416239   114.425866    41.725289    89.213736   
std    8.663802e+00     6.704090    12.674134     5.228483     7.293297   
min    1.610613e+09   169.000000    67.000000    22.000000    60.000000   
25%    1.610613e+09   239.000000   105.000000    38.000000    84.000000   
50%    1.610613e+09   240.000000   114.000000    42.000000    89.000000   
75%    1.610613e+09   241.000000   123.000000    45.000000    94.000000   
max    1.610613e+09   292.000000   162.000000    60.000000   120.000000   

            FG_PCT         FG3M         FG3A      FG3_PCT          FTM  ...  \
count  3116.000000  3116.000000  3116.000000  3116.000000  3116.000000  ...   
mean      0.468703    13.490693    37.471438     0.359893    17.484596  ...   
std       0.0

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22025,1610612738,BOS,Boston Celtics,22500334,2025-12-04,BOS @ WAS,W,241,146,...,0.857,15,31,46,35,14,8,9,20,45.0
1,22025,1610612764,WAS,Washington Wizards,22500334,2025-12-04,WAS vs. BOS,L,240,101,...,0.84,12,25,37,21,5,4,18,22,-45.0
2,22025,1610612744,GSW,Golden State Warriors,22500333,2025-12-04,GSW @ PHI,L,239,98,...,0.75,11,32,43,27,12,4,13,20,-1.0
3,22025,1610612747,LAL,Los Angeles Lakers,22500336,2025-12-04,LAL @ TOR,,227,114,...,0.846,14,21,35,24,7,7,10,16,-0.6
4,22025,1610612761,TOR,Toronto Raptors,22500336,2025-12-04,TOR vs. LAL,,204,115,...,0.8,11,25,36,37,8,6,13,18,0.2
