In [1]:
import time
import pandas as pd
from tqdm import tqdm
from nba_api.stats.endpoints import leaguegamefinder, playbyplayv2

In [6]:
# ========== Parameters ==========
SEASON = "2020-21"  # Change to desired season
SEASON_TYPE = "Regular Season"  # Usually "Regular Season" or "Playoffs"
OUTPUT_FILE = f"nba_playbyplay_{SEASON.replace('-', '')}.csv"

# ========== Step 1: Get all games in the season ==========
print(f"Getting all {SEASON_TYPE} games for season {SEASON}...")
game_finder = leaguegamefinder.LeagueGameFinder(
    season_nullable=SEASON,
    season_type_nullable=SEASON_TYPE
)
df_games = game_finder.get_data_frames()[0]
game_ids = df_games["GAME_ID"].unique()

print(f"Found {len(game_ids)} games in {SEASON} {SEASON_TYPE}.")

# ========== Step 2: Loop through games and get play-by-play ==========
all_pbp = []

for game_id in tqdm(game_ids, desc="Downloading play-by-play data"):
    try:
        pbp = playbyplayv2.PlayByPlayV2(game_id=game_id)
        df_pbp = pbp.get_data_frames()[0]
        df_pbp["GAME_ID"] = game_id
        all_pbp.append(df_pbp)
        time.sleep(0.6)  # To avoid hitting API limits
    except Exception as e:
        print(f"Error fetching play-by-play for game {game_id}: {e}")
        time.sleep(1)

# ========== Step 3: Combine and save ==========
if all_pbp:
    df_all_pbp = pd.concat(all_pbp, ignore_index=True)
    df_all_pbp.to_csv(OUTPUT_FILE, index=False)
    print(f"Saved play-by-play data for {len(game_ids)} games to {OUTPUT_FILE}")
else:
    print("No play-by-play data collected.")


Getting all Regular Season games for season 2020-21...
Found 1215 games in 2020-21 Regular Season.


Downloading play-by-play data: 100%|██████████| 1215/1215 [18:33<00:00,  1.09it/s]


Saved play-by-play data for 1215 games to nba_playbyplay_202021.csv


In [12]:
df_all_pbp.describe()

Unnamed: 0,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,PERSON1TYPE,PLAYER1_ID,PLAYER1_TEAM_ID,PERSON2TYPE,PLAYER2_ID,PLAYER2_TEAM_ID,PERSON3TYPE,PLAYER3_ID,PLAYER3_TEAM_ID,VIDEO_AVAILABLE_FLAG
count,562044.0,562044.0,562044.0,562044.0,562039.0,562044.0,516261.0,562044.0,562044.0,185344.0,562044.0,562044.0,13933.0,562044.0
mean,335.968704,3.960775,17.393261,2.536784,4.281235,97480950.0,1610848000.0,1.485416,354885.7,1610859000.0,0.231395,270376.2,1610837000.0,0.728025
std,195.540746,2.705761,30.796621,1.134678,0.932876,382005000.0,661508.4,2.137039,645757.4,674943.7,0.757724,19809400.0,647740.9,0.444978
min,2.0,1.0,0.0,1.0,0.0,0.0,1610613000.0,0.0,0.0,1610613000.0,0.0,0.0,1610613000.0,0.0
25%,167.0,2.0,0.0,2.0,4.0,203469.0,1610613000.0,0.0,0.0,1610613000.0,0.0,0.0,1610613000.0,0.0
50%,335.0,4.0,1.0,3.0,4.0,1627827.0,1610613000.0,0.0,0.0,1610613000.0,0.0,0.0,1610613000.0,1.0
75%,502.0,5.0,12.0,4.0,5.0,1629631.0,1610613000.0,4.0,203145.0,1610613000.0,0.0,0.0,1610613000.0,1.0
max,846.0,18.0,110.0,6.0,7.0,1612710000.0,1612710000.0,5.0,1630466.0,1612710000.0,5.0,1612710000.0,1612710000.0,1.0
