In [62]:
from nba_api.stats.endpoints import playbyplayv3, leaguegamefinder
import pandas as pd
import pyarrow
import os
# Notebook-wide pandas display settings: show up to 100 columns and
# place no limit on the number of rows rendered.
for option_name, option_value in (
    ('display.max_columns', 100),
    ('display.max_rows', None),
):
    pd.set_option(option_name, option_value)

In [76]:
# Ask LeagueGameFinder for the chosen season and keep the first frame it returns.
season = "2023-24"  # Modify this as needed
gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable=season)
season_frames = gamefinder.get_data_frames()
games_df = season_frames[0]

In [81]:
# Keep only the rows for one team abbreviation and collect that team's game ids.
team = "DAL"
is_team_row = games_df['TEAM_ABBREVIATION'] == team
team_games = games_df.loc[is_team_row]
game_ids = team_games['GAME_ID'].tolist()

In [85]:
# Make sure the per-team output folder is present, creating it when it is missing.
directory = f'../data/raw/{team}'
if os.path.exists(directory):
    print(f"Directory exists: {directory}")
else:
    os.makedirs(directory)
    print(f"new directory made: {directory}")

Directory exists: ../data/raw/DAL


In [90]:
# Retrieve and save a parquet file for each game's play-by-play data.
#
# Games whose parquet file already exists are skipped, so re-running this cell
# only fetches what is still missing. Games that fail are retried on the next
# pass, up to `max_attempts` passes in total.
max_attempts = 3

unprocessed = game_ids.copy()
attempt = 0

while attempt < max_attempts and unprocessed:
    attempt += 1
    still_missing = []  # games that failed during this pass
    for game_id in unprocessed:
        file_name = f"{team}_{game_id}.parquet"
        file_path = os.path.join(directory, file_name)
        if os.path.exists(file_path):
            print(f"Path already found: {file_path}")
            continue
        partial_path = f"{file_path}.part"
        try:
            data = playbyplayv3.PlayByPlayV3(game_id=f"{game_id}")
            df = data.get_data_frames()[0]
            # Write to a scratch file and rename it into place only after the
            # write has finished, so an interrupted write never leaves a file
            # that the existence check above would treat as already downloaded.
            df.to_parquet(partial_path, index=False)
            os.replace(partial_path, file_path)
            print(f"Game Successfully Processed: {game_id}")
        except Exception as exc:
            # `Exception` (not a bare except) lets KeyboardInterrupt stop the
            # cell instead of being counted as a failed download.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            print(
                f"FAILED: Unable to process data from game_id: {game_id}. "
                f"({type(exc).__name__}: {exc})"
            )
            still_missing.append(game_id)
    unprocessed = still_missing

if unprocessed:
    for game_id in unprocessed:
        print(f"Failed to process {game_id}")
else:
    print("All games processed successfully.")


Path already found: ../data/raw/DAL/DAL_0042300405.parquet
Path already found: ../data/raw/DAL/DAL_0042300404.parquet
Path already found: ../data/raw/DAL/DAL_0042300403.parquet
Path already found: ../data/raw/DAL/DAL_0042300402.parquet
Path already found: ../data/raw/DAL/DAL_0042300401.parquet
Path already found: ../data/raw/DAL/DAL_0042300315.parquet
Path already found: ../data/raw/DAL/DAL_0042300314.parquet
Path already found: ../data/raw/DAL/DAL_0042300313.parquet
Path already found: ../data/raw/DAL/DAL_0042300312.parquet
Path already found: ../data/raw/DAL/DAL_0042300311.parquet
Path already found: ../data/raw/DAL/DAL_0042300226.parquet
Path already found: ../data/raw/DAL/DAL_0042300225.parquet
Path already found: ../data/raw/DAL/DAL_0042300224.parquet
Path already found: ../data/raw/DAL/DAL_0042300223.parquet
Path already found: ../data/raw/DAL/DAL_0042300222.parquet
Path already found: ../data/raw/DAL/DAL_0042300221.parquet
Path already found: ../data/raw/DAL/DAL_0042300176.parqu

In [7]:
# Load one cached game explicitly instead of relying on whatever `df` the
# download loop last assigned; that loop assigns nothing when every file is
# already on disk, which would leave `df` undefined on a fresh kernel.
# NOTE(review): this picks the first id in `game_ids` - confirm that is the
# game intended for inspection.
df = pd.read_parquet(os.path.join(directory, f"{team}_{game_ids[0]}.parquet"))

# Count how many events of each action type the game contains.
df['actionType'].value_counts()

actionType
Rebound           103
Missed Shot        94
Made Shot          73
Substitution       52
Foul               35
Free Throw         33
Turnover           22
                   19
Timeout            11
period              8
Jump Ball           3
Instant Replay      2
Name: count, dtype: int64

In [None]:
# Narrow the play-by-play frame to the columns of interest for inspection.
play_columns = [
    'gameId', 'clock', 'period', 'teamId', 'personId', 'isFieldGoal',
    'shotResult', 'scoreHome', 'scoreAway', 'description', 'actionType',
    'shotValue',
]
df[play_columns]

In [18]:
# Show the description text of the play stored under index label 21.
df['description'].loc[21]

"Holiday 2' Driving Finger Roll Layup (4 PTS) (Tatum 2 AST)"