In [None]:
import sys
# Install the third-party packages this notebook uses, running pip through
# sys.executable so the packages go to the interpreter that runs this kernel.
!{sys.executable} -m pip install nba_api
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install matplotlib

In [1]:
import pandas as pd
import numpy as np

##################################################################

1. Play-by-play

##################################################################

In [None]:
from nba_api.stats.static import teams

# Load the static team list and pick the entry whose abbreviation is 'SAS'.
nba_teams = teams.get_teams()

spurs = [entry for entry in nba_teams if entry['abbreviation'] == 'SAS'][0]
# Keep the team id; it is the filter for the game query below.
spurs_id = spurs['id']
print('spurs_id: {}'.format(spurs_id))

In [None]:
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.library.parameters import Season
from nba_api.stats.library.parameters import SeasonType

# Query the selected team's regular-season games for the library's default season.
gamefinder = leaguegamefinder.LeagueGameFinder(
    team_id_nullable=spurs_id,
    season_nullable=Season.default,
    season_type_nullable=SeasonType.regular,
)

# Read the result records and collect their GAME_ID values.
games_dict = gamefinder.get_normalized_dict()
games = games_dict['LeagueGameFinderResults']
game_ids = [record["GAME_ID"] for record in games]
print(game_ids)

In [4]:
from nba_api.stats.endpoints import playbyplayv3

# One request per game: keep the first data frame returned by the play-by-play endpoint.
df_list = [
    playbyplayv3.PlayByPlayV3(game_id).get_data_frames()[0]
    for game_id in game_ids
]

In [None]:
# Stack the per-game frames, then keep only the columns used in this notebook.
df = pd.concat(df_list)[[
    'gameId', 'actionNumber', 'clock', 'period', 'teamId', 'personId',
    'playerNameI', 'xLegacy', 'yLegacy', 'shotDistance', 'shotResult', 'isFieldGoal',
    'scoreHome', 'scoreAway', 'location', 'actionType', 'subType', 'shotValue', 'actionId',
]]
# Number of play-by-play rows collected.
len(df)

In [None]:
# Keep only shot events: made/missed field goals and free throws.
# The explicit copy makes `df` an independent frame, so the column updates in
# the following cells write to it directly instead of to a slice of the
# unfiltered frame (avoids SettingWithCopyWarning and copy-on-write surprises).
df = df[df["actionType"].isin(["Made Shot", "Missed Shot", "Free Throw"])].copy()
df.shape

Calculate scoreDiff: the score difference from the perspective of the shooting player's team, before the shot is taken.

In [16]:
# Turn empty score strings into '0' so both columns can be cast to int afterwards.
# The result is assigned back to the column: replace(..., inplace=True) on
# df[col] operates on an intermediate Series, which pandas deprecates and which
# leaves `df` unchanged once copy-on-write is enabled.
# NOTE(review): filling blanks with '0' means the later forward-fill in
# fill_scores never finds a missing value. If a blank score means "unchanged
# since the previous event", converting blanks to NaN and forward-filling per
# game is probably what was intended - confirm against the endpoint's data.
df["scoreHome"] = df["scoreHome"].replace('', '0')
df["scoreAway"] = df["scoreAway"].replace('', '0')

In [None]:
# Cast both score columns to integers so they can be subtracted from each other.
df["scoreHome"] = df["scoreHome"].astype(int)
df["scoreAway"] = df["scoreAway"].astype(int)

In [18]:
def fill_scores(group):
    """Fill missing home/away scores within one group of rows.

    A missing value in the first row of 'scoreHome' / 'scoreAway' is set to 0;
    every later missing value is forward-filled from the closest earlier row.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows with 'scoreHome' and 'scoreAway' columns (one gameId group when
        used through groupby.apply).

    Returns
    -------
    pandas.DataFrame
        A filled copy of ``group``. The input frame is left untouched and an
        empty input is returned as an empty copy.
    """
    # Work on a copy: pandas does not support mutating the object handed over
    # by groupby.apply, and the other per-game helpers in this notebook copy too.
    group = group.copy()
    if group.empty:
        # Nothing to fill; also avoids an IndexError on the first-row access.
        return group

    for col in ('scoreHome', 'scoreAway'):
        col_pos = group.columns.get_loc(col)
        # Replace NaN in the first row with 0 so the forward-fill has a seed.
        if pd.isna(group.iloc[0, col_pos]):
            group.iloc[0, col_pos] = 0.0
        # Forward-fill the rest
        group[col] = group[col].ffill()
    return group

# Apply the function to each gameId group
# (group_keys=False: the group key is not added to the result's index).
df = df.groupby('gameId', group_keys=False).apply(fill_scores)

In [None]:
# Preview the first rows after the per-game score fill.
df.head()

In [None]:
# Score margin as seen by the row's own side: home minus away when
# location == "h", away minus home for every other location value.
df["scoreDiff"] = np.where(
    df["location"] == "h",
    df["scoreHome"] - df["scoreAway"],
    df["scoreAway"] - df["scoreHome"]
)

# Print the first 20 rows of the relevant columns to eyeball the result.
print(df[['gameId', 'actionNumber', 'scoreHome', 'scoreAway', 'scoreDiff', 'actionType', 'location']].head(20))

In [21]:
def shift_score_diff(group):
    """Return a copy of ``group`` whose 'scoreDiff' holds the previous row's value.

    Each row receives the 'scoreDiff' of the row before it (the margin before
    the current event); the first row, which has no predecessor, is set to 0.0.
    The input frame is not modified.
    """
    shifted = group.copy()
    diff_pos = shifted.columns.get_loc('scoreDiff')
    # Move every value one row down; the first row becomes NaN ...
    shifted['scoreDiff'] = shifted['scoreDiff'].shift(periods=1)
    # ... and is then replaced by a zero margin.
    shifted.iloc[0, diff_pos] = 0.0
    return shifted

# Shift scoreDiff within each game so values never carry over between games.
df = df.groupby('gameId', group_keys=False).apply(shift_score_diff)

def mirror_diff_on_location_switch(group):
    """Flip the sign of 'scoreDiff' on rows whose 'location' differs from the previous row.

    The first row always counts as a switch because it has no previous row.
    Rows with a zero 'scoreDiff' are left as they are. Returns a modified copy;
    the input frame is not changed.
    """
    mirrored = group.copy()
    # True where the location is not the same as on the row directly above.
    switched = mirrored['location'].ne(mirrored['location'].shift(1))
    # Only non-zero margins on switched rows are mirrored.
    flip = switched & mirrored['scoreDiff'].ne(0)
    mirrored['scoreDiff'] = np.where(flip, -mirrored['scoreDiff'], mirrored['scoreDiff'])
    return mirrored

# Mirror the shifted margin per game wherever the location changes between consecutive rows.
df = df.groupby('gameId', group_keys=False).apply(mirror_diff_on_location_switch)

In [22]:
# Free throws were only needed to track the score; keep field-goal attempts.
# Copy the filtered rows so that the columns added and dropped in the next
# cells modify an independent frame rather than a slice (no SettingWithCopyWarning).
df = df[df["actionType"].isin(["Made Shot", "Missed Shot"])].copy()

In [None]:
def clock_to_seconds(clock_str):
    """Convert a clock string such as 'PT11M34.00S' into seconds.

    Parameters
    ----------
    clock_str : str
        Clock text made of an optional 'PT' prefix, an optional '<minutes>M'
        part and an optional '<seconds>S' part, e.g. 'PT11M34.00S',
        'PT34.00S' or 'PT05M'.

    Returns
    -------
    float
        minutes * 60 + seconds.

    Raises
    ------
    ValueError
        If the string is empty or a component is not numeric.
    """
    # Drop the 'PT' and 'S' markers, leaving '<minutes>M<seconds>'.
    body = clock_str.replace('PT', '').replace('S', '')
    if not body.strip():
        raise ValueError(f"Empty clock value: {clock_str!r}")

    minutes_part, separator, seconds_part = body.partition('M')
    if not separator:
        # No minutes component (e.g. 'PT34.00S'): the whole body is seconds.
        minutes_part, seconds_part = '0', minutes_part

    minutes = int(minutes_part)
    # A missing seconds component (e.g. 'PT05M') counts as zero seconds.
    seconds = float(seconds_part) if seconds_part.strip() else 0.0
    return minutes * 60 + seconds

df['secondsRemaining'] = df['clock'].apply(clock_to_seconds)

# Define clutchFlag: 1 when the shot is taken with at most 300 seconds left in
# the fourth period or in any overtime period and the margin before the shot is
# within 5 points, otherwise 0.
# `period >= 4` is used because a membership test on [4, 5] ignores shots from
# period 6 onward (second and later overtimes).
# The boolean mask is computed column-wise instead of with a row-by-row apply.
df['clutchFlag'] = (
    (df['period'] >= 4)
    & (df['secondsRemaining'] <= 300)
    & (df['scoreDiff'].abs() <= 5)
).astype(int)

df['clutchFlag'].value_counts()

In [None]:
# The timing columns were only needed to derive clutchFlag; remove them in place.
# NOTE: in-place drop makes this cell fail with KeyError if it is run a second time.
df.drop(columns=["period", "clock", "secondsRemaining"], inplace=True)
df.columns

In [None]:
# Encode the shot outcome numerically: 'Made' -> 1, 'Missed' -> 0 (other values are left unchanged).
df["shotResult"] = df["shotResult"].replace({'Made': 1, 'Missed': 0})

In [None]:
# Preview the cleaned shot events before saving them.
df.head()

In [33]:
# Save the cleaned shot events. The output folder is created first because
# to_csv does not create missing directories and raises OSError without it.
import os

os.makedirs("data", exist_ok=True)
df.to_csv("data/pbp_spurs.csv", index=False)

##################################################################

2. SHOT CHART

##################################################################

In [None]:
import pandas as pd

In [None]:
# Reload the saved shot events and collect the distinct shooter ids.
pbp = pd.read_csv('data/pbp_spurs.csv')
player_ids = pbp['personId'].unique()
# Number of distinct players with at least one shot row.
len(player_ids)

In [3]:
from nba_api.stats.endpoints import shotchartdetail

# One request per player: field-goal attempts (context_measure_simple='FGA')
# for the 2024-25 season, keeping the first data frame of each response.
# NOTE(review): team_id=0 presumably means "no team filter" - confirm.
# NOTE(review): the season is fixed to '2024-25' here while the game query
# uses Season.default; the two must refer to the same season for the later
# join on game/event ids to match - confirm when re-running in a new season.
df_list = [
    shotchartdetail.ShotChartDetail(
        player_id=player_id,
        season_nullable='2024-25',
        team_id=0,
        context_measure_simple='FGA',
    ).get_data_frames()[0]
    for player_id in player_ids
]

In [None]:
# Stack the per-player shot charts into one frame and show its row count.
df_detail = pd.concat(df_list)
df_detail.shape[0]

In [None]:
# Count player shots per (zone basic, zone area, zone range) combination.
df_detail.groupby(['SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE']).size().reset_index()

In [None]:
from nba_api.stats.endpoints import shotchartleaguewide

# League-wide shot chart for the 2024-25 season (first data frame of the response).
df_league = shotchartleaguewide.ShotChartLeagueWide(season='2024-25').get_data_frames()[0]
df_league

In [None]:
# Count league rows per zone combination; the merge below joins on these three columns.
df_league.groupby(['SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE']).size().reset_index()

Merge the player shot details with the league-wide shot chart to attach the league FG_PCT of each shot zone.

In [None]:
# Inner join on the three zone columns: every player shot gets the league row of its zone;
# shots whose zone combination is absent from the league table are dropped.
df_chart = pd.merge(df_detail, df_league, how="inner", on=["SHOT_ZONE_BASIC", "SHOT_ZONE_AREA", "SHOT_ZONE_RANGE"])
df_chart.shape

In [None]:
# List the merged columns (overlapping names carry _x / _y suffixes from the merge).
df_chart.columns

In [None]:
# Remove columns that are not needed in the saved chart data (in place).
# NOTE: in-place drop makes this cell fail with KeyError if it is run a second time.
df_chart.drop(columns=["GRID_TYPE_x", "TEAM_NAME", "PERIOD", "MINUTES_REMAINING",
       "SECONDS_REMAINING", "GAME_DATE", "HTM", "VTM", "GRID_TYPE_y", "FGA", "FGM"], inplace=True)

df_chart.head()

In [12]:
# FG_PCT comes from the league table joined per zone, so name it as a zone-level percentage.
df_chart = df_chart.rename(columns={"FG_PCT": "ZONE_FG_PCT"})

In [13]:
# Save the shot chart data (without the index) for the final merge.
df_chart.to_csv("data/chart_spurs.csv", index=False)

##################################################################

3. Player pt shots

##################################################################

In [2]:
import pandas as pd

In [3]:
# Reload the saved shot events written by the play-by-play section.
df_pbp = pd.read_csv("data/pbp_spurs.csv")

In [None]:
# One row per distinct (teamId, personId) pair found in the shot events;
# the size column produced by the aggregation is not used afterwards.
df_player_teams = df_pbp.groupby(['teamId', 'personId']).size().reset_index()
df_player_teams.shape

In [None]:
from nba_api.stats.endpoints import playerdashptshots

# One request per (team, player) pair; keep the first data frame of each response.
df_list = [
    playerdashptshots.PlayerDashPtShots(
        team_id=pair["teamId"],
        player_id=pair["personId"],
    ).get_data_frames()[0]
    for _, pair in df_player_teams.iterrows()
]
# Stack all responses with a fresh 0..n-1 index.
df_ptshots = pd.concat(df_list, ignore_index=True)
df_ptshots.shape

In [6]:
# Save the per-player shooting dashboard rows (without the index).
df_ptshots.to_csv("data/ptshots_spurs.csv", index=False)

##################################################################

4. Create final dataset

##################################################################

In [None]:
# Load the two intermediate files produced by the play-by-play and shot chart sections.
df_pbp = pd.read_csv("data/pbp_spurs.csv")
df_chart = pd.read_csv("data/chart_spurs.csv")

In [None]:
# Preview the play-by-play shot rows.
df_pbp.head()

In [None]:
# Preview the shot chart rows.
df_chart.head()

In [None]:
# shape[0] is the number of rows, so the counts are labelled as rows
# (the message used to call them "columns").
print(f"pbp rows: {df_pbp.shape[0]}, chart rows: {df_chart.shape[0]}")

In [None]:
# Match each play-by-play shot with its shot chart row via game, event and player ids.
# Inner join: shots present in only one of the two sources are dropped.
df_final = df_pbp.merge(
    df_chart,
    how="inner",
    left_on=["gameId", "actionNumber", "personId"],
    right_on=["GAME_ID", "GAME_EVENT_ID", "PLAYER_ID"],
)
df_final.shape

In [None]:
# List the columns contributed by both sources.
df_final.columns

In [None]:
# Sanity check: True only if the play-by-play result equals the shot chart made flag on every row.
(df_final["shotResult"] == df_final["SHOT_MADE_FLAG"]).all()

In [None]:
# Show the first rows where the two sources disagree on the shot distance.
df_final[df_final["shotDistance"] != df_final["SHOT_DISTANCE"]][["shotDistance", "SHOT_DISTANCE"]].head()

In [None]:
# Distribution of SHOT_ATTEMPTED_FLAG; the column is dropped in the next cell.
df_final["SHOT_ATTEMPTED_FLAG"].value_counts()

In [None]:
# Drop the play-by-play columns that duplicate shot chart columns, plus fields not kept in the final data (in place).
# NOTE: in-place drop makes this cell fail with KeyError if it is run a second time.
df_final.drop(columns=['gameId', 'actionNumber', 'teamId', 'personId',
       'playerNameI', 'xLegacy', 'yLegacy', 'shotDistance', 'shotResult',
       'isFieldGoal', 'scoreHome', 'scoreAway', 'location', 'actionType',
       'subType', 'EVENT_TYPE', 'SHOT_TYPE', 'SHOT_ATTEMPTED_FLAG'], inplace=True)

In [18]:
# Give the remaining play-by-play columns the upper-case naming used by the shot chart columns.
df_final.rename(columns={"shotValue": "SHOT_VALUE", "actionId": "ACTION_ID", "scoreDiff": "SCORE_DIFF", "clutchFlag": "CLUTCH_FLAG"}, inplace=True)

In [None]:
# Preview the merged and renamed frame.
df_final.head()

In [None]:
# Save the merged play-by-play / shot chart data (without the index).
df_final.to_csv("data/pbp-chart_spurs.csv", index=False)

In [None]:
# Load the saved per-player shooting dashboard rows and list their columns.
df_ptshots = pd.read_csv("data/ptshots_spurs.csv")
df_ptshots.columns

In [None]:
# Keep a single row per PLAYER_ID: after sorting by GP in descending order,
# the first row of each player is the one with the highest GP.
df_ptshots = df_ptshots.sort_values('GP', ascending=False).drop_duplicates(subset='PLAYER_ID', keep='first')
df_ptshots.shape

In [25]:
# Remove bookkeeping columns that are not carried into the final data (in place).
# NOTE: in-place drop makes this cell fail with KeyError if it is run a second time.
df_ptshots.drop(columns=["PLAYER_NAME_LAST_FIRST", "SORT_ORDER", "GP", "G",
       "SHOT_TYPE"], inplace=True)

In [None]:
# Shape before joining the player dashboard, for comparison with the merged shape below.
df_final.shape

In [None]:
# Attach each player's dashboard row to every shot of that player.
# Inner join: shots of players without a dashboard row are dropped.
df_final_shots = pd.merge(df_final, df_ptshots, how="inner", on="PLAYER_ID")
df_final_shots.shape

In [None]:
# Pick the player's percentage that matches the shot value of each row:
# FG2_PCT when SHOT_VALUE is 2, FG3_PCT when it is 3, None for any other value.
df_final_shots['PLAYER_SHOT_PCT'] = df_final_shots.apply(
    lambda row: row['FG2_PCT'] if row['SHOT_VALUE'] == 2 else row['FG3_PCT'] if row['SHOT_VALUE'] == 3 else None,
    axis=1
)

# The two source columns are no longer needed once PLAYER_SHOT_PCT is set.
df_final_shots.drop(columns=["FG2_PCT", "FG3_PCT"], inplace=True)

In [None]:
# List the columns of the combined frame.
df_final_shots.columns

In [None]:
# drop() returns a new frame and leaves the original untouched, so the result
# is assigned back; otherwise the two event-id columns stay in the frame and
# end up in the exported CSV.
df_final_shots = df_final_shots.drop(columns=["ACTION_ID", "GAME_EVENT_ID"])
df_final_shots.head()

In [33]:
# Write the final dataset one directory above the notebook's working directory (without the index).
df_final_shots.to_csv("../final_data_spurs.csv", index=False)