In [1]:
import pandas as pd

In [2]:
# Load the raw shot-level data; the path is relative to the notebook's working directory.
all_shots = pd.read_csv("shots/shots.csv")

In [3]:
# Preview the raw frame as loaded, before any columns are removed or derived.
all_shots

Unnamed: 0,GRID_TYPE,GAME_ID,GAME_EVENT_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,...,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,LOC_X,LOC_Y,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG,GAME_DATE,HTM,VTM
0,Shot Chart Detail,21500014,20,202083,Wesley Matthews,1610612742,Dallas Mavericks,1,9,4,...,Right Side Center(RC),24+ ft.,24,78,228,1,0,20151028,PHX,DAL
1,Shot Chart Detail,21500014,168,202083,Wesley Matthews,1610612742,Dallas Mavericks,2,11,2,...,Left Side Center(LC),24+ ft.,25,-232,106,1,0,20151028,PHX,DAL
2,Shot Chart Detail,21500014,217,202083,Wesley Matthews,1610612742,Dallas Mavericks,2,8,10,...,Right Side Center(RC),24+ ft.,24,177,174,1,1,20151028,PHX,DAL
3,Shot Chart Detail,21500014,340,202083,Wesley Matthews,1610612742,Dallas Mavericks,3,10,57,...,Right Side Center(RC),24+ ft.,24,222,111,1,1,20151028,PHX,DAL
4,Shot Chart Detail,21500014,358,202083,Wesley Matthews,1610612742,Dallas Mavericks,3,9,33,...,Right Side(R),16-24 ft.,21,182,110,1,0,20151028,PHX,DAL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205533,Shot Chart Detail,21501194,66,202693,Markieff Morris,1610612764,Washington Wizards,1,4,24,...,Center(C),Less Than 8 ft.,2,-19,-16,1,1,20160410,WAS,CHA
205534,Shot Chart Detail,21501194,81,202693,Markieff Morris,1610612764,Washington Wizards,1,3,20,...,Center(C),8-16 ft.,9,-29,95,1,1,20160410,WAS,CHA
205535,Shot Chart Detail,21501194,301,202693,Markieff Morris,1610612764,Washington Wizards,3,10,18,...,Center(C),24+ ft.,27,-60,267,1,0,20160410,WAS,CHA
205536,Shot Chart Detail,21501194,327,202693,Markieff Morris,1610612764,Washington Wizards,3,8,13,...,Center(C),16-24 ft.,18,-17,183,1,1,20160410,WAS,CHA


In [4]:
# Print every column label on its own line to inspect the raw schema.
for column_name in all_shots.columns:
    print(column_name)

GRID_TYPE
GAME_ID
GAME_EVENT_ID
PLAYER_ID
PLAYER_NAME
TEAM_ID
TEAM_NAME
PERIOD
MINUTES_REMAINING
SECONDS_REMAINING
EVENT_TYPE
ACTION_TYPE
SHOT_TYPE
SHOT_ZONE_BASIC
SHOT_ZONE_AREA
SHOT_ZONE_RANGE
SHOT_DISTANCE
LOC_X
LOC_Y
SHOT_ATTEMPTED_FLAG
SHOT_MADE_FLAG
GAME_DATE
HTM
VTM


In [5]:
# Columns removed before modelling: variables that are redundant with retained
# columns, or too nuanced to fit into a coaching plan.
cols_to_drop = [
    "GRID_TYPE",
    "SHOT_ATTEMPTED_FLAG",
    "GAME_ID",
    "GAME_EVENT_ID",
    # Player identity is removed in both forms (id and name).
    # NOTE(review): PLAYER_ID was missing from this list even though the recorded
    # previews after this cell no longer contain it; added so that a fresh run
    # reproduces those outputs. Confirm this matches the intended feature set.
    "PLAYER_ID",
    "PLAYER_NAME",
    "TEAM_ID",
    "EVENT_TYPE",
    "GAME_DATE",
    # Exact distance and raw court coordinates are dropped; the zone columns
    # (basic / area / range) that remain are simpler to reason about and avoid
    # having to account for teams changing sides.
    "SHOT_DISTANCE",
    "LOC_X",
    "LOC_Y",
    "HTM",
    "VTM",
]

# errors="ignore" skips labels that are already absent, so re-running this cell
# after the columns have been removed does not raise.
all_shots = all_shots.drop(columns=cols_to_drop, errors="ignore")

In [6]:
# Convert the shot-type label to its point value: labels containing "3PT" are
# worth 3, every other label 2. The vectorised string test replaces a per-row
# Python lambda; a missing SHOT_TYPE now produces NaN in SHOT_VALUE (to be
# handled by a later dropna) instead of raising a TypeError inside the lambda.
is_three_pointer = all_shots["SHOT_TYPE"].str.contains("3PT", regex=False)
all_shots["SHOT_VALUE"] = is_three_pointer.map({True: 3, False: 2})

# Time left until the end of the period, expressed in seconds.
all_shots["TIME_LEFT_SEC"] = all_shots["MINUTES_REMAINING"] * 60 + all_shots["SECONDS_REMAINING"]

# The source columns are fully represented by the derived ones, so remove them.
all_shots = all_shots.drop(
    columns=["MINUTES_REMAINING", "SECONDS_REMAINING", "SHOT_TYPE"],
    errors="ignore",
)

In [7]:
# Preview the frame after the column removal and the SHOT_VALUE / TIME_LEFT_SEC derivation.
all_shots

Unnamed: 0,TEAM_NAME,PERIOD,ACTION_TYPE,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_MADE_FLAG,SHOT_VALUE,TIME_LEFT_SEC
0,Dallas Mavericks,1,Jump Shot,Above the Break 3,Right Side Center(RC),24+ ft.,0,3,544
1,Dallas Mavericks,2,Jump Shot,Above the Break 3,Left Side Center(LC),24+ ft.,0,3,662
2,Dallas Mavericks,2,Jump Shot,Above the Break 3,Right Side Center(RC),24+ ft.,1,3,490
3,Dallas Mavericks,3,Jump Shot,Above the Break 3,Right Side Center(RC),24+ ft.,1,3,657
4,Dallas Mavericks,3,Jump Shot,Mid-Range,Right Side(R),16-24 ft.,0,2,573
...,...,...,...,...,...,...,...,...,...
205533,Washington Wizards,1,Tip Layup Shot,Restricted Area,Center(C),Less Than 8 ft.,1,2,264
205534,Washington Wizards,1,Jump Shot,In The Paint (Non-RA),Center(C),8-16 ft.,1,2,200
205535,Washington Wizards,3,Jump Shot,Above the Break 3,Center(C),24+ ft.,0,3,618
205536,Washington Wizards,3,Pullup Jump shot,Mid-Range,Center(C),16-24 ft.,1,2,493


In [8]:
# Print the labels of the columns that are still present, one per line.
for column_name in all_shots.columns:
    print(column_name)

TEAM_NAME
PERIOD
ACTION_TYPE
SHOT_ZONE_BASIC
SHOT_ZONE_AREA
SHOT_ZONE_RANGE
SHOT_MADE_FLAG
SHOT_VALUE
TIME_LEFT_SEC


In [9]:
# Merge the three zone descriptors into a single label of the form
# "<basic> - <area> - <range>", then remove the three source columns.
zone_source_columns = ["SHOT_ZONE_BASIC", "SHOT_ZONE_AREA", "SHOT_ZONE_RANGE"]
all_shots["SHOT_ZONE"] = (
    all_shots["SHOT_ZONE_BASIC"]
    + " - "
    + all_shots["SHOT_ZONE_AREA"]
    + " - "
    + all_shots["SHOT_ZONE_RANGE"]
)
all_shots = all_shots.drop(zone_source_columns, axis="columns", errors="ignore")

# Backcourt attempts are not worth analysing; filter out both spellings in turn.
for backcourt_label in ("Back Court", "Backcourt"):
    all_shots = all_shots[~all_shots["SHOT_ZONE"].str.contains(backcourt_label)]

# Optional sanity check: how many shots remain in each combined zone.
all_shots["SHOT_ZONE"].value_counts()

all_shots["SHOT_ZONE"].value_counts()

SHOT_ZONE
Restricted Area - Center(C) - Less Than 8 ft.          66762
In The Paint (Non-RA) - Center(C) - Less Than 8 ft.    19173
Above the Break 3 - Left Side Center(LC) - 24+ ft.     16458
Above the Break 3 - Right Side Center(RC) - 24+ ft.    15660
Above the Break 3 - Center(C) - 24+ ft.                10785
Mid-Range - Right Side Center(RC) - 16-24 ft.           8028
Left Corner 3 - Left Side(L) - 24+ ft.                  7759
Mid-Range - Left Side Center(LC) - 16-24 ft.            7635
Mid-Range - Right Side(R) - 8-16 ft.                    7552
Mid-Range - Left Side(L) - 8-16 ft.                     7544
Right Corner 3 - Right Side(R) - 24+ ft.                7276
Mid-Range - Center(C) - 16-24 ft.                       7214
In The Paint (Non-RA) - Center(C) - 8-16 ft.            6756
Mid-Range - Left Side(L) - 16-24 ft.                    5383
Mid-Range - Right Side(R) - 16-24 ft.                   4869
Mid-Range - Center(C) - 8-16 ft.                        2228
In The Paint (

In [10]:
# Count how often each raw ACTION_TYPE label occurs.
all_shots["ACTION_TYPE"].value_counts()

ACTION_TYPE
Jump Shot                             96966
Layup Shot                            17988
Driving Layup Shot                    12887
Pullup Jump shot                      12102
Floating Jump shot                     5175
Hook Shot                              4627
Step Back Jump shot                    4414
Tip Layup Shot                         4178
Running Layup Shot                     3563
Turnaround Jump Shot                   3548
Dunk Shot                              3246
Cutting Layup Shot                     3225
Fadeaway Jump Shot                     2934
Driving Finger Roll Layup Shot         2194
Driving Floating Jump Shot             2183
Putback Layup Shot                     1955
Reverse Layup Shot                     1911
Running Jump Shot                      1786
Turnaround Hook Shot                   1763
Jump Bank Shot                         1757
Turnaround Fadeaway shot               1633
Cutting Dunk Shot                      1494
Alley Oop Dunk Shot 

In [11]:
# Categorize Action Types: raw ACTION_TYPE labels grouped into coarse shot families.
# NOTE(review): "Turnaround Fadeaway Bank Jump Shot" is grouped with the jump shots
# although the other "... Bank ..." labels live in bank_shots — confirm this is intended.
# (It used to be listed twice in this list; the duplicate entry has been removed.)
jump_shots = ["Jump Shot", "Running Jump Shot", "Pullup Jump shot", "Step Back Jump shot",
              "Floating Jump shot", "Turnaround Jump Shot", "Running Pull-Up Jump Shot",
              "Turnaround Fadeaway Bank Jump Shot", "Driving Jump shot", "Turnaround Fadeaway shot",
              "Fadeaway Jump Shot", "Driving Floating Jump Shot"]

bank_shots = ["Jump Bank Shot", "Turnaround Bank shot", "Driving Floating Bank Jump Shot",
              "Step Back Bank Jump Shot", "Fadeaway Bank shot", "Hook Bank Shot", "Driving Bank shot",
              "Pullup Bank shot"]

layups = ["Layup Shot", "Driving Layup Shot", "Running Layup Shot", "Cutting Layup Shot",
          "Finger Roll Layup Shot", "Putback Layup Shot", "Alley Oop Layup shot",
          "Driving Reverse Layup Shot", "Running Finger Roll Layup Shot", "Running Alley Oop Layup Shot",
          "Running Reverse Layup Shot", "Reverse Layup Shot", "Driving Finger Roll Layup Shot",
          "Tip Layup Shot", "Cutting Finger Roll Layup Shot"]

dunks = ["Dunk Shot", "Driving Dunk Shot", "Putback Dunk Shot", "Alley Oop Dunk Shot",
         "Running Dunk Shot", "Reverse Dunk Shot", "Tip Dunk Shot", "Running Alley Oop Dunk Shot",
         "Running Reverse Dunk Shot", "Cutting Dunk Shot", "Driving Reverse Dunk Shot"]

# "Hook Bank Shot" is also listed in bank_shots, which takes precedence, so that
# label is always reported as "Bank Shot" and never as "Hook Shot".
hooks = ["Driving Hook Shot", "Turnaround Hook Shot", "Turnaround Bank Hook Shot", "Driving Bank Hook Shot",
         "Running Hook Shot", "Hook Shot", "Hook Bank Shot"]


def _build_action_lookup():
    """Flatten the family lists into a {raw label: family name} dictionary.

    Families are visited in precedence order and ``setdefault`` keeps the first
    family seen for a label, so a label present in several lists resolves to
    the earliest family.
    """
    families = (
        ("Jump Shot", jump_shots),
        ("Bank Shot", bank_shots),
        ("Layup", layups),
        ("Dunk", dunks),
        ("Hook Shot", hooks),
    )
    lookup = {}
    for family_name, labels in families:
        for label in labels:
            lookup.setdefault(label, family_name)
    return lookup


# Built once when this cell runs; re-run the cell after editing any list above,
# otherwise categorize_shot keeps using the previous contents.
_ACTION_TO_CATEGORY = _build_action_lookup()


# Map function
def categorize_shot(action):
    """Return the coarse shot family for a raw ACTION_TYPE label.

    Parameters
    ----------
    action : object
        Raw action label; matching is exact and case-sensitive.

    Returns
    -------
    str
        One of "Jump Shot", "Bank Shot", "Layup", "Dunk" or "Hook Shot";
        "Other" for any value that is not a known label.
    """
    try:
        return _ACTION_TO_CATEGORY.get(action, "Other")
    except TypeError:
        # Unhashable values cannot be dictionary keys and are never known labels.
        return "Other"


In [12]:
# Replace the raw action label with its coarse shot family.
all_shots["SHOT_CATEGORY"] = all_shots["ACTION_TYPE"].map(categorize_shot)
all_shots = all_shots.drop("ACTION_TYPE", axis="columns", errors="ignore")

# Discard the shots that did not fall into any of the named families.
is_uncategorised = all_shots["SHOT_CATEGORY"].str.contains("Other")
all_shots = all_shots[~is_uncategorised]

In [13]:
# Number of remaining shots in each shot family.
all_shots["SHOT_CATEGORY"].value_counts()

SHOT_CATEGORY
Jump Shot    131144
Layup         51996
Dunk          10083
Hook Shot      7566
Bank Shot      4241
Name: count, dtype: int64

In [14]:
# Remember the row count before cleaning so the effect of dropna() can be reported.
rows_before = len(all_shots)

# Keep only the rows in which every column holds a value.
all_shots = all_shots.dropna()

# Compare the row counts and report the difference.
rows_after = len(all_shots)
rows_removed = rows_before - rows_after
print(f"Rows removed due to missing values: {rows_removed}")

Rows removed due to missing values: 0


In [15]:
# Preview the frame after zone merging, shot categorisation and the missing-value check.
all_shots

Unnamed: 0,TEAM_NAME,PERIOD,SHOT_MADE_FLAG,SHOT_VALUE,TIME_LEFT_SEC,SHOT_ZONE,SHOT_CATEGORY
0,Dallas Mavericks,1,0,3,544,Above the Break 3 - Right Side Center(RC) - 24...,Jump Shot
1,Dallas Mavericks,2,0,3,662,Above the Break 3 - Left Side Center(LC) - 24+...,Jump Shot
2,Dallas Mavericks,2,1,3,490,Above the Break 3 - Right Side Center(RC) - 24...,Jump Shot
3,Dallas Mavericks,3,1,3,657,Above the Break 3 - Right Side Center(RC) - 24...,Jump Shot
4,Dallas Mavericks,3,0,2,573,Mid-Range - Right Side(R) - 16-24 ft.,Jump Shot
...,...,...,...,...,...,...,...
205533,Washington Wizards,1,1,2,264,Restricted Area - Center(C) - Less Than 8 ft.,Layup
205534,Washington Wizards,1,1,2,200,In The Paint (Non-RA) - Center(C) - 8-16 ft.,Jump Shot
205535,Washington Wizards,3,0,3,618,Above the Break 3 - Center(C) - 24+ ft.,Jump Shot
205536,Washington Wizards,3,1,2,493,Mid-Range - Center(C) - 16-24 ft.,Jump Shot


In [16]:
# Persist the cleaned dataset next to the notebook; index=False leaves the row index out of the file.
all_shots.to_csv("all_shots_cleaned.csv", index=False) # CLEANED DATASET

In [603]:
# Team names are listed conference by conference (15 each) and then joined into
# one list of 30, Eastern Conference first.
eastern_conference = [
    "Boston Celtics", "Brooklyn Nets", "New York Knicks", "Philadelphia 76ers", "Toronto Raptors",
    "Chicago Bulls", "Cleveland Cavaliers", "Detroit Pistons", "Indiana Pacers", "Milwaukee Bucks",
    "Atlanta Hawks", "Charlotte Hornets", "Miami Heat", "Orlando Magic", "Washington Wizards",
]

western_conference = [
    "Denver Nuggets", "Minnesota Timberwolves", "Oklahoma City Thunder", "Portland Trail Blazers", "Utah Jazz",
    "Golden State Warriors", "Los Angeles Clippers", "Los Angeles Lakers", "Phoenix Suns", "Sacramento Kings",
    "Dallas Mavericks", "Houston Rockets", "Memphis Grizzlies", "New Orleans Pelicans", "San Antonio Spurs",
]

nba_teams = eastern_conference + western_conference


In [604]:
# One independent copy of the shot table per team, keyed by team name.
# A name with no matching rows maps to an empty frame.
team_dfs = {}
for team in nba_teams:
    team_rows = all_shots["TEAM_NAME"] == team
    team_dfs[team] = all_shots[team_rows].copy()

In [605]:
team_dfs["Boston Celtics"] # example of one team's shots

Unnamed: 0,TEAM_NAME,PERIOD,SHOT_MADE_FLAG,SHOT_VALUE,TIME_LEFT_SEC,SHOT_ZONE,SHOT_CATEGORY
24703,Boston Celtics,1,0,3,270,Above the Break 3 - Left Side Center(LC) - 24+...,Jump Shot
24704,Boston Celtics,1,1,2,83,Restricted Area - Center(C) - Less Than 8 ft.,Layup
24705,Boston Celtics,1,1,2,3,In The Paint (Non-RA) - Center(C) - Less Than ...,Jump Shot
24706,Boston Celtics,2,1,2,698,Restricted Area - Center(C) - Less Than 8 ft.,Layup
24707,Boston Celtics,3,1,2,636,Restricted Area - Center(C) - Less Than 8 ft.,Layup
...,...,...,...,...,...,...,...
201126,Boston Celtics,4,1,2,310,In The Paint (Non-RA) - Center(C) - Less Than ...,Jump Shot
201127,Boston Celtics,4,1,2,277,Restricted Area - Center(C) - Less Than 8 ft.,Layup
201128,Boston Celtics,4,0,2,224,Mid-Range - Left Side(L) - 16-24 ft.,Jump Shot
201129,Boston Celtics,2,0,2,146,Restricted Area - Center(C) - Less Than 8 ft.,Layup


In [617]:
from sklearn.metrics import accuracy_score, f1_score

# Score the already-fitted classifier separately on each team's shots.
# NOTE(review): `X` and `xgb_model` are not defined in this cell; presumably they
# come from an earlier modelling cell (training feature frame and fitted model) —
# confirm they exist when the notebook is run top to bottom.
features = ["SHOT_CATEGORY", "SHOT_ZONE"]
target = "SHOT_MADE_FLAG"

# Leave out backcourt shots and work on an independent copy.
is_backcourt = all_shots["SHOT_ZONE"].str.contains("Back Court")
all_shots_team = all_shots[~is_backcourt].copy()

# Dummy-encode the categorical features, then align the result with the columns
# of `X`: columns missing here are added as all-zero, and the final selection
# puts everything in the order of `X.columns`.
all_shots_team_encoded = pd.get_dummies(all_shots_team[features])
missing_columns = [col for col in X.columns if col not in all_shots_team_encoded.columns]
for col in missing_columns:
    all_shots_team_encoded[col] = 0
all_shots_team_encoded = all_shots_team_encoded[X.columns]

# Teams in order of first appearance in the data.
teams = all_shots_team["TEAM_NAME"].unique()

# One record per team: number of shots plus accuracy and F1 of the predictions.
team_results = []
for team in teams:
    team_mask = all_shots_team["TEAM_NAME"] == team
    df_team = all_shots_team[team_mask]
    # Select the encoded rows by index label so features and targets line up.
    X_team = all_shots_team_encoded.loc[df_team.index]
    y_team = df_team[target]
    y_pred = xgb_model.predict(X_team)

    team_results.append(
        dict(
            Team=team,
            Shots=len(df_team),
            Accuracy=accuracy_score(y_team, y_pred),
            F1=f1_score(y_team, y_pred),
        )
    )

# Rank the teams by accuracy (highest first) and show the top five.
team_results_df = pd.DataFrame(team_results).sort_values(by="Accuracy", ascending=False)
team_results_df.head(5)

# shows accuracy of model for each team

Unnamed: 0,Team,Shots,Accuracy,F1
4,Philadelphia 76ers,6794,0.648219,0.529898
10,Houston Rockets,6812,0.638579,0.545085
26,Boston Celtics,7298,0.637983,0.524307
21,Sacramento Kings,7065,0.632696,0.543536
1,Minnesota Timberwolves,6657,0.630314,0.520179
