In [1]:
import warnings

import numpy as np
import pandas as pd

# Load the full shot log from the CSV in the working directory.
total_shots = pd.read_csv(filepath_or_buffer='NBA_2004_2024_Shots.csv')



In [2]:
# Inspect the column labels of the loaded shots table.
total_shots.columns

Index(['SEASON_1', 'SEASON_2', 'TEAM_ID', 'TEAM_NAME', 'PLAYER_ID',
       'PLAYER_NAME', 'POSITION_GROUP', 'POSITION', 'GAME_DATE', 'GAME_ID',
       'HOME_TEAM', 'AWAY_TEAM', 'EVENT_TYPE', 'SHOT_MADE', 'ACTION_TYPE',
       'SHOT_TYPE', 'BASIC_ZONE', 'ZONE_NAME', 'ZONE_ABB', 'ZONE_RANGE',
       'LOC_X', 'LOC_Y', 'SHOT_DISTANCE', 'QUARTER', 'MINS_LEFT', 'SECS_LEFT'],
      dtype='object')

In [3]:
# Full franchise name -> abbreviation. The abbreviations are later compared
# against the HOME_TEAM column, so they must use the same codes as that column.
# Each name appears exactly once (the original literal listed 'Brooklyn Nets' twice).
team_dict = {
    'Atlanta Hawks': 'ATL',
    'Boston Celtics': 'BOS',
    'Brooklyn Nets': 'BKN',
    'Charlotte Hornets': 'CHA',
    'Chicago Bulls': 'CHI',
    'Cleveland Cavaliers': 'CLE',
    'Dallas Mavericks': 'DAL',
    'Denver Nuggets': 'DEN',
    'Detroit Pistons': 'DET',
    'Golden State Warriors': 'GSW',
    'Houston Rockets': 'HOU',
    'Indiana Pacers': 'IND',
    'Los Angeles Clippers': 'LAC',
    'Los Angeles Lakers': 'LAL',
    'Memphis Grizzlies': 'MEM',
    'Miami Heat': 'MIA',
    'Milwaukee Bucks': 'MIL',
    'Minnesota Timberwolves': 'MIN',
    'New Orleans Pelicans': 'NOP',
    'New York Knicks': 'NYK',
    'Oklahoma City Thunder': 'OKC',
    'Orlando Magic': 'ORL',
    'Philadelphia 76ers': 'PHI',
    'Phoenix Suns': 'PHX',
    'Portland Trail Blazers': 'POR',
    'Sacramento Kings': 'SAC',
    'San Antonio Spurs': 'SAS',
    'Toronto Raptors': 'TOR',
    'Utah Jazz': 'UTA',
    'Washington Wizards': 'WAS',
}


total_shots['TEAM_ABBR'] = total_shots['TEAM_NAME'].map(team_dict)

# Series.map leaves NaN for every TEAM_NAME that is not a key of team_dict, and a
# NaN abbreviation can never equal HOME_TEAM. Surface such names instead of
# letting them pass silently.
# NOTE(review): the data spans seasons 2004-2024, so older franchise names may be
# present in TEAM_NAME -- confirm and extend team_dict if this warning fires.
unmapped_team_names = (
    total_shots.loc[total_shots['TEAM_ABBR'].isna(), 'TEAM_NAME'].dropna().unique()
)
if len(unmapped_team_names) > 0:
    warnings.warn(
        f"No abbreviation in team_dict for TEAM_NAME value(s): "
        f"{sorted(unmapped_team_names)}; TEAM_ABBR is NaN for those rows."
    )


In [4]:
def calculate_points(row):
    """Return the points actually scored on one shot.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with the keys
            'SHOT_MADE' and 'SHOT_TYPE'.

    Returns:
        0 when 'SHOT_MADE' is falsy; otherwise 2 for '2PT Field Goal',
        3 for '3PT Field Goal', and None for any other shot type.
    """
    # A missed shot is worth nothing, whatever its type.
    if not row['SHOT_MADE']:
        return 0

    points_by_shot_type = {'2PT Field Goal': 2, '3PT Field Goal': 3}
    return points_by_shot_type.get(row['SHOT_TYPE'])

def calculate_raw_pts(row):
    """Return the point value a shot attempt is worth, made or not.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with the key 'SHOT_TYPE'.

    Returns:
        2 for '2PT Field Goal', 3 for '3PT Field Goal', None for anything else.
    """
    value_by_shot_type = {'2PT Field Goal': 2, '3PT Field Goal': 3}
    return value_by_shot_type.get(row['SHOT_TYPE'])

# Vectorised equivalent of applying calculate_points / calculate_raw_pts to every
# row: a row-wise DataFrame.apply(axis=1) runs a Python call per shot, which is
# very slow on a table of this size, while map/where operate on whole columns.
#
# raw_points_value: 2 or 3 depending on SHOT_TYPE (NaN for any other shot type).
# points_value:     the same value when the shot was made, 0 when it was missed.
shot_type_points = total_shots['SHOT_TYPE'].map({'2PT Field Goal': 2, '3PT Field Goal': 3})
shot_was_made = total_shots['SHOT_MADE'].astype(bool)

total_shots['points_value'] = shot_type_points.where(shot_was_made, 0)
total_shots['raw_points_value'] = shot_type_points

# Store the label as 0/1 integers.
total_shots['SHOT_MADE'] = total_shots['SHOT_MADE'].astype(int)
def set_is_home(row):
    """Return 1 when the row's 'TEAM_ABBR' equals its 'HOME_TEAM', else 0.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with the keys
            'TEAM_ABBR' and 'HOME_TEAM'.
    """
    if row['TEAM_ABBR'] == row['HOME_TEAM']:
        return 1
    return 0

# Flag shots taken by the home team: 1 where TEAM_ABBR equals HOME_TEAM, else 0.
# A column-wise comparison gives the same 0/1 values as applying set_is_home to
# each row, without a Python call per shot. A missing TEAM_ABBR compares unequal
# and therefore yields 0.
total_shots['is_home'] = (total_shots['TEAM_ABBR'] == total_shots['HOME_TEAM']).astype('int64')

total_shots

Unnamed: 0,SEASON_1,SEASON_2,TEAM_ID,TEAM_NAME,PLAYER_ID,PLAYER_NAME,POSITION_GROUP,POSITION,GAME_DATE,GAME_ID,...,LOC_X,LOC_Y,SHOT_DISTANCE,QUARTER,MINS_LEFT,SECS_LEFT,TEAM_ABBR,points_value,raw_points_value,is_home
0,2024,2023-24,1610612764,Washington Wizards,1629673,Jordan Poole,G,SG,11-03-2023,22300003,...,-0.4,17.45,12,1,11,1,WAS,0,2,0
1,2024,2023-24,1610612764,Washington Wizards,1630166,Deni Avdija,F,SF,11-03-2023,22300003,...,1.5,30.55,25,1,10,26,WAS,3,3,0
2,2024,2023-24,1610612764,Washington Wizards,1626145,Tyus Jones,G,PG,11-03-2023,22300003,...,-3.3,6.55,3,1,9,46,WAS,2,2,0
3,2024,2023-24,1610612764,Washington Wizards,1629673,Jordan Poole,G,SG,11-03-2023,22300003,...,-1.0,5.85,1,1,8,30,WAS,2,2,0
4,2024,2023-24,1610612764,Washington Wizards,1626145,Tyus Jones,G,PG,11-03-2023,22300003,...,-0.0,6.25,1,1,8,8,WAS,2,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4231257,2004,2003-04,1610612755,Philadelphia 76ers,2422,John Salmons,G,SG,10-28-2003,20300001,...,23.3,9.55,23,1,11,21,PHI,3,3,1
4231258,2004,2003-04,1610612759,San Antonio Spurs,1938,Manu Ginobili,G,SG,10-28-2003,20300002,...,15.1,25.85,25,1,11,24,SAS,0,3,1
4231259,2004,2003-04,1610612747,Los Angeles Lakers,406,Shaquille O'Neal,C,C,10-28-2003,20300003,...,-4.7,9.85,6,1,11,39,LAL,0,2,1
4231260,2004,2003-04,1610612756,Phoenix Suns,2063,Jake Voskuhl,C,C,10-28-2003,20300002,...,-2.3,6.05,2,1,11,42,PHX,0,2,0


In [5]:
# Split by season: rows whose SEASON_1 is 2024 are held out as test data,
# every other row is used for training.
train_data = total_shots.loc[total_shots['SEASON_1'].ne(2024)]

test_data = total_shots.loc[total_shots['SEASON_1'].eq(2024)]

In [6]:
# Remove the season, team, player, game, zone-label and game-clock columns,
# leaving only the columns used for modelling.
train_data_subset = train_data.drop(
    columns=[
        'SEASON_1',
        'SEASON_2',
        'TEAM_ID',
        'TEAM_NAME',
        'PLAYER_ID',
        'PLAYER_NAME',
        'POSITION',
        'GAME_ID',
        'GAME_DATE',
        'ZONE_ABB',
        'HOME_TEAM',
        'AWAY_TEAM',
        'EVENT_TYPE',
        'ZONE_NAME',
        'QUARTER',
        'MINS_LEFT',
        'SECS_LEFT',
        'TEAM_ABBR',
    ]
)

train_data_subset

Unnamed: 0,POSITION_GROUP,SHOT_MADE,ACTION_TYPE,SHOT_TYPE,BASIC_ZONE,ZONE_RANGE,LOC_X,LOC_Y,SHOT_DISTANCE,points_value,raw_points_value,is_home
218701,G,1,Jump Shot,3PT Field Goal,Left Corner 3,24+ ft.,23.4,12.95,24,3,3,0
218702,C,0,Step Back Jump shot,3PT Field Goal,Above the Break 3,24+ ft.,-0.4,31.45,26,0,3,0
218703,G,1,Running Finger Roll Layup Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,-1.3,5.25,1,2,2,0
218704,C,1,Cutting Dunk Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,-0.1,7.25,2,2,2,0
218705,F,1,Cutting Dunk Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,1.8,7.15,2,2,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...
4231257,G,1,Jump Shot,3PT Field Goal,Left Corner 3,24+ ft.,23.3,9.55,23,3,3,1
4231258,G,0,Jump Shot,3PT Field Goal,Above the Break 3,24+ ft.,15.1,25.85,25,0,3,1
4231259,C,0,Jump Shot,2PT Field Goal,In The Paint (Non-RA),Less Than 8 ft.,-4.7,9.85,6,0,2,1
4231260,C,0,Layup Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,-2.3,6.05,2,0,2,0


In [7]:
# Convert every object-dtype column to pandas' category dtype in one astype call.
object_columns = train_data_subset.select_dtypes(include=['object']).columns
train_data_subset = train_data_subset.astype({name: 'category' for name in object_columns})
train_data_subset.dtypes

POSITION_GROUP      category
SHOT_MADE              int64
ACTION_TYPE         category
SHOT_TYPE           category
BASIC_ZONE          category
ZONE_RANGE          category
LOC_X                float64
LOC_Y                float64
SHOT_DISTANCE          int64
points_value           int64
raw_points_value       int64
is_home                int64
dtype: object

In [8]:
# Display the training subset.
train_data_subset

Unnamed: 0,POSITION_GROUP,SHOT_MADE,ACTION_TYPE,SHOT_TYPE,BASIC_ZONE,ZONE_RANGE,LOC_X,LOC_Y,SHOT_DISTANCE,points_value,raw_points_value,is_home
218701,G,1,Jump Shot,3PT Field Goal,Left Corner 3,24+ ft.,23.4,12.95,24,3,3,0
218702,C,0,Step Back Jump shot,3PT Field Goal,Above the Break 3,24+ ft.,-0.4,31.45,26,0,3,0
218703,G,1,Running Finger Roll Layup Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,-1.3,5.25,1,2,2,0
218704,C,1,Cutting Dunk Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,-0.1,7.25,2,2,2,0
218705,F,1,Cutting Dunk Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,1.8,7.15,2,2,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...
4231257,G,1,Jump Shot,3PT Field Goal,Left Corner 3,24+ ft.,23.3,9.55,23,3,3,1
4231258,G,0,Jump Shot,3PT Field Goal,Above the Break 3,24+ ft.,15.1,25.85,25,0,3,1
4231259,C,0,Jump Shot,2PT Field Goal,In The Paint (Non-RA),Less Than 8 ft.,-4.7,9.85,6,0,2,1
4231260,C,0,Layup Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,-2.3,6.05,2,0,2,0


In [None]:
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Discard rows with any missing value before building the model inputs.
train_data_subset = train_data_subset.dropna()

# Target is SHOT_MADE. points_value is computed from SHOT_MADE and
# raw_points_value from SHOT_TYPE, so both are excluded from the features.
X = train_data_subset.drop(columns=['SHOT_MADE', 'points_value', 'raw_points_value'])
y = train_data_subset.loc[:, 'SHOT_MADE']

# Random 80/20 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=14,
)

# Columns CatBoost should treat as categorical.
cat_features = [
    'POSITION_GROUP',
    'ACTION_TYPE',
    'SHOT_TYPE',
    'BASIC_ZONE',
    'ZONE_RANGE',
]

# NOTE(review): 50000 iterations at learning_rate=0.001 is fitted with no eval_set
# or early stopping, and X_test / y_test are not used in this cell -- confirm that
# this is intended before running, as the fit is likely to be long.
model = CatBoostClassifier(
    loss_function='Logloss',
    iterations=50000,
    learning_rate=0.001,
    depth=6,
    cat_features=cat_features,
    verbose=200,  # log progress every 200 iterations
)

model.fit(X_train, y_train)


In [15]:
# Display the training feature matrix returned by train_test_split.
X_train

Unnamed: 0,POSITION_GROUP,ACTION_TYPE,SHOT_TYPE,BASIC_ZONE,ZONE_RANGE,LOC_X,LOC_Y,SHOT_DISTANCE,is_home
1622426,G,Cutting Layup Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,0.9,6.05,1,0
2059266,F,Step Back Jump shot,2PT Field Goal,Mid-Range,8-16 ft.,14.7,10.05,15,1
759699,F,Jump Shot,3PT Field Goal,Above the Break 3,24+ ft.,-17.8,23.75,25,0
1871086,G,Jump Bank Shot,2PT Field Goal,Mid-Range,8-16 ft.,-10.9,11.95,12,1
454459,F,Cutting Layup Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,-0.1,5.85,0,0
...,...,...,...,...,...,...,...,...,...
1556958,G,Pullup Jump shot,3PT Field Goal,Above the Break 3,24+ ft.,-15.5,27.15,26,0
4011418,G,Jump Shot,2PT Field Goal,Mid-Range,16-24 ft.,15.6,15.25,18,1
3248400,G,Jump Shot,2PT Field Goal,Mid-Range,8-16 ft.,-9.1,12.75,11,0
298893,F,Driving Dunk Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,-1.6,6.55,2,0
