In [None]:
# Notebook setup: imports, import-path tweak, pandas options and the data directory.
import os, sys

# "__file__" is a string literal here (not the dunder variable), so abspath resolves it
# against the current working directory; two dirname calls therefore yield the parent of
# the working directory, which is appended to the import path.
# NOTE(review): presumably this is what makes `nbafuns` importable — confirm.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath("__file__"))))
from nbafuns import *

from sklearn.model_selection import train_test_split
import statsmodels.api as sm
import numpy as np
from sklearn.metrics import mean_squared_error
# Turn off pandas' chained-assignment warning for the whole notebook.
# NOTE(review): `pd` is not imported explicitly in this cell (nor are `plt` / `sns`, used
# later); presumably they arrive through the star import above — confirm.
pd.options.mode.chained_assignment = None

# Relative directory holding the per-season shot parquet files read by the load cells.
shots_DIR = "./ShotLocationData/"

# Andrew Patton's Code

In [None]:
# League and season window used by the first load cell (end year is exclusive).
league = "NBA"
season = "2022"
season_start, season_end = 2015, 2024
seasons = np.arange(season_start, season_end).astype(str)

In [None]:
# Read one parquet file per season and stack them into a single frame.
shota = []
for season in seasons:
    df1 = pd.read_parquet(f"{shots_DIR}{league}_Shot_Details_{season}.parquet")
    shota.append(df1)
all_shots = pd.concat(shota)

# Lower-case the column names and shorten the made-shot flag.
all_shots.columns = [name.lower() for name in all_shots.columns]
all_shots = all_shots.rename(columns={"shot_made_flag": "shot_made"})

# Scale both court coordinates down by a factor of 10
# (presumably tenths of a foot -> feet; confirm against the data source).
for coord in ("loc_x", "loc_y"):
    all_shots[coord] = all_shots[coord] / 10

# 0/1 indicators for the shot type, and the points actually scored on each attempt.
all_shots['fg2'] = np.where(all_shots['shot_type'] == '2PT Field Goal', 1, 0)
all_shots['fg3'] = np.where(all_shots['shot_type'] == '3PT Field Goal', 1, 0)
all_shots["points"] = all_shots["shot_made"] * (2*all_shots['fg2'] + 3*all_shots['fg3'])

# Keep only attempts with shot_distance of at most 32.
within_range = all_shots['shot_distance'] <= 32
shots = all_shots[within_range].reset_index(drop=True)

In [None]:
# fig, ax = plt.subplots(figsize=(4, 3))
# #plt.axis('equal')
# plt.xlim(-25, 25)
# plt.ylim(-4, 45)
# plt.title("Shots!")
# sns.scatterplot(
#     data=shots, 
#     x="loc_x", 
#     y="loc_y", 
#     hue="shot_type",
#     alpha = 0.25)
# plt.show()

In [None]:
# 75/25 split with a fixed seed, then a no-intercept OLS of points on the shot-type flags.
split = train_test_split(shots, train_size=0.75, random_state=42)
train_shots, test_shots = split

baseline_features = ['fg2', 'fg3']
y_train, X_train = train_shots['points'], train_shots[baseline_features]

## statsmodels takes the response (endog) first, unlike scikit-learn
model = sm.OLS(endog=y_train, exog=X_train).fit()
model.summary()

In [None]:
# Score the held-out shots with the fitted OLS model and compare predicted vs. actual
# points per shot (PPS) for every player / shot type.
y_test = test_shots['points']
X_test = test_shots[['fg2', 'fg3']]
test_shots['pred_PPS'] = model.predict(X_test).values

# One row per (player, shot type): points scored, number of attempts, predicted points.
results_by_player = (test_shots
                     .groupby(['player_name', 'shot_type'])[['points', 'pred_PPS']]
                     .agg({'points': ['sum', 'count'],
                           'pred_PPS': ['sum']}))

results_by_player.columns = ['total_points', 'number_of_shots', 'total_pred_points']
results_by_player = results_by_player.reset_index()
# Residual is predicted minus actual, so a positive value means fewer points than predicted.
results_by_player['total_resid'] = results_by_player['total_pred_points'] - results_by_player['total_points']
results_by_player['resid_per_shot'] = results_by_player['total_resid']/results_by_player['number_of_shots']
results_by_player['pps_actual'] = results_by_player['total_points']/results_by_player['number_of_shots']
results_by_player['pps_pred'] = results_by_player['total_pred_points']/results_by_player['number_of_shots']

twos = results_by_player.loc[results_by_player['shot_type'] == "2PT Field Goal"]
threes = results_by_player.loc[results_by_player['shot_type'] == "3PT Field Goal"]

# Random horizontal jitter (mean 1, sd 0.05) so the two-point players do not overplot.
jittered = np.random.normal(1, 0.05, len(twos['pps_actual']))

f, ax = plt.subplots(figsize=(6, 10))
plt.title("I wish this was ggplot :(")
plt.ylabel("Actual PPS")
plt.xlabel("Jittered (1.10 predicted PPS)")
sns.scatterplot(data=twos,
                y="pps_actual",
                x=jittered,
                hue="resid_per_shot",
                size="number_of_shots",
                alpha=0.65,
                sizes=(20, 200),
                palette=sns.color_palette("vlag", as_cmap=True)
                )
plt.show()

# Attempt-weighted RMSE of predicted vs. actual PPS. The square root is taken explicitly
# because the `squared=False` argument is deprecated/removed in recent scikit-learn.
two_error = np.round(np.sqrt(mean_squared_error(y_true=twos['pps_actual'],
                                                y_pred=twos['pps_pred'],
                                                sample_weight=twos['number_of_shots'])), 3)

three_error = np.round(np.sqrt(mean_squared_error(y_true=threes['pps_actual'],
                                                  y_pred=threes['pps_pred'],
                                                  sample_weight=threes['number_of_shots'])), 3)

print(two_error)    # weighted RMSE, two-point shots
print(three_error)  # weighted RMSE, three-point shots

In [None]:
# Refit the OLS model with shot distance added to the shot-type flags, then repeat the
# per-player PPS comparison on the held-out shots.
## NOTE THAT Y COMES FIRST UNLIKE SKLEARN
X_train = train_shots[['fg2', 'fg3', 'shot_distance']]
X_test = test_shots[['fg2', 'fg3', 'shot_distance']]
model = sm.OLS(y_train, X_train).fit()
# Not the last expression of the cell, so this summary is built but not displayed.
model.summary()
test_shots['pred_PPS'] = model.predict(X_test).values

# One row per (player, shot type): points scored, number of attempts, predicted points.
results_by_player = (test_shots
                     .groupby(['player_name', 'shot_type'])[['points', 'pred_PPS']]
                     .agg({'points': ['sum', 'count'],
                           'pred_PPS': ['sum']}))

results_by_player.columns = ['total_points', 'number_of_shots', 'total_pred_points']
results_by_player = results_by_player.reset_index()
# Residual is predicted minus actual, so a positive value means fewer points than predicted.
results_by_player['total_resid'] = results_by_player['total_pred_points'] - results_by_player['total_points']
results_by_player['resid_per_shot'] = results_by_player['total_resid']/results_by_player['number_of_shots']
results_by_player['pps_actual'] = results_by_player['total_points']/results_by_player['number_of_shots']
results_by_player['pps_pred'] = results_by_player['total_pred_points']/results_by_player['number_of_shots']

twos = results_by_player.loc[results_by_player['shot_type'] == "2PT Field Goal"]
threes = results_by_player.loc[results_by_player['shot_type'] == "3PT Field Goal"]

# Attempt-weighted RMSE of predicted vs. actual PPS. The square root is taken explicitly
# because the `squared=False` argument is deprecated/removed in recent scikit-learn.
two_error = np.round(np.sqrt(mean_squared_error(y_true=twos['pps_actual'],
                                                y_pred=twos['pps_pred'],
                                                sample_weight=twos['number_of_shots'])), 3)

three_error = np.round(np.sqrt(mean_squared_error(y_true=threes['pps_actual'],
                                                  y_pred=threes['pps_pred'],
                                                  sample_weight=threes['number_of_shots'])), 3)

print(two_error)    # weighted RMSE, two-point shots
print(three_error)  # weighted RMSE, three-point shots

In [None]:
pd.options.mode.chained_assignment = None

# Two extra 0/1 features, added to `shots` in place:
#   rim_fg       - shot_distance of at most 6
#   corner_three - a three-pointer with loc_y of at most 5
at_rim = shots['shot_distance'] <= 6
low_three = (shots['fg3'] == 1) & (shots['loc_y'] <= 5)
shots['rim_fg'] = np.where(at_rim, 1, 0)
shots['corner_three'] = np.where(low_three, 1, 0)

# Re-split so both halves carry the new columns (same seed and proportion as before).
train_shots, test_shots = train_test_split(shots, train_size=0.75, random_state=42)

# Binomial GLM on make/miss; statsmodels needs the intercept added explicitly.
glm_features = ['fg2', "shot_distance", "rim_fg", "corner_three"]
y_train = train_shots['shot_made']
X_train = sm.add_constant(train_shots[glm_features])
model = sm.GLM(endog=y_train, exog=X_train, family=sm.families.Binomial()).fit()
model.summary()

In [None]:
# Predict make probability (xFG) for the held-out shots and convert to expected points.
glm_features = ['fg2', "shot_distance", "rim_fg", "corner_three"]
X_test = sm.add_constant(test_shots[glm_features])
preds = model.predict(X_test).values

test_shots['xFG'] = preds
shot_value = np.where(test_shots['shot_type'] == "2PT Field Goal", 2, 3)
test_shots['xPTS'] = shot_value * test_shots['xFG']

# Per (player, shot type): points, attempts, expected points, makes, mean make probability.
fg = (test_shots
      .groupby(['player_name', 'shot_type'])
      .agg(PTS=('points', 'sum'),
           number_of_shots=('points', 'count'),
           xPTS=('xPTS', 'sum'),
           number_of_makes=('shot_made', 'sum'),
           **{'xFG%': ('xFG', 'mean')}))

# Two-point table: rounded rates plus "Shot Making" = (PTS - xPTS) per attempt.
fg2 = (
    fg.reset_index()
    .loc[lambda d: d['shot_type'] == '2PT Field Goal']
    .assign(**{
        'FG%': lambda d: (d['number_of_makes'] / d['number_of_shots']).round(3),
        'xFG%': lambda d: d['xFG%'].round(3),
        'xPTS': lambda d: d['xPTS'].round(3),
    })
    .rename(columns={'player_name': "Player",
                     'number_of_shots': 'FG2A',
                     'number_of_makes': 'FG2M'})
    .assign(**{'Shot Making': lambda d: ((d['PTS'] - d['xPTS']) / d['FG2A']).round(2)})
)
fg2[['Player', 'FG2A', 'FG2M', 'FG%', 'xFG%', 'PTS', 'xPTS', 'Shot Making']]

In [None]:
# Three-point table: rounded rates plus "Shot Making" = (PTS - xPTS) per attempt.
fg3 = (
    fg.reset_index()
    .loc[lambda d: d['shot_type'] == '3PT Field Goal']
    .assign(**{
        'FG%': lambda d: (d['number_of_makes'] / d['number_of_shots']).round(3),
        'xFG%': lambda d: d['xFG%'].round(3),
        'xPTS': lambda d: d['xPTS'].round(3),
    })
    .rename(columns={'player_name': "Player",
                     'number_of_shots': 'FG3A',
                     'number_of_makes': 'FG3M'})
    .assign(**{'Shot Making': lambda d: ((d['PTS'] - d['xPTS']) / d['FG3A']).round(2)})
)
fg3[['Player', 'FG3A', 'FG3M', 'FG%', 'xFG%', 'PTS', 'xPTS', 'Shot Making']]

In [None]:
# Put each player's two- and three-point rows side by side; shared column names get _2 / _3.
fg2.merge(fg3, on="Player", suffixes=["_2", "_3"])

In [None]:
# `fg` is the aggregated DataFrame indexed by (player_name, shot_type), not a GroupBy,
# so it has no `.groups` / `.get_group`. Regroup on its index levels to list one
# frame per (player, shot type).
[rows for _, rows in fg.groupby(level=["player_name", "shot_type"])]

In [None]:
# Display the per-(player, shot type) aggregate built in the xFG cell.
fg

# My Model: Statsmodels

In [56]:
# Training data for the zone model: seasons 2010 through 2022 (end year is exclusive).
league = "NBA"
season_start = 2010
season_end = 2023
seasons = np.arange(season_start, season_end, 1).astype(str)

# Read one parquet file per season and stack them.
shota = []
for season in seasons:
    df1 = pd.read_parquet(shots_DIR + league + "_Shot_Details_" + season + ".parquet")
    shota.append(df1)
all_shots = pd.concat(shota)
all_shots.columns = all_shots.columns.str.lower()
all_shots = all_shots.rename(columns={"shot_made_flag":"shot_made"})
# Scale both court coordinates down by a factor of 10
# (presumably tenths of a foot -> feet; confirm against the data source).
all_shots["loc_x"] = all_shots["loc_x"]/10
all_shots["loc_y"] = all_shots["loc_y"]/10
all_shots['fg2'] = np.where(all_shots['shot_type'] == '2PT Field Goal', 1, 0)
all_shots['fg3'] = np.where(all_shots['shot_type'] == '3PT Field Goal', 1, 0)
all_shots["points"] = (2*all_shots['fg2'] + 3*all_shots['fg3'])*all_shots["shot_made"]
shots = all_shots.loc[all_shots['shot_distance'] <= 32].reset_index(drop=True)

# Collapse every zone whose name contains "Corner" into a single "Corner 3" label.
# A single .loc assignment is used because chained `df[col][mask] = ...` may write to a
# temporary copy (and does nothing under pandas Copy-on-Write).
is_corner = shots["shot_zone_basic"].str.contains("Corner", na=False)
shots.loc[is_corner, "shot_zone_basic"] = "Corner 3"

# One 0/1 indicator per shot zone.
shots["RA"] = np.where(shots["shot_zone_basic"] == "Restricted Area",1,0)
shots["Paint"] = np.where(shots["shot_zone_basic"] == "In The Paint (Non-RA)",1,0)
shots["Abv_3"] = np.where(shots["shot_zone_basic"] == "Above the Break 3",1,0)
shots["MidR"] = np.where(shots["shot_zone_basic"] == "Mid-Range",1,0)
shots["Cor_3"] = np.where(shots["shot_zone_basic"] == "Corner 3",1,0)
train_shots = shots.copy()

In [57]:
# train_shots, test_shots = train_test_split(shots, train_size=0.75, random_state=42)
# Binomial GLM of make/miss on shot distance plus the five zone indicators.
# NOTE(review): the design has an intercept AND all five zone dummies. The saved summary
# below (coefficients of +/-6.941e+08 on const and every zone, 100 iterations,
# Df Model 5 for six regressors) is what perfect collinearity looks like. Dropping one
# zone (or the intercept) would fix it, but the cell that builds X_test must change the
# same way.
zone_features = ["shot_distance", "RA", "Paint", "MidR", "Abv_3", "Cor_3"]
y_train = train_shots['shot_made']
X_train = sm.add_constant(train_shots[zone_features])  # statsmodels has no intercept by default
binomial_glm = sm.GLM(endog=y_train, exog=X_train, family=sm.families.Binomial())
model = binomial_glm.fit()
model.summary()

0,1,2,3
Dep. Variable:,shot_made,No. Observations:,2623797.0
Model:,GLM,Df Residuals:,2623791.0
Model Family:,Binomial,Df Model:,5.0
Link Function:,Logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-1740600.0
Date:,"Tue, 05 Dec 2023",Deviance:,3481100.0
Time:,00:01:21,Pearson chi2:,2620000.0
No. Iterations:,100,Pseudo R-squ. (CS):,0.05172
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-6.941e+08,5.19e+08,-1.337,0.181,-1.71e+09,3.24e+08
shot_distance,-0.0331,0.001,-54.967,0.000,-0.034,-0.032
RA,6.941e+08,5.19e+08,1.337,0.181,-3.24e+08,1.71e+09
Paint,6.941e+08,5.19e+08,1.337,0.181,-3.24e+08,1.71e+09
MidR,6.941e+08,5.19e+08,1.337,0.181,-3.24e+08,1.71e+09
Abv_3,6.941e+08,5.19e+08,1.337,0.181,-3.24e+08,1.71e+09
Cor_3,6.941e+08,5.19e+08,1.337,0.181,-3.24e+08,1.71e+09


In [58]:
# Evaluation data for the zone model: the 2023 season file only (end year is exclusive).
league = "NBA"
season_start = 2023
season_end = 2024
seasons = np.arange(season_start, season_end, 1).astype(str)

# Read one parquet file per season and stack them.
shota = []
for season in seasons:
    df1 = pd.read_parquet(shots_DIR + league + "_Shot_Details_" + season + ".parquet")
    shota.append(df1)
all_shots = pd.concat(shota)
all_shots.columns = all_shots.columns.str.lower()
all_shots = all_shots.rename(columns={"shot_made_flag":"shot_made"})
# Scale both court coordinates down by a factor of 10
# (presumably tenths of a foot -> feet; confirm against the data source).
all_shots["loc_x"] = all_shots["loc_x"]/10
all_shots["loc_y"] = all_shots["loc_y"]/10
all_shots['fg2'] = np.where(all_shots['shot_type'] == '2PT Field Goal', 1, 0)
all_shots['fg3'] = np.where(all_shots['shot_type'] == '3PT Field Goal', 1, 0)
all_shots["points"] = (2*all_shots['fg2'] + 3*all_shots['fg3'])*all_shots["shot_made"]
shots = all_shots.loc[all_shots['shot_distance'] <= 32].reset_index(drop=True)

# Collapse every zone whose name contains "Corner" into a single "Corner 3" label.
# A single .loc assignment is used because chained `df[col][mask] = ...` may write to a
# temporary copy (and does nothing under pandas Copy-on-Write).
is_corner = shots["shot_zone_basic"].str.contains("Corner", na=False)
shots.loc[is_corner, "shot_zone_basic"] = "Corner 3"

# One 0/1 indicator per shot zone.
shots["RA"] = np.where(shots["shot_zone_basic"] == "Restricted Area",1,0)
shots["Paint"] = np.where(shots["shot_zone_basic"] == "In The Paint (Non-RA)",1,0)
shots["Abv_3"] = np.where(shots["shot_zone_basic"] == "Above the Break 3",1,0)
shots["MidR"] = np.where(shots["shot_zone_basic"] == "Mid-Range",1,0)
shots["Cor_3"] = np.where(shots["shot_zone_basic"] == "Corner 3",1,0)
test_shots = shots.copy()

In [60]:
# Build the evaluation design matrix with the same columns as training and predict xFG.
zone_features = ["shot_distance", "RA", "Paint", "MidR", "Abv_3", "Cor_3"]
X_test = sm.add_constant(test_shots[zone_features])
preds = model.predict(X_test).to_numpy()

In [61]:
# Attach make probability and expected points (probability times the shot's value).
test_shots['xFG'] = preds
shot_value = np.where(test_shots['shot_type'] == "2PT Field Goal", 2, 3)
test_shots['xPTS'] = shot_value * test_shots['xFG']

# Per (player, shot type): points, attempts, expected points, makes, mean make probability.
by_player_type = test_shots.groupby(['player_name', 'shot_type'])
fg = by_player_type[['points', 'xPTS', 'shot_made', 'xFG']].agg(
    {'points': ['sum', 'count'],
     'xPTS': ['sum'],
     'shot_made': ['sum'],
     'xFG': ['mean']}
)

# Flatten the two-level column index into plain names.
fg.columns = ['PTS', 'number_of_shots', 'xPTS', 'number_of_makes', 'xFG%']

In [62]:

# Ten highest-volume two-point shooters, with rounded rates and
# "Shot Making" = (PTS - xPTS) per attempt.
fg2 = (
    fg.reset_index()
    .loc[lambda d: d['shot_type'] == '2PT Field Goal']
    .nlargest(10, 'number_of_shots')
    .sort_values('number_of_shots', ascending=False)
    .assign(**{
        'FG%': lambda d: (d['number_of_makes'] / d['number_of_shots']).round(3),
        'xFG%': lambda d: d['xFG%'].round(3),
        'xPTS': lambda d: d['xPTS'].round(3),
    })
    .rename(columns={'player_name': "Player",
                     'number_of_shots': 'FG2A',
                     'number_of_makes': 'FG2M'})
    .assign(**{'Shot Making': lambda d: ((d['PTS'] - d['xPTS']) / d['FG2A']).round(2)})
)
fg2_1 = fg2[['Player', 'FG2A', 'FG2M', 'FG%', 'xFG%', 'PTS', 'xPTS', 'Shot Making']]

In [63]:
# Ten highest-volume three-point shooters, with rounded rates and
# "Shot Making" = (PTS - xPTS) per attempt.
fg3 = (
    fg.reset_index()
    .loc[lambda d: d['shot_type'] == '3PT Field Goal']
    .nlargest(10, 'number_of_shots')
    .sort_values('number_of_shots', ascending=False)
    .assign(**{
        'FG%': lambda d: (d['number_of_makes'] / d['number_of_shots']).round(3),
        'xFG%': lambda d: d['xFG%'].round(3),
        'xPTS': lambda d: d['xPTS'].round(3),
    })
    .rename(columns={'player_name': "Player",
                     'number_of_shots': 'FG3A',
                     'number_of_makes': 'FG3M'})
    .assign(**{'Shot Making': lambda d: ((d['PTS'] - d['xPTS']) / d['FG3A']).round(2)})
)
fg3_1 = fg3[['Player', 'FG3A', 'FG3M', 'FG%', 'xFG%', 'PTS', 'xPTS', 'Shot Making']]

In [29]:
# asdsad

# My Model: Statsmodels - Location

In [68]:
# Training data for the location model: seasons 2010 through 2022 (end year is exclusive).
league = "NBA"
season_start, season_end = 2010, 2023
seasons = np.arange(season_start, season_end).astype(str)

# Read one parquet file per season and stack them.
shota = []
for season in seasons:
    df1 = pd.read_parquet(f"{shots_DIR}{league}_Shot_Details_{season}.parquet")
    shota.append(df1)
all_shots = pd.concat(shota)

# Lower-case the column names and shorten the made-shot flag.
all_shots.columns = [name.lower() for name in all_shots.columns]
all_shots = all_shots.rename(columns={"shot_made_flag": "shot_made"})

# Scale both court coordinates down by a factor of 10
# (presumably tenths of a foot -> feet; confirm against the data source).
for coord in ("loc_x", "loc_y"):
    all_shots[coord] = all_shots[coord] / 10

# 0/1 indicators for the shot type, and the points actually scored on each attempt.
all_shots['fg2'] = np.where(all_shots['shot_type'] == '2PT Field Goal', 1, 0)
all_shots['fg3'] = np.where(all_shots['shot_type'] == '3PT Field Goal', 1, 0)
all_shots["points"] = all_shots["shot_made"] * (2*all_shots['fg2'] + 3*all_shots['fg3'])

# Keep only attempts with shot_distance of at most 32.
within_range = all_shots['shot_distance'] <= 32
shots = all_shots[within_range].reset_index(drop=True)
train_shots = shots.copy()

In [69]:
# Binomial GLM of make/miss on the two court coordinates plus an intercept.
location_features = ["loc_x", "loc_y"]
y_train = train_shots['shot_made']
X_train = sm.add_constant(train_shots[location_features])  # statsmodels has no intercept by default
binomial_glm = sm.GLM(endog=y_train, exog=X_train, family=sm.families.Binomial())
model = binomial_glm.fit()
model.summary()

0,1,2,3
Dep. Variable:,shot_made,No. Observations:,2623797.0
Model:,GLM,Df Residuals:,2623794.0
Model Family:,Binomial,Df Model:,2.0
Link Function:,Logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-1776100.0
Date:,"Tue, 05 Dec 2023",Deviance:,3552200.0
Time:,00:04:45,Pearson chi2:,2630000.0
No. Iterations:,5,Pseudo R-squ. (CS):,0.02568
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.1549,0.002,89.308,0.000,0.151,0.158
loc_x,-0.0010,0.000,-8.427,0.000,-0.001,-0.001
loc_y,-0.0374,0.000,-256.902,0.000,-0.038,-0.037


In [70]:
# Evaluation data for the location model: the 2023 season file only (end year is exclusive).
league = "NBA"
season_start, season_end = 2023, 2024
seasons = np.arange(season_start, season_end).astype(str)

# Read one parquet file per season and stack them.
shota = []
for season in seasons:
    df1 = pd.read_parquet(f"{shots_DIR}{league}_Shot_Details_{season}.parquet")
    shota.append(df1)
all_shots = pd.concat(shota)

# Lower-case the column names and shorten the made-shot flag.
all_shots.columns = [name.lower() for name in all_shots.columns]
all_shots = all_shots.rename(columns={"shot_made_flag": "shot_made"})

# Scale both court coordinates down by a factor of 10
# (presumably tenths of a foot -> feet; confirm against the data source).
for coord in ("loc_x", "loc_y"):
    all_shots[coord] = all_shots[coord] / 10

# 0/1 indicators for the shot type, and the points actually scored on each attempt.
all_shots['fg2'] = np.where(all_shots['shot_type'] == '2PT Field Goal', 1, 0)
all_shots['fg3'] = np.where(all_shots['shot_type'] == '3PT Field Goal', 1, 0)
all_shots["points"] = all_shots["shot_made"] * (2*all_shots['fg2'] + 3*all_shots['fg3'])

# Keep only attempts with shot_distance of at most 32.
within_range = all_shots['shot_distance'] <= 32
shots = all_shots[within_range].reset_index(drop=True)
test_shots = shots.copy()

In [71]:
# Build the evaluation design matrix with the same columns as training and predict xFG.
location_features = ["loc_x", "loc_y"]
X_test = sm.add_constant(test_shots[location_features])
preds = model.predict(X_test).to_numpy()

In [72]:
# Attach make probability and expected points (probability times the shot's value).
test_shots['xFG'] = preds
shot_value = np.where(test_shots['shot_type'] == "2PT Field Goal", 2, 3)
test_shots['xPTS'] = shot_value * test_shots['xFG']

# Per (player, shot type): points, attempts, expected points, makes, mean make probability.
# The result keeps its two-level column index; the next cell assigns the flat names.
by_player_type = test_shots.groupby(['player_name', 'shot_type'])
fg = by_player_type[['points', 'xPTS', 'shot_made', 'xFG']].agg(
    {'points': ['sum', 'count'],
     'xPTS': ['sum'],
     'shot_made': ['sum'],
     'xFG': ['mean']}
)

In [73]:

# Flatten the aggregate's column index into plain names.
fg.columns = ['PTS', 'number_of_shots', 'xPTS', 'number_of_makes', 'xFG%']

# Ten highest-volume two-point shooters, with rounded rates and
# "Shot Making" = (PTS - xPTS) per attempt.
fg2 = (
    fg.reset_index()
    .loc[lambda d: d['shot_type'] == '2PT Field Goal']
    .nlargest(10, 'number_of_shots')
    .sort_values('number_of_shots', ascending=False)
    .assign(**{
        'FG%': lambda d: (d['number_of_makes'] / d['number_of_shots']).round(3),
        'xFG%': lambda d: d['xFG%'].round(3),
        'xPTS': lambda d: d['xPTS'].round(3),
    })
    .rename(columns={'player_name': "Player",
                     'number_of_shots': 'FG2A',
                     'number_of_makes': 'FG2M'})
    .assign(**{'Shot Making': lambda d: ((d['PTS'] - d['xPTS']) / d['FG2A']).round(2)})
)
fg2_2 = fg2[['Player', 'FG2A', 'FG2M', 'FG%', 'xFG%', 'PTS', 'xPTS', 'Shot Making']]

In [74]:
# Ten highest-volume three-point shooters, with rounded rates and
# "Shot Making" = (PTS - xPTS) per attempt.
fg3 = (
    fg.reset_index()
    .loc[lambda d: d['shot_type'] == '3PT Field Goal']
    .nlargest(10, 'number_of_shots')
    .sort_values('number_of_shots', ascending=False)
    .assign(**{
        'FG%': lambda d: (d['number_of_makes'] / d['number_of_shots']).round(3),
        'xFG%': lambda d: d['xFG%'].round(3),
        'xPTS': lambda d: d['xPTS'].round(3),
    })
    .rename(columns={'player_name': "Player",
                     'number_of_shots': 'FG3A',
                     'number_of_makes': 'FG3M'})
    .assign(**{'Shot Making': lambda d: ((d['PTS'] - d['xPTS']) / d['FG3A']).round(2)})
)
fg3_2 = fg3[['Player', 'FG3A', 'FG3M', 'FG%', 'xFG%', 'PTS', 'xPTS', 'Shot Making']]

In [76]:
# Two-point top-10 table from the zone-based GLM (distance + zone indicators).
fg2_1

Unnamed: 0,Player,FG2A,FG2M,FG%,xFG%,PTS,xPTS,Shot Making
442,Joel Embiid,251,133,0.53,0.494,266,248.224,0.07
301,Giannis Antetokounmpo,238,154,0.647,0.539,308,256.48,0.22
783,Shai Gilgeous-Alexander,237,137,0.578,0.486,274,230.249,0.18
529,Kevin Durant,235,126,0.536,0.448,252,210.479,0.18
43,Anthony Davis,216,119,0.551,0.513,238,221.648,0.08
680,Nikola Jokic,214,142,0.664,0.494,284,211.349,0.34
206,DeMar DeRozan,212,97,0.458,0.457,194,193.914,0.0
100,Cade Cunningham,204,90,0.441,0.49,180,199.866,-0.1
57,Bam Adebayo,199,107,0.538,0.462,214,183.945,0.15
878,Zion Williamson,196,108,0.551,0.552,216,216.576,-0.0


In [75]:
# Two-point top-10 table from the location-based GLM (loc_x, loc_y).
fg2_2

Unnamed: 0,Player,FG2A,FG2M,FG%,xFG%,PTS,xPTS,Shot Making
442,Joel Embiid,251,133,0.53,0.48,266,241.077,0.1
301,Giannis Antetokounmpo,238,154,0.647,0.502,308,238.839,0.29
783,Shai Gilgeous-Alexander,237,137,0.578,0.489,274,231.577,0.18
529,Kevin Durant,235,126,0.536,0.469,252,220.357,0.13
43,Anthony Davis,216,119,0.551,0.496,238,214.118,0.11
680,Nikola Jokic,214,142,0.664,0.496,284,212.213,0.34
206,DeMar DeRozan,212,97,0.458,0.461,194,195.63,-0.01
100,Cade Cunningham,204,90,0.441,0.481,180,196.286,-0.08
57,Bam Adebayo,199,107,0.538,0.475,214,188.862,0.13
878,Zion Williamson,196,108,0.551,0.516,216,202.196,0.07


In [77]:
# Three-point top-10 table from the zone-based GLM (distance + zone indicators).
fg3_1

Unnamed: 0,Player,FG3A,FG3M,FG%,xFG%,PTS,xPTS,Shot Making
796,Stephen Curry,163,72,0.442,0.347,216,169.536,0.29
591,Luka Doncic,146,60,0.411,0.347,180,151.951,0.19
417,Jayson Tatum,134,50,0.373,0.353,150,141.907,0.06
819,Tim Hardaway Jr.,133,53,0.398,0.358,159,142.864,0.12
232,Desmond Bane,131,49,0.374,0.353,147,138.628,0.06
568,LaMelo Ball,130,50,0.385,0.348,150,135.802,0.11
576,Lauri Markkanen,126,49,0.389,0.357,147,134.981,0.1
177,Damian Lillard,114,37,0.325,0.344,111,117.699,-0.06
638,Michael Porter Jr.,114,45,0.395,0.358,135,122.447,0.11
850,Tyrese Maxey,114,48,0.421,0.346,144,118.162,0.23


In [79]:
# Three-point top-10 table from the location-based GLM (loc_x, loc_y).
fg3_2

Unnamed: 0,Player,FG3A,FG3M,FG%,xFG%,PTS,xPTS,Shot Making
796,Stephen Curry,163,72,0.442,0.348,216,170.123,0.28
591,Luka Doncic,146,60,0.411,0.337,180,147.39,0.22
417,Jayson Tatum,134,50,0.373,0.354,150,142.292,0.06
819,Tim Hardaway Jr.,133,53,0.398,0.379,159,151.04,0.06
232,Desmond Bane,131,49,0.374,0.365,147,143.352,0.03
568,LaMelo Ball,130,50,0.385,0.359,150,139.85,0.08
576,Lauri Markkanen,126,49,0.389,0.381,147,144.011,0.02
177,Damian Lillard,114,37,0.325,0.34,111,116.338,-0.05
638,Michael Porter Jr.,114,45,0.395,0.378,135,129.114,0.05
850,Tyrese Maxey,114,48,0.421,0.338,144,115.538,0.25


In [None]:
# NOTE(review): undefined name — running this cell raises NameError (see the saved
# traceback). Presumably a deliberate stop so "Run All" halts before the next section;
# confirm before removing.
asdasd

NameError: name 'asdasd' is not defined

# My Model: Scikit-learn

In [86]:
# Data for the scikit-learn section: seasons 2010 through 2022 (end year is exclusive).
league = "NBA"
season_start, season_end = 2010, 2023
seasons = np.arange(season_start, season_end).astype(str)

# Read one parquet file per season and stack them.
shota = []
for season in seasons:
    df1 = pd.read_parquet(f"{shots_DIR}{league}_Shot_Details_{season}.parquet")
    shota.append(df1)
all_shots = pd.concat(shota)

# Lower-case the column names and shorten the made-shot flag.
all_shots.columns = [name.lower() for name in all_shots.columns]
all_shots = all_shots.rename(columns={"shot_made_flag": "shot_made"})

# Scale both court coordinates down by a factor of 10
# (presumably tenths of a foot -> feet; confirm against the data source).
for coord in ("loc_x", "loc_y"):
    all_shots[coord] = all_shots[coord] / 10

# 0/1 indicators for the shot type, and the points actually scored on each attempt.
all_shots['fg2'] = np.where(all_shots['shot_type'] == '2PT Field Goal', 1, 0)
all_shots['fg3'] = np.where(all_shots['shot_type'] == '3PT Field Goal', 1, 0)
all_shots["points"] = all_shots["shot_made"] * (2*all_shots['fg2'] + 3*all_shots['fg3'])

# Keep only attempts with shot_distance of at most 32.
within_range = all_shots['shot_distance'] <= 32
shots = all_shots[within_range].reset_index(drop=True)

In [121]:
# Store both zone label columns as pandas categoricals (in place on `shots`).
for zone_col in ("shot_zone_basic", "shot_zone_area"):
    shots[zone_col] = shots[zone_col].astype("category")

In [122]:
# One-hot encode the basic shot zone; all other columns pass through unchanged.
encode_cols = ['shot_zone_basic']
df_encoded = pd.get_dummies(data=shots, columns=encode_cols)

In [123]:
# Display the one-hot encoded frame (the notebook shows a truncated view of it).
df_encoded

Unnamed: 0,grid_type,game_id,game_event_id,player_id,player_name,team_id,team_name,period,minutes_remaining,seconds_remaining,...,vtm,fg2,fg3,points,shot_zone_basic_Above the Break 3,shot_zone_basic_In The Paint (Non-RA),shot_zone_basic_Left Corner 3,shot_zone_basic_Mid-Range,shot_zone_basic_Restricted Area,shot_zone_basic_Right Corner 3
0,Shot Chart Detail,21000043,582,202399,Jeff Adrien,1610612744,Golden State Warriors,4,0,4,...,GSW,1,0,2,False,False,False,False,True,False
1,Shot Chart Detail,21000119,288,202399,Jeff Adrien,1610612744,Golden State Warriors,3,6,23,...,GSW,1,0,2,False,False,False,False,True,False
2,Shot Chart Detail,21000119,292,202399,Jeff Adrien,1610612744,Golden State Warriors,3,5,44,...,GSW,1,0,0,False,False,False,True,False,False
3,Shot Chart Detail,21000137,299,202399,Jeff Adrien,1610612744,Golden State Warriors,3,5,52,...,GSW,1,0,0,False,False,False,False,True,False
4,Shot Chart Detail,21000137,308,202399,Jeff Adrien,1610612744,Golden State Warriors,3,5,1,...,GSW,1,0,2,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2623792,Shot Chart Detail,22201229,259,1627826,Ivica Zubac,1610612746,LA Clippers,2,4,57,...,LAC,1,0,2,False,True,False,False,False,False
2623793,Shot Chart Detail,22201229,304,1627826,Ivica Zubac,1610612746,LA Clippers,2,1,13,...,LAC,1,0,0,False,False,False,False,True,False
2623794,Shot Chart Detail,22201229,360,1627826,Ivica Zubac,1610612746,LA Clippers,3,10,19,...,LAC,1,0,2,False,False,False,False,True,False
2623795,Shot Chart Detail,22201229,416,1627826,Ivica Zubac,1610612746,LA Clippers,3,5,48,...,LAC,1,0,2,False,True,False,False,False,False


In [87]:
# Inspect the first row of `shots` to see the available columns and example values.
shots.iloc[0]

grid_type                  Shot Chart Detail
game_id                             21000043
game_event_id                            582
player_id                             202399
player_name                      Jeff Adrien
team_id                           1610612744
team_name              Golden State Warriors
period                                     4
minutes_remaining                          0
seconds_remaining                          4
event_type                         Made Shot
action_type                         Tip Shot
shot_type                     2PT Field Goal
shot_zone_basic              Restricted Area
shot_zone_area                     Center(C)
shot_zone_range              Less Than 8 ft.
shot_distance                              1
loc_x                                    1.5
loc_y                                    0.7
shot_attempted_flag                        1
shot_made                                  1
game_date                           20101031
htm       

In [111]:
# Group by the scalar key rather than a one-element list: groups are then addressed by a
# plain player name, which is how the later `get_group("Stephen Curry")` looks them up
# (recent pandas deprecates scalar lookups on length-1 list groupers).
shots_group = shots.groupby("player_name")

In [112]:
# Shows only the GroupBy object's repr; no rows are materialised here.
shots_group

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000002179628FDD0>

In [115]:
# Pull one player's shots, restricted to the make flag and the two zone labels.
player_cols = ["player_name", "shot_made", "shot_zone_basic", "shot_zone_area"]
shots_group[player_cols].get_group("Stephen Curry")

Unnamed: 0,player_name,shot_made,shot_zone_basic,shot_zone_area
44196,Stephen Curry,1,Mid-Range,Center(C)
44197,Stephen Curry,0,Mid-Range,Right Side(R)
44198,Stephen Curry,1,Mid-Range,Left Side Center(LC)
44199,Stephen Curry,1,Mid-Range,Right Side Center(RC)
44200,Stephen Curry,0,Above the Break 3,Left Side Center(LC)
...,...,...,...,...
2453954,Stephen Curry,0,Above the Break 3,Left Side Center(LC)
2453955,Stephen Curry,1,Above the Break 3,Right Side Center(RC)
2453956,Stephen Curry,1,Restricted Area,Center(C)
2453957,Stephen Curry,1,Left Corner 3,Left Side(L)
