# SSQM V0
# Simple Shot Quality Model based on League Averages

In [None]:
import os, sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath("__file__"))))
from nbafuns import *
from nba_api.stats.endpoints import shotchartleaguewide

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole
# notebook; later cells add columns to frames obtained by slicing/filtering.
pd.set_option("mode.chained_assignment", None)

# Relative input locations used by the loading cell below.
shots_DIR = "./ShotLocationData/"  # shot-detail parquet files
data_DIR = "../fdata/"  # player box-score parquet files

In [None]:
# Analysis scope: the league and the calendar year in which the season starts.
league = "NBA"
season = "2023"

# Season label in "YYYY-YY" form (e.g. "2023-24"), as passed to the stats
# endpoint further down.
season_str = f"{season}-{str(int(season) + 1)[-2:]}"

# Lookup tables from the project helpers (nbafuns).
# NOTE(review): both dicts are later fed to Series.map on PLAYER_ID / TEAM_ID,
# so they presumably map id -> display name — confirm in nbafuns.
player_dict = get_players_pbp(league=league)
# Use the configured league rather than a second hard-coded "NBA", so the
# player and team lookups cannot silently drift apart.
teams_dict, teams_list = get_teams(league=league)

In [None]:
# Player -> team pairs read from the season's box-score file.
dft = pd.read_parquet(
    data_DIR + "NBA_Player_BoxScores_Base_" + season + ".parquet",
    columns=["PLAYER_ID", "TEAM_ID"],
)

# Shot-level data for the season, with lower-cased column names.
all_shots = pd.read_parquet(f"{shots_DIR}{league}_Shot_Details_{season}.parquet")
all_shots.columns = [name.lower() for name in all_shots.columns]
all_shots = all_shots.rename(columns={"shot_made_flag": "shot_made"})

# Scale both court coordinates down by a factor of ten.
for axis in ("loc_x", "loc_y"):
    all_shots[axis] = all_shots[axis] / 10

# 0/1 indicators for two-point and three-point attempts.
all_shots["fg2"] = np.where(all_shots["shot_type"] == "2PT Field Goal", 1, 0)
all_shots["fg3"] = np.where(all_shots["shot_type"] == "3PT Field Goal", 1, 0)

# Points scored on the attempt: the shot's value if it was made, otherwise 0.
shot_value = 2 * all_shots["fg2"] + 3 * all_shots["fg3"]
all_shots["points"] = shot_value * all_shots["shot_made"]
# shots = all_shots.loc[all_shots['shot_distance'] <= 32].reset_index(drop=True)
# shots = all_shots.copy()

In [None]:
# League-wide shooting for the season from the ShotChartLeagueWide endpoint,
# reduced to the three zone descriptors and the field-goal percentage.
zone_columns = ["SHOT_ZONE_BASIC", "SHOT_ZONE_AREA", "SHOT_ZONE_RANGE", "FG_PCT"]
stats = shotchartleaguewide.ShotChartLeagueWide(league_id="00", season=season_str)
league_avg = stats.get_data_frames()[0][zone_columns]
league_avg.columns = [name.lower() for name in zone_columns]
# The league FG% of a zone serves as the expected FG% (xFG) of a shot from it.
league_avg = league_avg.rename(columns={"fg_pct": "xFG"})


In [None]:
# Display the league-average table (zone descriptors and xFG) for inspection.
league_avg

In [None]:
# Attach the zone's league-average xFG to every shot. The default inner join
# keeps only shots whose zone triple has a matching league-average row.
zone_keys = ["shot_zone_basic", "shot_zone_area", "shot_zone_range"]
shots = all_shots.merge(league_avg, on=zone_keys)

In [None]:
# Expected points of an attempt: its value (2 for a two-point shot, 3 for a
# three-point shot) times the league-average FG% of its zone.
attempt_value = 2 * shots["fg2"] + 3 * shots["fg3"]
shots["xPTS"] = attempt_value * shots["xFG"]

In [None]:
# Display the shot table, now carrying xFG and xPTS, for inspection.
shots

In [None]:
# Per-player shooting summary: actual scoring vs. the scoring expected from
# league-average accuracy in the zones the player shot from.
fg0 = (shots
      .groupby(['player_id'])[['points', 'xPTS', 'shot_made', 'xFG']]
      .agg({'points': ['sum', 'count'],
           'xPTS': ['sum'],
           'shot_made': ['sum'],
           'xFG': ['mean']}))
fg0.columns = ['PTS', 'number_of_shots', 'xPTS', 'number_of_makes', 'xFG%']
fg = fg0.reset_index().rename(columns={'number_of_shots': 'FGA',
                                       'number_of_makes': 'FGM'})

# Effective FG% and its expected counterpart: points / (2 * attempts).
fg['eFG'] = np.round(fg['PTS'] / fg['FGA'] / 2, 3)
fg['xeFG'] = np.round(fg['xPTS'] / fg['FGA'] / 2, 3)
# fg['xFG%'] = np.round(fg['xFG%'], 3)

# Derive both metrics from the UNROUNDED difference. Previously Points_Added
# was the 2-decimal Shot_Making multiplied back by FGA, so its rounding error
# grew with shot volume and could reorder the leaderboard.
points_added = fg['PTS'] - fg['xPTS']
fg['Shot_Making'] = np.round(points_added / fg['FGA'], 2)  # points per shot above expectation
fg['Points_Added'] = np.round(points_added, 1)
fg['xPTS'] = np.round(fg['xPTS'], 1)  # round for display only, after it has been used

fg = fg[['player_id', 'FGA', 'FGM', 'eFG', 'xeFG', 'PTS', 'xPTS',
         'Shot_Making', 'Points_Added']]
# fg = fg[fg["PTS"]>100]
fg = fg.rename(columns={"player_id": "PLAYER_ID"})
fg.insert(1, "Player", fg["PLAYER_ID"].map(player_dict))

# Guard the merge: any repeated PLAYER_ID in dft would duplicate that player's
# summary row. NOTE(review): keep="last" keeps the last listed team, presumably
# the most recent one — confirm against how the box-score file is ordered.
player_team = dft.drop_duplicates(subset="PLAYER_ID", keep="last")
fg = pd.merge(fg, player_team, on="PLAYER_ID")

# Replace the numeric TEAM_ID with the mapped team value, placed after Player.
team_names = fg.pop("TEAM_ID").map(teams_dict)
fg.insert(2, "Team", team_names)

In [None]:
# Leaderboard export: skip the first column, keep players with PTS > 100, and
# rank them by Points_Added (highest first) before writing the CSV.
volume_scorers = fg.iloc[:, 1:].query("PTS > 100")
df_e = volume_scorers.sort_values(by="Points_Added", ascending=False)
df_e = df_e.reset_index(drop=True)
df_e.to_csv("NBA_Shot_Quality_V0.csv")

In [None]:
# Spot-check a single player's summary row.
fg[fg["Player"] == "Davion Mitchell"]

In [None]:
# The 20 players with the lowest expected eFG.
df = fg.nsmallest(n=20, columns="xeFG")

In [None]:
# Ten highest Shot_Making values among players with PTS > 100, without the
# first column, re-indexed from 1 so the index reads as a rank.
# NOTE(review): the gt table rendered from `df` further down is titled
# "Worst Volume Shot Makers" while this selects the largest values — confirm.
qualified = fg.iloc[:, 1:].query("PTS > 100")
df = qualified.nlargest(10, columns="Shot_Making").reset_index(drop=True)
df.index = df.index + 1

In [None]:
# Print the table as Markdown text.
markdown_table = df.to_markdown()
print(markdown_table)

In [None]:
# NOTE(review): `sdfdsf` is not defined anywhere in the visible source, so this
# cell raises NameError. Presumably a deliberate stop so that "Run All" does not
# continue into the R export cells below — confirm the intent.
sdfdsf

In [None]:
# (Re)load the rpy2 IPython extension so the %%R cell magic below is available.
%reload_ext rpy2.ipython

In [None]:
%%R -i df
# Render the data frame `df` (passed in from Python via `-i df`) as a styled gt
# table and save it as a PNG.
# NOTE(review): the title says "Worst Volume Shot Makers", while the Python cell
# that last assigns `df` selects the 10 largest Shot_Making values — confirm
# which is intended. The season "2023-24" in the title is hard-coded.
library(tidyverse)
library(gt)
df <- df
df %>% 
  gt()%>%
  tab_header(
    title = md("**Worst Volume Shot Makers 2023-24**"),
    subtitle = "Based on a Simple Shot Quality Model | Shot Making: Points per shot (PPS) above league average" 
    ) %>%
    # Colour the xeFG column with a red/green palette.
    data_color(columns = c(xeFG), palette = c("red", "green")) %>%
    cols_align(align = "center",columns = c(Shot_Making))  %>%
    # Human-readable column headers.
    cols_label(
      Shot_Making = "Shot Making", eFG = "eFG%", xeFG = "xeFG%", Points_Added = "Points Added"
    ) %>%
    # Table-wide fonts, sizes and colours.
    tab_options(
        table.background.color = "floralwhite",
        column_labels.font.size = 12,
        column_labels.font.weight = 'bold',
        row_group.font.weight = 'bold',
        row_group.background.color = "#E5E1D8",
        table.font.size = 10,
        heading.title.font.size = 20,
        heading.subtitle.font.size = 12.5,
        table.font.names = "Consolas", 
        data_row.padding = px(2)
    ) %>% 
    # Footnotes: glossary, model description, credit; then write the image.
    tab_source_note(
    source_note = "eFG%: Effective Field Goal % | xeFG%: Expected eFG% | xPTS: Expected Points")  %>% 
    tab_source_note(
    source_note = "Shot Quality Model is based on league average shooting from different regions on the court" ) %>% 
    tab_source_note(
    source_note = "@SravanNBA | Source: nba.com/stats" ) %>% gtsave("../figs/player_leaders/points_added.png",zoom=5) 

# Players Shot Quality by Team

In [None]:
# Restrict the league-wide shots to a single team.
team = 'Houston Rockets'
# Compare the column directly instead of interpolating the name into a query
# string, which would break on any name containing a single quote.
shots_team = shots[shots["team_name"] == team]
shots_team.head()

In [None]:
# Per-player shooting summary for the selected team: actual scoring vs. the
# scoring expected from league-average accuracy in the zones each player used.
fg0 = (shots_team
      .groupby(['player_name'])[['points', 'xPTS', 'shot_made', 'xFG']]
      .agg({'points': ['sum', 'count'],
           'xPTS': ['sum'],
           'shot_made': ['sum'],
           'xFG': ['mean']}))
fg0.columns = ['PTS', 'number_of_shots', 'xPTS', 'number_of_makes', 'xFG%']
fg = fg0.reset_index().rename(columns={'player_name': "Player",
                                       'number_of_shots': 'FGA',
                                       'number_of_makes': 'FGM'})
# fg = fg.nlargest(10, 'number_of_shots').sort_values('number_of_shots', ascending=False)

# Effective FG% and its expected counterpart: points / (2 * attempts).
fg['eFG%'] = np.round(fg['PTS'] / fg['FGA'] / 2, 3)
fg['xeFG%'] = np.round(fg['xPTS'] / fg['FGA'] / 2, 3)
# fg['xFG%'] = np.round(fg['xFG%'], 3)

# Derive both metrics from the UNROUNDED difference. Previously Points_Added
# was the 2-decimal Shot_Making multiplied back by FGA, so its rounding error
# grew with the number of attempts.
points_added = fg['PTS'] - fg['xPTS']
fg['Shot_Making'] = np.round(points_added / fg['FGA'], 2)  # points per shot above expectation
fg['Points_Added'] = np.round(points_added, 1)
fg['xPTS'] = np.round(fg['xPTS'], 1)  # round for display only, after it has been used

fg = fg[['Player', 'FGA', 'FGM', 'eFG%', 'xeFG%', 'PTS', 'xPTS',
         'Shot_Making', 'Points_Added']]
# Drop players with 10 or fewer points on field goals.
fg = fg[fg["PTS"] > 10]

In [None]:
# Rank the team's players by Points_Added, highest first.
fg = fg.sort_values("Points_Added", ascending=False)

In [None]:
# Strip the "%" from the two percentage column names; the R cell below refers
# to them as the bare identifiers eFG and xeFG.
r_column_names = {"eFG%": "eFG", "xeFG%": "xeFG"}
df = fg.rename(columns=r_column_names)

In [None]:
# (Re)load the rpy2 IPython extension so the %%R cell magic below is available.
%reload_ext rpy2.ipython

In [None]:
%%R -i df
# Render the data frame `df` (passed in from Python via `-i df`) as a styled gt
# table and save it as a PNG.
# NOTE(review): the title and the output file name are hard-coded to the
# Houston Rockets / 2023-24, while the team is chosen by the Python `team`
# variable — keep them in sync when changing the team.
library(tidyverse)
library(gt)
df <- df
df %>% 
  gt()%>%
  tab_header(
    title = md("**Houston Rockets Shot Making 2023-24**"),
    subtitle = "Based on a Simple Shot Quality Model" 
    ) %>%
    # Colour the xeFG column with a red/green palette.
    data_color(columns = c(xeFG), palette = c("red", "green")) %>%
    cols_align(align = "center",columns = c(Shot_Making,Points_Added))  %>%
    # Human-readable column headers.
    cols_label(
      Shot_Making = "Shot Making", eFG = "eFG%", xeFG = "xeFG%", Points_Added = "Points Added"
    ) %>%
    # Table-wide fonts, sizes and colours.
    tab_options(
        table.background.color = "floralwhite",
        column_labels.font.size = 12,
        column_labels.font.weight = 'bold',
        row_group.font.weight = 'bold',
        row_group.background.color = "#E5E1D8",
        table.font.size = 10,
        heading.title.font.size = 20,
        heading.subtitle.font.size = 12.5,
        table.font.names = "Consolas", 
        data_row.padding = px(2)
    ) %>% 
    # Footnotes: metric definition, glossary, model description, credit; then write the image.
    tab_source_note(
    source_note = "Shot Making: Points per shot (PPS) above league average")  %>% 
     tab_source_note(
    source_note = "eFG%: Effective Field Goal % | xeFG%: Expected eFG% | xPTS: Expected Points")  %>% 
    tab_source_note(
    source_note = "Shot Quality Model is based on league average shooting from different regions on the court" ) %>% 
    tab_source_note(
    source_note = "@SravanNBA | Source: nba.com/stats" ) %>% gtsave("../figs/player_leaders/rockets_points_added.png",zoom=5) 