# SSQM v2.0
## Using NBA Shooting Data: Considers shot type, closest defender distance and touch time
## Bin shots by every combination of shot range, closest-defender distance and touch time, then use each bin's league-wide FG% as the expected value for SSQM

In [None]:
import os, sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath("__file__"))))
from nbafuns import *
from nba_api.stats.endpoints import playerdashptshots, leaguedashplayerbiostats

# Turn off pandas' chained-assignment (SettingWithCopy) warning for this notebook.
pd.options.mode.chained_assignment = None

# Directory of the box-score parquet file read below (player -> team lookup).
data_DIR1 = "../data/box/"
# Directory of the shot parquet files read below.
data_DIR2 = "../data/shots/"

In [None]:
# League under analysis; also used to build the shot parquet file names.
league = "NBA"
# Lookup used later to turn PLAYER_ID into the "Player" display column.
# (get_players_pbp / get_teams presumably come from the nbafuns star import — confirm.)
player_dict = get_players_pbp(league=league)
# Lookup used later to turn TEAM_ID into the "Team" display column.
# Pass the `league` variable instead of a second hard-coded "NBA" so both
# lookups always describe the same league.
teams_dict, teams_list = get_teams(league=league)

In [None]:
# Season key used in the parquet file names
# (presumably the starting year of the 2023-24 season — confirm).
season = "2023"

# PLAYER_ID -> TEAM_ID pairs from the cumulative box-score file.
# The path is built the same way as the two shot files (one f-string driven by
# `league` and `season`) instead of a placeholder-less f-string plus "+".
dft = pd.read_parquet(
    f"{data_DIR1}{league}_Box_P_Cum_Base_{season}.parquet",
    columns=["PLAYER_ID", "TEAM_ID"],
)

# Shot rows that carry the binning columns (range, defender distance, touch time).
df = pd.read_parquet(f"{data_DIR2}{league}_Shots_{season}_All.parquet")

# Overall made/attempted field goals per player, ordered by PLAYER_ID.
dfd = pd.read_parquet(
    f"{data_DIR2}{league}_Shots_{season}_overall.parquet",
    columns=["PLAYER_ID", "FGM", "FGA"],
)
dfd = dfd.sort_values(by="PLAYER_ID").reset_index(drop=True)

In [None]:
# Keep only the identifying, counting and binning columns, and discard shots
# whose range is labelled 'Other'.
df = df[
    [
        "PLAYER_ID", "PLAYER_NAME", "PLAYER_LAST_TEAM_ID",
        "FGM", "FGA", "FG2M", "FG2A", "FG3M", "FG3A",
        "general_range", "closest_def", "touch_time",
    ]
]
df = df.query("general_range != 'Other'")

# Total makes/attempts per (range, defender distance, touch time) bin.
# Only the four counting columns are summed: summing every column and dropping
# the identifiers afterwards depends on the pandas version (string columns such
# as PLAYER_NAME are either concatenated or silently discarded, in which case
# the follow-up drop(columns=...) fails).
bin_keys = ["general_range", "closest_def", "touch_time"]
bin_totals = df.groupby(bin_keys)[["FG2M", "FG2A", "FG3M", "FG3A"]].sum()

# Expected FG% per bin = makes / attempts over all rows in the bin.
# A bin without 2-point (or 3-point) attempts yields NaN here (0/0).
df_avg = pd.DataFrame(
    {
        "xFG2": bin_totals["FG2M"] / bin_totals["FG2A"],
        "xFG3": bin_totals["FG3M"] / bin_totals["FG3A"],
    }
).reset_index()

In [None]:
df_avg

In [None]:
# Attach each bin's expected percentages (xFG2 / xFG3) to every shot row of that bin.
shots = df.merge(df_avg, on=["general_range", "closest_def", "touch_time"])

In [None]:
# Actual 2-point and 3-point percentages for every row.
shots = shots.assign(
    FG2_PCT=shots["FG2M"] / shots["FG2A"],
    FG3_PCT=shots["FG3M"] / shots["FG3A"],
)
# Divisions by zero leave inf/NaN behind; turn all of them into 0.
shots = shots.replace([np.inf, -np.inf], np.nan).fillna(0)

# Points actually scored vs. points expected from the bin percentages.
actual_points = 2 * shots["FG2A"] * shots["FG2_PCT"] + 3 * shots["FG3A"] * shots["FG3_PCT"]
expected_points = 2 * shots["FG2A"] * shots["xFG2"] + 3 * shots["FG3A"] * shots["xFG3"]
shots["PTS"] = actual_points.round(2)
shots["xPTS"] = expected_points.round(2)

In [None]:
# shots.query("PLAYER_NAME == 'LeBron James'")

In [None]:
# Roll the per-bin rows up to one row per player.
fg = shots.groupby("PLAYER_ID")[["FGM", "FGA", "PTS", "xPTS"]].sum()

# Rate stats use the attempts that survived the binning step.
binned_attempts = fg["FGA"]
fg["eFG"] = (fg["PTS"] / binned_attempts / 2).round(3)
fg["xeFG"] = (fg["xPTS"] / binned_attempts / 2).round(3)
fg["Shot_Making"] = ((fg["PTS"] - fg["xPTS"]) / binned_attempts).round(3)
fg = fg.drop(columns=["FGM", "FGA"]).fillna(0)

# Bring in FGM/FGA from the "overall" file and scale shot making by those attempts.
fg = dfd.merge(fg, on="PLAYER_ID")
fg["Points_Added"] = fg["Shot_Making"] * fg["FGA"]
fg["PTS"] = fg["PTS"].astype(int)

# reset_index() turns the old row index into a leading "index" column; the
# table cells further down strip it again with fg.iloc[:, 1:].
fg = fg.reset_index()

# Display columns: player name right after "index", team right after the player.
fg.insert(1, "Player", fg["PLAYER_ID"].map(player_dict))
fg = fg.merge(dft, on="PLAYER_ID")
fg.insert(2, "Team", fg["TEAM_ID"].map(teams_dict))
fg = fg.drop(columns=["TEAM_ID"])

# Final presentation rounding.
fg["Points_Added"] = fg["Points_Added"].round(1)
fg["Shot_Making"] = fg["Shot_Making"].round(3)

In [None]:
# fg.sort_values(by="Points_Added")

In [None]:
# kjh

In [None]:
# Ten largest Points_Added among players with more than 100 points,
# numbered from 1 for display. iloc[:, 1:] skips fg's leading "index" column.
df_e = (
    fg.iloc[:, 1:]
    .query("PTS > 100")
    .nlargest(10, columns="Points_Added")
    .reset_index(drop=True)
    .drop(columns=["PLAYER_ID"])
)
df_e.index = df_e.index + 1

In [None]:
# (Re)load the rpy2 IPython extension; the table cells in this notebook use its %%R cell magic.
%reload_ext rpy2.ipython

In [None]:
%%R -i df_e
# Render the "best volume shot makers" table from df_e (passed in from Python
# via -i) with gt and save it as a PNG.
library(tidyverse)
library(gt)
df <- df_e
df %>% 
  gt()%>%
  tab_header(
    title = md("**Best Volume Shot Makers 2023-24**"),
    subtitle = "Based on SSQM v2.0 | Shot Making: Points per shot (PPS) above league average" 
    ) %>%
    # Colour the four metric columns with the YlGn palette over 9 quantile bins.
    data_color(columns = c(eFG,xeFG,Shot_Making,Points_Added), palette = "YlGn",method="quantile",quantiles=9) %>%
    cols_align(align = "center",columns = c(Shot_Making,Points_Added))  %>%
    # Human-readable column headers.
    cols_label(
      Shot_Making = "Shot Making", eFG = "eFG%", xeFG = "xeFG%", Points_Added = "Points Added"
    ) %>%
    # Shared look of the tables in this notebook (fonts, sizes, background, row padding).
    tab_options(
        table.background.color = "floralwhite",
        column_labels.font.size = 12,
        column_labels.font.weight = 'bold',
        row_group.font.weight = 'bold',
        row_group.background.color = "#E5E1D8",
        table.font.size = 10,
        heading.title.font.size = 20,
        heading.subtitle.font.size = 12.5,
        table.font.names = "Consolas", 
        data_row.padding = px(2)
    ) %>% 
    # Footnotes: glossary, model description, credit/source; then write the image.
    tab_source_note(
    source_note = "eFG%: Effective Field Goal % | xeFG%: Expected eFG% | xPTS: Expected Points")  %>% 
    tab_source_note(
    source_note = "Simple Shot Quality Model: SSQM v2.0 is based on shot type, defender distance & touch time" ) %>% 
    tab_source_note(
    source_note = "@SravanNBA | Source: nba.com/stats" ) %>% gtsave("../figs/player_leaders/ssqm2_points_added.png",zoom=5) 

In [None]:
# Ten smallest Points_Added among players with more than 100 points,
# numbered from 1 for display. iloc[:, 1:] skips fg's leading "index" column.
qualified = fg.iloc[:, 1:].query("PTS > 100")
bottom_ten = qualified.nsmallest(10, columns="Points_Added").reset_index(drop=True)
df_e = bottom_ten.drop(columns=["PLAYER_ID"])
df_e.index = df_e.index + 1

In [None]:
%%R -i df_e
# Render the "worst volume shot makers" table from df_e (passed in from Python
# via -i) with gt and save it as a PNG.
library(tidyverse)
library(gt)
df <- df_e
df %>% 
  gt()%>%
  tab_header(
    title = md("**Worst Volume Shot Makers 2023-24**"),
    subtitle = "Based on SSQM v2.0 | Shot Making: Points per shot (PPS) above league average" 
    ) %>%
    # Colour the four metric columns with the reversed OrRd palette over 9 quantile bins.
    data_color(columns = c(eFG,xeFG,Shot_Making,Points_Added), palette = "OrRd",method="quantile",quantiles=9, reverse=TRUE) %>%
    cols_align(align = "center",columns = c(Shot_Making,Points_Added))  %>%
    # Human-readable column headers.
    cols_label(
      Shot_Making = "Shot Making", eFG = "eFG%", xeFG = "xeFG%", Points_Added = "Points Added"
    ) %>%
    # Shared look of the tables in this notebook (fonts, sizes, background, row padding).
    tab_options(
        table.background.color = "floralwhite",
        column_labels.font.size = 12,
        column_labels.font.weight = 'bold',
        row_group.font.weight = 'bold',
        row_group.background.color = "#E5E1D8",
        table.font.size = 10,
        heading.title.font.size = 20,
        heading.subtitle.font.size = 12.5,
        table.font.names = "Consolas", 
        data_row.padding = px(2)
    ) %>% 
    # Footnotes: glossary, model description, credit/source; then write the image.
    tab_source_note(
    source_note = "eFG%: Effective Field Goal % | xeFG%: Expected eFG% | xPTS: Expected Points")  %>% 
    tab_source_note(
    source_note = "Simple Shot Quality Model: SSQM v2.0 is based on shot type, defender distance & touch time" ) %>% 
    tab_source_note(
    source_note = "@SravanNBA | Source: nba.com/stats" ) %>% gtsave("../figs/player_leaders/ssqm2_points_added_worst.png",zoom=5) 

In [None]:
# Ten lowest expected eFG% (xeFG) among players with more than 500 points,
# numbered from 1 for display. iloc[:, 1:] skips fg's leading "index" column.
high_scorers = fg.iloc[:, 1:].query("PTS > 500")
df_e = (
    high_scorers
    .nsmallest(10, columns="xeFG")
    .reset_index(drop=True)
    .drop(columns=["PLAYER_ID"])
)
df_e.index = df_e.index + 1
df_e

In [None]:
%%R -i df_e
# Render the "toughest shot takers" table from df_e (passed in from Python
# via -i) with gt and save it as a PNG.
library(tidyverse)
library(gt)
df <- df_e
df %>% 
  gt()%>%
  tab_header(
    title = md("**Toughest Shot Takers 2023-24**"),
    subtitle = "Based on SSQM v2.0 | Among Players Scoring at least 500 Pts " 
    ) %>%
    # Two colour scales: reversed OrRd for eFG/xeFG, YlGn for the shot-making columns
    # (9 quantile bins each).
    data_color(columns = c(eFG,xeFG), palette = "OrRd",method="quantile",quantiles=9, reverse=TRUE) %>%
    data_color(columns = c(Shot_Making,Points_Added), palette = "YlGn",method="quantile",quantiles=9) %>%
    cols_align(align = "center",columns = c(Shot_Making,Points_Added))  %>%
    # Human-readable column headers.
    cols_label(
      Shot_Making = "Shot Making", eFG = "eFG%", xeFG = "xeFG%", Points_Added = "Points Added"
    ) %>%
    # Shared look of the tables in this notebook (fonts, sizes, background, row padding).
    tab_options(
        table.background.color = "floralwhite",
        column_labels.font.size = 12,
        column_labels.font.weight = 'bold',
        row_group.font.weight = 'bold',
        row_group.background.color = "#E5E1D8",
        table.font.size = 10,
        heading.title.font.size = 20,
        heading.subtitle.font.size = 12.5,
        table.font.names = "Consolas", 
        data_row.padding = px(2)
    ) %>% 
    # Footnotes: glossary, model description, credit/source; then write the image.
    tab_source_note(
    source_note = "Shot Making: Points per shot (PPS) above league average | eFG%: Effective Field Goal % | xeFG%: Expected eFG% | xPTS: Expected Points")  %>% 
    tab_source_note(
    source_note = "Simple Shot Quality Model: SSQM v2.0 is based on shot type, defender distance & touch time" ) %>% 
    tab_source_note(
    source_note = "@SravanNBA | Source: nba.com/stats" ) %>% gtsave("../figs/player_leaders/ssqm2_toughest.png",zoom=5) 

In [None]:
# Every New Orleans Pelicans player with more than 100 points, best Points_Added
# first, numbered from 1 for display. iloc[:, 1:] skips fg's leading "index" column.
pelicans = fg.iloc[:, 1:].query("PTS > 100").query("Team == 'New Orleans Pelicans'")
pelicans = pelicans.sort_values("Points_Added", ascending=False).reset_index(drop=True)
df_e = pelicans.drop(columns=["PLAYER_ID", "Team"])
df_e.index = df_e.index + 1

In [None]:
%%R -i df_e
# Render the New Orleans Pelicans shot-making table from df_e (passed in from
# Python via -i) with gt and save it as a PNG.
library(tidyverse)
library(gt)
df <- df_e
df %>% 
  gt()%>%
  tab_header(
    title = md("**New Orleans Pelicans Shot Making 2023-24**"),
    subtitle = "Based on SSQM v2.0 | Shot Making: Points per shot (PPS) above league average" 
    ) %>%
    # Colour the four metric columns with the PiYG palette (no method/quantiles given here).
    data_color(columns = c(eFG,xeFG,Shot_Making,Points_Added), palette = "PiYG") %>%
    cols_align(align = "center",columns = c(Shot_Making,Points_Added))  %>%
    # Human-readable column headers.
    cols_label(
      Shot_Making = "Shot Making", eFG = "eFG%", xeFG = "xeFG%", Points_Added = "Points Added"
    ) %>%
    # Shared look of the tables in this notebook (fonts, sizes, background, row padding).
    tab_options(
        table.background.color = "floralwhite",
        column_labels.font.size = 12,
        column_labels.font.weight = 'bold',
        row_group.font.weight = 'bold',
        row_group.background.color = "#E5E1D8",
        table.font.size = 10,
        heading.title.font.size = 20,
        heading.subtitle.font.size = 12.5,
        table.font.names = "Consolas", 
        data_row.padding = px(2)
    ) %>% 
    # Footnotes: metric definition, glossary, model description, credit/source;
    # then write the image.
    tab_source_note(
    source_note = "Shot Making: Points per shot (PPS) above league average")  %>% 
     tab_source_note(
    source_note = "eFG%: Effective Field Goal % | xeFG%: Expected eFG% | xPTS: Expected Points")  %>% 
    tab_source_note(
    source_note = "Simple Shot Quality Model: SSQM v2.0 is based on shot type, defender distance & touch time" ) %>% 
    tab_source_note(
    source_note = "@SravanNBA | Source: nba.com/stats" ) %>% gtsave("../figs/player_leaders/pelicans_points_added.png",zoom=5) 

In [None]:
# Target directory for CSV exports; in this notebook it is only referenced by
# the commented-out to_csv call below.
export_DIR = "../../repos/csv/"

In [None]:
# df_e.to_csv(export_DIR + "NBA_Shot_Quality.csv")

In [None]:
df_e.query("Player == 'Davion Mitchell'")

In [None]:
df_e.sort_values("PTS")

In [None]:
# Write the current df_e to a CSV in the working directory.
# NOTE(review): when the notebook runs top to bottom, df_e is the New Orleans
# Pelicans subset built above, not a league-wide table as the file name
# suggests, and export_DIR is not used here — confirm this is intended.
df_e.to_csv("NBA_Shot_Quality_V2.csv")