In [None]:
!pip install ripser

In [None]:
!pip install nba_api

In [None]:
import pandas as pd
import numpy as np
from nba_api.stats.endpoints import commonteamroster
from nba_api.stats.static import teams
from tqdm import tqdm
from scipy import stats

In [None]:
def load_play_type_data(offensive_types, defensive_types):
  offensive_data = {}
  for i in offensive_types:
    offensive_data[i] = pd.read_csv(i + "_offense.csv")

  defensive_data = {}
  for i in defensive_types:
    defensive_data[i] = pd.read_csv(i + "_defense.csv")

  return offensive_data, defensive_data

In [None]:
def load_normalized_play_type_data(offensive_types, defensive_types):
  offensive_data = {}
  for i in offensive_types:
    offensive_data[i] = pd.read_csv(i + "_offense.csv").dropna()
    offensive_data[i]['REL_PPP'] = stats.zscore(offensive_data[i]["PPP"])
  defensive_data = {}
  for i in defensive_types:
    defensive_data[i] = pd.read_csv(i + "_defense.csv").dropna()

    INV_PPP = 1.0 / defensive_data[i]["PPP"]
    INV_PPP[np.isinf(INV_PPP)] = 0.0
    defensive_data[i]["REL_PPP"] = stats.zscore(INV_PPP)

  return offensive_data, defensive_data

In [None]:
def generate_team_space(player_names, offensive_data, defensive_data):
  team_data = [] * len(player_names)
  for i in range(len(player_names)):
    player_data = []
    for play_type in offensive_data.keys():
      df = offensive_data[play_type]
      if player_names[i] in df['PLAYER'].values:
        player_data.append(df[df['PLAYER'] == player_names[i]]['REL_PPP'].values[0])
      else:
        player_data.append(0.0)

    for play_type in defensive_data.keys():
      df = defensive_data[play_type]
      if player_names[i] in df['PLAYER'].values:
        player_data.append(df[df['PLAYER'] == player_names[i]]['REL_PPP'].values[0])
      else:
        player_data.append(0.0)

    team_data.append(player_data)

  return np.array(team_data)



In [320]:
def generate_team_offensive_space(player_names, offensive_data):
  team_data = [] * len(player_names)
  for i in range(len(player_names)):
    player_data = []
    for play_type in offensive_data.keys():
      df = offensive_data[play_type]
      if player_names[i] in df['PLAYER'].values:
        player_data.append(df[df['PLAYER'] == player_names[i]]['REL_PPP'].values[0])
      else:
        player_data.append(0.0)

    team_data.append(player_data)
  return np.array(team_data)

In [334]:
def generate_team_defensive_space(player_names, defensive_data):
  team_data = [] * len(player_names)
  for i in range(len(player_names)):
    player_data = []
    for play_type in defensive_data.keys():
      df = defensive_data[play_type]
      if player_names[i] in df['PLAYER'].values:
        player_data.append(df[df['PLAYER'] == player_names[i]]['REL_PPP'].values[0])
      else:
        player_data.append(0.0)

    team_data.append(player_data)
  return np.array(team_data)

In [None]:
offensive_play_types = [
    'transition',
    'iso',
    'pnr_handler',
    'pnr_roller',
    'spot_up',
    'putback',
    'post_up',
    'off_screen',
    'handoff',
    'cut'
]
no_defensive_stats = ['putback', 'cut', 'transition']
defensive_play_types = [i for i in offensive_play_types if i not in no_defensive_stats]

In [None]:
#get player rosters
all_team_ids = [i['id'] for i in teams.get_teams()]
all_rosters = [commonteamroster.CommonTeamRoster(team_id=i, season="2023-24").get_data_frames()[0] for i in all_team_ids]

In [None]:
all_rosters[0]

In [345]:
#Load play type data
offensive_data, defensive_data = load_normalized_play_type_data(offensive_play_types, defensive_play_types)

In [357]:
#Generate space matrix for each time
team_space_dict = {}
for team in all_rosters:
  team_id = team['TeamID'].values[0]
  team_space_dict[team_id] = generate_team_space(team["PLAYER"].tolist(), offensive_data, defensive_data)

In [358]:
maxdim = 1
team_id = []
mean_deaths = {}
std_dev = {}
persistence_vals = {}
homology_objects = {}

for i in range(maxdim):
  mean_deaths[str(i)] = []
  std_dev[str(i)] = []

for team in team_space_dict.keys():
  result = ripser(team_space_dict[team], maxdim=maxdim, do_cocycles=True)
  homology_objects[team] = result
  diagrams = result['dgms']
  persistence_vals[team] = [i for i in diagrams[i][:, 1] if i != np.inf]

  for i in range(maxdim):
    resolutions_at_death = diagrams[i][:, 1]
    mean_deaths[str(i)].append(np.mean([j for j in resolutions_at_death if j != np.inf]))
    std_dev[str(i)].append(np.std([j for j in resolutions_at_death if j != np.inf]))

  team_id.append(team)


The input matrix is square, but the distance_matrix flag is off.  Did you mean to indicate that this was a distance matrix?


The input point cloud has more columns than rows; did you mean to transpose?


The input matrix is square, but the distance_matrix flag is off.  Did you mean to indicate that this was a distance matrix?


The input matrix is square, but the distance_matrix flag is off.  Did you mean to indicate that this was a distance matrix?


The input matrix is square, but the distance_matrix flag is off.  Did you mean to indicate that this was a distance matrix?


The input matrix is square, but the distance_matrix flag is off.  Did you mean to indicate that this was a distance matrix?


The input matrix is square, but the distance_matrix flag is off.  Did you mean to indicate that this was a distance matrix?


The input point cloud has more columns than rows; did you mean to transpose?


The input matrix is square, but the distance_matrix flag is off.  Did you mean to ind

In [359]:
death_df = pd.DataFrame(mean_deaths)
death_df['id'] = team_id
death_df['std'] = list(std_dev.values())[0]

stringed_ids = [teams.find_team_name_by_id(i)['abbreviation'] for i in list(persistence_vals.keys())]
death_df = death_df.merge(pd.json_normalize(teams.get_teams()), left_on="id", right_on="id")
death_df = death_df.drop(["nickname", "city", "state", "year_founded", "full_name"], axis=1)
e_ratings = pd.read_csv("e_ratings.csv")
e_ratings = e_ratings.rename(columns={'TEAM': 'abbreviation'})
death_df = death_df.merge(e_ratings, on="abbreviation")

In [None]:
death_df

Unnamed: 0.1,0,1,id,abbreviation,Unnamed: 0,GP,W,L,MIN,OFFRTG,DEFRTG,NETRTG
0,3.584194,,1610612737,ATL,23,49,22,27,2372,115.2,117.8,-2.6
1,2.965598,4.789091,1610612738,BOS,1,50,38,12,2425,118.2,109.0,9.1
2,2.983144,,1610612739,CLE,5,47,31,16,2266,113.5,108.0,5.5
3,2.866538,3.027956,1610612740,NOP,7,49,28,21,2357,115.2,111.0,4.2
4,2.709943,,1610612741,CHI,19,50,23,27,2430,111.3,112.2,-0.9
5,3.338398,,1610612742,DAL,17,49,26,23,2352,115.2,115.5,-0.3
6,2.637471,,1610612743,DEN,10,51,35,16,2448,115.7,112.3,3.4
7,3.009247,,1610612744,GSW,16,46,21,25,2238,115.1,115.0,0.1
8,2.731922,,1610612745,HOU,12,49,23,26,2377,110.8,109.6,1.2
9,2.668551,2.979239,1610612746,LAC,3,48,33,15,2309,118.0,110.8,7.2


In [None]:
import plotly.express as px
fig = px.scatter(death_df, x="0", y="NETRTG", color="abbreviation")
fig.show()

In [None]:
# prompt: fit a linear regression for the above

import statsmodels.api as sm

# Prepare the data
X = death_df["0"].values.reshape(-1, 1)
y = death_df["NETRTG"].values

# Add a constant term to the design matrix
X = sm.add_constant(X)

# Fit the linear regression model
model = sm.OLS(y, X).fit()

# Print the model summary
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.106
Model:                            OLS   Adj. R-squared:                  0.074
Method:                 Least Squares   F-statistic:                     3.317
Date:                Mon, 05 Feb 2024   Prob (F-statistic):             0.0793
Time:                        18:46:19   Log-Likelihood:                -91.481
No. Observations:                  30   AIC:                             187.0
Df Residuals:                      28   BIC:                             189.8
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -17.2551      9.536     -1.809      0.0

In [None]:
#Try strictly offensively

In [321]:
team_offensive_space_dict = {}
for team in all_rosters:
  team_id = team['TeamID'].values[0]
  team_offensive_space_dict[team_id] = generate_team_offensive_space(team["PLAYER"].tolist(), offensive_data)

In [329]:
maxdim = 1
team_id = []
offensive_mean_deaths = {}

for i in range(maxdim):
  offensive_mean_deaths[str(i)] = []

for team in team_offensive_space_dict.keys():
  result = ripser(team_offensive_space_dict[team], maxdim=maxdim, do_cocycles=True)
  diagrams = result['dgms']

  for i in range(maxdim):
    resolutions_at_death = diagrams[i][:, 1]
    offensive_mean_deaths[str(i)].append(np.mean([j for j in resolutions_at_death if j != np.inf]))

  team_id.append(team)

In [330]:
off_death_df = pd.DataFrame(offensive_mean_deaths)
off_death_df['id'] = team_id
off_death_df = off_death_df.merge(pd.json_normalize(teams.get_teams()), left_on="id", right_on="id")
off_death_df = off_death_df.drop(["nickname", "city", "state", "year_founded", "full_name"], axis=1)
e_ratings = pd.read_csv("e_ratings.csv")
e_ratings = e_ratings.rename(columns={'TEAM': 'abbreviation'})
off_death_df = off_death_df.merge(e_ratings, on="abbreviation")

In [331]:
off_death_df

Unnamed: 0.1,0,id,abbreviation,Unnamed: 0,GP,W,L,MIN,OFFRTG,DEFRTG,NETRTG
0,1.923153,1610612737,ATL,23,49,22,27,2372,115.2,117.8,-2.6
1,2.448827,1610612738,BOS,1,50,38,12,2425,118.2,109.0,9.1
2,2.211225,1610612739,CLE,5,47,31,16,2266,113.5,108.0,5.5
3,1.965984,1610612740,NOP,7,49,28,21,2357,115.2,111.0,4.2
4,1.625255,1610612741,CHI,19,50,23,27,2430,111.3,112.2,-0.9
5,2.099053,1610612742,DAL,17,49,26,23,2352,115.2,115.5,-0.3
6,1.967567,1610612743,DEN,10,51,35,16,2448,115.7,112.3,3.4
7,2.375221,1610612744,GSW,16,46,21,25,2238,115.1,115.0,0.1
8,2.120241,1610612745,HOU,12,49,23,26,2377,110.8,109.6,1.2
9,1.815849,1610612746,LAC,3,48,33,15,2309,118.0,110.8,7.2


In [332]:
import plotly.express as px
fig = px.scatter(off_death_df, x="0", y="OFFRTG", color="abbreviation")
fig.show()

In [333]:

import statsmodels.api as sm

# Prepare the data
X = off_death_df["0"].values.reshape(-1, 1)
y = off_death_df["OFFRTG"].values

# Add a constant term to the design matrix
X = sm.add_constant(X)

# Fit the linear regression model
model = sm.OLS(y, X).fit()

# Print the model summary
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.048
Model:                            OLS   Adj. R-squared:                  0.014
Method:                 Least Squares   F-statistic:                     1.426
Date:                Mon, 05 Feb 2024   Prob (F-statistic):              0.242
Time:                        20:10:36   Log-Likelihood:                -80.905
No. Observations:                  30   AIC:                             165.8
Df Residuals:                      28   BIC:                             168.6
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        105.9457      5.996     17.668      0.0

In [None]:
#Defensive rating

In [335]:
team_defensive_space_dict = {}
for team in all_rosters:
  team_id = team['TeamID'].values[0]
  team_defensive_space_dict[team_id] = generate_team_defensive_space(team["PLAYER"].tolist(), defensive_data)

In [336]:
maxdim = 1
team_id = []
def_mean_deaths = {}

for i in range(maxdim):
  def_mean_deaths[str(i)] = []

for team in team_offensive_space_dict.keys():
  result = ripser(team_defensive_space_dict[team], maxdim=maxdim, do_cocycles=True)
  diagrams = result['dgms']

  for i in range(maxdim):
    resolutions_at_death = diagrams[i][:, 1]
    def_mean_deaths[str(i)].append(np.mean([j for j in resolutions_at_death if j != np.inf]))

  team_id.append(team)

In [338]:
def_death_df = pd.DataFrame(def_mean_deaths)
def_death_df['id'] = team_id
def_death_df = def_death_df.merge(pd.json_normalize(teams.get_teams()), left_on="id", right_on="id")
def_death_df = def_death_df.drop(["nickname", "city", "state", "year_founded", "full_name"], axis=1)
e_ratings = pd.read_csv("e_ratings.csv")
e_ratings = e_ratings.rename(columns={'TEAM': 'abbreviation'})
def_death_df = def_death_df.merge(e_ratings, on="abbreviation")

In [340]:
def_death_df

Unnamed: 0.1,0,id,abbreviation,Unnamed: 0,GP,W,L,MIN,OFFRTG,DEFRTG,NETRTG
0,2.386499,1610612737,ATL,23,49,22,27,2372,115.2,117.8,-2.6
1,1.273116,1610612738,BOS,1,50,38,12,2425,118.2,109.0,9.1
2,1.556037,1610612739,CLE,5,47,31,16,2266,113.5,108.0,5.5
3,1.766305,1610612740,NOP,7,49,28,21,2357,115.2,111.0,4.2
4,1.792929,1610612741,CHI,19,50,23,27,2430,111.3,112.2,-0.9
5,2.02583,1610612742,DAL,17,49,26,23,2352,115.2,115.5,-0.3
6,1.465308,1610612743,DEN,10,51,35,16,2448,115.7,112.3,3.4
7,1.468428,1610612744,GSW,16,46,21,25,2238,115.1,115.0,0.1
8,1.356148,1610612745,HOU,12,49,23,26,2377,110.8,109.6,1.2
9,1.425765,1610612746,LAC,3,48,33,15,2309,118.0,110.8,7.2


In [341]:
import plotly.express as px
fig = px.scatter(def_death_df, x="0", y="DEFRTG", color="abbreviation")
fig.show()

In [342]:
import statsmodels.api as sm

# Prepare the data
X = def_death_df["0"].values.reshape(-1, 1)
y = def_death_df["DEFRTG"].values

# Add a constant term to the design matrix
X = sm.add_constant(X)

# Fit the linear regression model
model = sm.OLS(y, X).fit()

# Print the model summary
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.018
Model:                            OLS   Adj. R-squared:                 -0.017
Method:                 Least Squares   F-statistic:                    0.5117
Date:                Mon, 05 Feb 2024   Prob (F-statistic):              0.480
Time:                        20:16:07   Log-Likelihood:                -77.575
No. Observations:                  30   AIC:                             159.2
Df Residuals:                      28   BIC:                             162.0
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        115.1755      3.069     37.525      0.0

In [None]:
#Merge data


In [370]:
death_df.rename(columns={"0": "NET_MEAN_H0_DEATH"}, inplace=True)
off_death_df.rename(columns={"MEAN_H0_DEATH": "OFF_MEAN_H0_DEATH"}, inplace=True)
def_death_df.rename(columns={"MEAN_H0_DEATH": "DEF_MEAN_H0_DEATH"}, inplace=True)

final_df = death_df.merge(off_death_df, on="id")
final_df = final_df.merge(def_death_df, on="id")
final_df["W_PCT"] = final_df["W"] / (final_df["W"] + final_df["L"])

In [371]:
final_df.columns

Index(['NET_MEAN_H0_DEATH', 'id', 'std', 'abbreviation_x', 'Unnamed: 0_x',
       'GP_x', 'W_x', 'L_x', 'MIN_x', 'OFFRTG_x', 'DEFRTG_x', 'NETRTG_x',
       'OFF_MEAN_H0_DEATH', 'abbreviation_y', 'Unnamed: 0_y', 'GP_y', 'W_y',
       'L_y', 'MIN_y', 'OFFRTG_y', 'DEFRTG_y', 'NETRTG_y', 'DEF_MEAN_H0_DEATH',
       'abbreviation', 'Unnamed: 0', 'GP', 'W', 'L', 'MIN', 'OFFRTG', 'DEFRTG',
       'NETRTG', 'W_PCT'],
      dtype='object')

In [373]:
cols = [
    "NET_MEAN_H0_DEATH",
    "OFF_MEAN_H0_DEATH",
    "DEF_MEAN_H0_DEATH",
    "id",
    "abbreviation_x",
    "W_x",
    "L_x",
    "OFFRTG_x",
    "DEFRTG_x",
    "NETRTG_x",
    "W_PCT"
]
final_df = final_df[cols]

In [374]:
final_df.to_csv("PERSISTENCE_MEANS.csv")