In [2]:
import numpy as np
import pandas as pd
from nba_api.stats.static import players
from nba_api.stats.endpoints import playerestimatedmetrics
import unicodedata
import random
from gekko import GEKKO

In [3]:
#Params
k = 13.91

**utils**

In [4]:
def select_n_from_k(k, n):
  k_copy = list(k)
  random.shuffle(k_copy)
  return k_copy[:n]

**Calculate player costs as max of their % salary cap and comparable player % salary cap**

In [5]:
def strip_accents(s):
    return ''.join(c for c in unicodedata.normalize('NFD', s)
                   if unicodedata.category(c) != 'Mn')

In [6]:
def get_all_active_players():
  active_players = pd.json_normalize(players.get_active_players())
  return active_players

In [7]:
def get_epm_ranks(fname="epm_data.csv"):
  epm_data = pd.read_csv(fname)
  epm_data['epm_rank'] = epm_data['epm'].rank(method='min', ascending=False).astype('int')
  return epm_data[["nba_id", "name", "epm_rank"]]

In [8]:
def get_pct_salary_cap(fname="salary_data.csv", season="2023-24", salary_cap=123000000):
  salary_data = pd.read_csv(fname)
  salary_data[season] = salary_data[season].str[1:]
  salary_data[season] = salary_data[season].str.replace(',', '', regex=False)
  salary_data = salary_data[salary_data[season].notna()]
  salary_data[season] = salary_data[season].astype(int)
  salary_data["pct_salary_cap"] = salary_data[season] / salary_cap

  return salary_data[["Player", "pct_salary_cap"]]

In [9]:
def get_player_comparable_salary_cap():

  epm_ranks = get_epm_ranks()
  pct_salary_cap_data = get_pct_salary_cap()

  ranks = []

  for index, row in pct_salary_cap_data.iterrows():
    try:
      player_name = strip_accents(row["Player"])
      epm_rank = epm_ranks[epm_ranks["name"] == player_name]["epm_rank"].iloc[0]
      ranks.append(epm_rank)
    except:
      ranks.append(np.nan)

  pct_salary_cap_data["epm_rank"] = ranks
  pct_salary_cap_data = pct_salary_cap_data.dropna()

  pct_salary_cap_data_sorted = pct_salary_cap_data.sort_values(by='epm_rank').reset_index(drop=True)

  pct_salary_cap_data_sorted['AvgSalaryCapSurroundingCorrected'] = np.nan

  for i in range(len(pct_salary_cap_data_sorted)):
      surrounding_players = pct_salary_cap_data_sorted['pct_salary_cap'].iloc[max(i-3, 0):min(i+4, len(pct_salary_cap_data_sorted))].tolist()
      if surrounding_players:
          pct_salary_cap_data_sorted.at[i, 'AvgSalaryCapSurroundingCorrected'] = sum(surrounding_players) / len(surrounding_players)

  return pct_salary_cap_data_sorted

In [10]:
def get_player_costs():
  pct_salary = get_player_comparable_salary_cap()
  pct_salary['c'] = np.maximum(pct_salary['AvgSalaryCapSurroundingCorrected'], pct_salary['pct_salary_cap'])

  return pct_salary[["Player", "c"]]

In [11]:
def get_estimated_off_rating(player_names, season="2023-24"):
  estimated_metrics = playerestimatedmetrics.PlayerEstimatedMetrics(season=season, league_id="00", season_type="Regular Season")
  estimated_metrics = estimated_metrics.get_data_frames()[0]

  eo = []
  ed = []
  for player in player_names:
    cleaned_name = strip_accents(player)
    e_off_rtg = estimated_metrics[estimated_metrics["PLAYER_NAME"] == cleaned_name]["E_OFF_RATING"].iloc[0]
    e_def_rtg = estimated_metrics[estimated_metrics["PLAYER_NAME"] == cleaned_name]["E_DEF_RATING"].iloc[0]
    eo.append(float(e_off_rtg) / 100.0)
    ed.append(float(e_def_rtg) / 100.0)

  out = {
      "Player": player_names,
      "eo": eo,
      "ed": ed
  }

  return pd.DataFrame(out)

In [12]:
def add_eo_ed(player_costs_df, season="2023-24"):
  e_mets_df = get_estimated_off_rating(player_costs["Player"].tolist(), season=season)
  player_costs_df = player_costs_df.merge(e_mets_df, on="Player")
  return player_costs_df

**Next, calculate the number of possessions per game for players**

In [13]:
def get_poss_per_game(fname="usage_data.csv"):
  usage_data = pd.read_csv(fname)
  usage_data["poss_per_game"] = usage_data["POSS"] / usage_data["GP"]
  return usage_data[["PLAYER", "poss_per_game"]]

In [14]:
def add_np(player_df):
  poss_per_game = get_poss_per_game()

  np = []
  for index, row in player_df.iterrows():
    player_name = strip_accents(row["Player"])
    poss_per_game_val = poss_per_game[poss_per_game["PLAYER"] == player_name]["poss_per_game"].iloc[0]
    np.append(poss_per_game_val)

  player_df["np"] = np
  return player_df

**Finally, calculate league-wide team average possessions per game**

In [15]:
def calculate_league_poss_per_game(fname="team_possession_data.csv"):
  team_possession_data = pd.read_csv(fname).dropna()
  team_possession_data["poss_per_game"] = team_possession_data["POSS"].str.replace(",","", regex=False).astype(int) / team_possession_data["GP"]
  return np.mean(team_possession_data["poss_per_game"])

In [16]:
player_costs = get_player_costs()
player_df = add_eo_ed(player_costs)
player_df = add_np(player_df)
team_np = calculate_league_poss_per_game()

In [None]:
team_np

100.14406309195043

In [None]:
player_df

Unnamed: 0,Player,c,eo,ed,np
0,Joel Embiid,0.387052,1.188,1.092,71.441176
1,Shai Gilgeous-Alexander,0.348465,1.204,1.093,73.466667
2,Giannis Antetokounmpo,0.371058,1.195,1.121,74.745763
3,Luka Dončić,0.325725,1.182,1.152,79.314815
4,Nikola Jokić,0.387052,1.201,1.104,70.066667
...,...,...,...,...,...
453,AJ Griffin,0.030186,1.115,1.200,16.555556
454,Johnny Davis,0.041063,1.047,1.109,21.666667
455,Ish Smith,0.027255,1.018,1.186,36.023256
456,Maxwell Lewis,0.025363,0.792,1.129,7.857143


**Brute force won't work - way too many combos**

**Finally, we are ready to set up the constrained optimization problem.**

In [18]:
n = len(player_df)
bounds = [(0, 1)] * n
print("Number of players: " + str(n))

Number of players: 458


In [17]:
def objective_function(x, player_df):
  pts_added = 0
  pts_given = 0

  for i in range(n):
    player_data = player_df.iloc[i]

    term = float(player_data["eo"]) * float(player_data["np"]) * float(x[i])
    pts_added += term

    term = float(player_data["ed"]) * float(player_data["np"]) * float(x[i])
    pts_given += term

  pts_added = pts_added ** 13.91
  pts_given = pts_given ** 13.91

  return (pts_added) / (pts_added + pts_given)

In [None]:
m = GEKKO(remote=False)
salaries = player_df["c"].tolist()
poss = player_df["np"].tolist()

#x = [m.Var(lb=0, ub=1, integer=True) for _ in range(n)]
x = [m.Var(value=1 if i < 15 else 0, lb=0, ub=1, integer=True) for i in range(n)]
print(x)

pts_added = m.Intermediate(1)
pts_given = m.Intermediate(1)

for i in range(n):
    player_data = player_df.iloc[i]
    eo = player_data["eo"]
    np = player_data["np"]
    ed = player_data["ed"]

    #Intermediate terms for readability
    term_added = m.Intermediate(eo * np * x[i])
    term_given = m.Intermediate(ed * np * x[i])

    #Update pts_added and pts_given
    pts_added = m.Intermediate(pts_added + term_added)
    pts_given = m.Intermediate(pts_given + term_given)

pts_added_powered = m.Intermediate(pts_added**13.91)
pts_given_powered = m.Intermediate(pts_given**13.91)

objective = pts_added_powered / (pts_added_powered + pts_given_powered)

m.Maximize(objective)

m.Equation(m.sum(x) == 15)
salaries_var = [m.Const(value=salaries[i]) for i in range(n)]
m.Equation(m.sum([salaries_var[i] * x[i] for i in range(n)]) <= 1)

#Add play time constraint
poss_var = [m.Const(value=poss[i]) for i in range(n)]
m.Equation(m.sum([poss_var[i] * x[i] for i in range(n)]) >= 5*team_np)

m.options.SOLVER=1
m.solve(disp=True)
solution = [x[i].value[0] for i in range(n)]




[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [None]:
for i in range(len(solution)):
  if solution[i] == 1.0:
    print(player_df.iloc[i])

Player    Jose Alvarado
c              0.064917
eo                1.168
ed                1.031
np            35.634146
Name: 92, dtype: object
Player    Trey Murphy III
c                 0.06752
eo                  1.166
ed                  1.055
np              56.567568
Name: 98, dtype: object
Player    Kentavious Caldwell-Pope
c                         0.119552
eo                           1.207
ed                           1.098
np                       65.107143
Name: 102, dtype: object
Player    Dean Wade
c           0.12819
eo            1.141
ed            1.022
np        41.730769
Name: 103, dtype: object
Player    Al Horford
c             0.1099
eo             1.197
ed             1.076
np         55.313725
Name: 152, dtype: object
Player    Neemias Queta
c              0.081953
eo                1.287
ed                1.128
np            24.913043
Name: 154, dtype: object
Player    Ryan Rollins
c             0.080131
eo                1.18
ed               0.814
np        

In [None]:
objective_function(solution, player_df)

0.8261131241672858

**Construct model to work with players already on roster and limited other players available**

In [None]:
player_list = player_df["Player"].tolist()

In [None]:
fixed_players = [
]
available_players = [i for i in pd.read_csv("FreeAgents.csv")["Player"].tolist() if i in player_list]

In [None]:
refined_player_df = player_df[player_df["Player"].isin(available_players) | player_df["Player"].isin(fixed_players)]

In [None]:
refined_player_list = refined_player_df["Player"].tolist()
fixed_players_indices = [refined_player_list.index(i) for i in fixed_players]

In [None]:
fixed_players_indices

[]

In [None]:
m = GEKKO(remote=False)
n = len(refined_player_df)
salaries = refined_player_df["c"].tolist()
poss = refined_player_df["np"].tolist()

#x = [m.Var(lb=0, ub=1, integer=True) for _ in range(n)]
#x = [m.Var(value=1 if i < 15 else 0, lb=0, ub=1, integer=True) for i in range(n)]
x = [m.Var(value=1 if i in fixed_players_indices else 0, lb=0, ub=1, integer=True) for i in range(n)]
print(x)

pts_added = m.Intermediate(1)
pts_given = m.Intermediate(1)

for i in range(n):
    player_data = player_df.iloc[i]
    eo = player_data["eo"]
    np = player_data["np"]
    ed = player_data["ed"]

    #Intermediate terms for readability
    term_added = m.Intermediate(eo * np * x[i])
    term_given = m.Intermediate(ed * np * x[i])

    #Update pts_added and pts_given
    pts_added = m.Intermediate(pts_added + term_added)
    pts_given = m.Intermediate(pts_given + term_given)

pts_added_powered = m.Intermediate(pts_added**13.91)
pts_given_powered = m.Intermediate(pts_given**13.91)

objective = pts_added_powered / (pts_added_powered + pts_given_powered)

m.Maximize(objective)

#Add roster size constraint
m.Equation(m.sum(x) <= 15)
m.Equation(m.sum(x) >= 12)

#Add salary cap constraint
salaries_var = [m.Const(value=salaries[i]) for i in range(n)]
m.Equation(m.sum([salaries_var[i] * x[i] for i in range(n)]) <= 1.5)

#Add minimum salary constraint
m.Equation(m.sum([salaries_var[i] * x[i] for i in range(n)]) >= .9)

#Add play time constraint
#poss_var = [m.Const(value=poss[i]) for i in range(n)]
#m.Equation(m.sum([poss_var[i] * x[i] for i in range(n)]) >= 5*team_np)

#Add fixed players constraint
for idx in fixed_players_indices:
    m.Equation(x[idx] == 1)

m.options.SOLVER=1
m.solve(disp=True)
solution = [x[i].value[0] for i in range(n)]




[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
 ----------------------------------------------------------------
 APMonitor, Version 1.0.1
 APMonitor Optimization Suite
 ----------------------------------------------------------------
 
 
 --------- APM Model Size ------------
 Each time step contains
   Objects      :            4
   Constants    :          153
   Variables    :          467
   Intermediates:          616
   Connections  :          616
   Equations    :          927
   Residuals    :          311
 
 Number of state variables:            467
 Number of total equat

In [None]:
for i in range(len(solution)):
  if solution[i] == 1.0:
    print(refined_player_df.iloc[i])

Player    Paul George
c            0.371058
eo              1.202
ed              1.102
np          69.272727
Name: 10, dtype: object
Player    Jalen Smith
c            0.096986
eo              1.165
ed              1.189
np          37.568182
Name: 68, dtype: object
Player    Jrue Holiday
c             0.299689
eo               1.199
ed               1.098
np           68.339286
Name: 74, dtype: object
Player    Andre Drummond
c               0.073517
eo                 1.134
ed                  1.14
np             35.147541
Name: 89, dtype: object
Player    Jose Alvarado
c              0.064917
eo                1.168
ed                1.031
np            35.634146
Name: 92, dtype: object
Player    Eric Gordon
c            0.077677
eo              1.158
ed               1.12
np          62.098039
Name: 234, dtype: object
Player    Lamar Stevens
c               0.03558
eo                1.028
ed                1.025
np                22.75
Name: 287, dtype: object
Player    Bismack Bi

In [None]:
objective_function(solution, refined_player_df)

0.5941173957635177

**Soft cap model with Bird Rights**

In [19]:
player_list = player_df["Player"].tolist()

In [20]:
fixed_players = [
]
available_players = [i for i in pd.read_csv("FreeAgents.csv")["Player"].tolist() if i in player_list]

In [21]:
refined_player_df = player_df[player_df["Player"].isin(available_players) | player_df["Player"].isin(fixed_players)]

In [22]:
refined_player_list = refined_player_df["Player"].tolist()
fixed_players_indices = [refined_player_list.index(i) for i in fixed_players]

In [23]:
fixed_players_indices

[]

In [25]:
m = GEKKO(remote=False)
n = len(refined_player_df)
salaries = refined_player_df["c"].tolist()
poss = refined_player_df["np"].tolist()

#x = [m.Var(lb=0, ub=1, integer=True) for _ in range(n)]
#x = [m.Var(value=1 if i < 15 else 0, lb=0, ub=1, integer=True) for i in range(n)]
x = [m.Var(value=1 if i in fixed_players_indices else 0, lb=0, ub=1, integer=True) for i in range(n)]
print(x)

pts_added = m.Intermediate(1)
pts_given = m.Intermediate(1)

for i in range(n):
    player_data = player_df.iloc[i]
    eo = player_data["eo"]
    np = player_data["np"]
    ed = player_data["ed"]

    #Intermediate terms for readability
    term_added = m.Intermediate(eo * np * x[i])
    term_given = m.Intermediate(ed * np * x[i])

    #Update pts_added and pts_given
    pts_added = m.Intermediate(pts_added + term_added)
    pts_given = m.Intermediate(pts_given + term_given)

pts_added_powered = m.Intermediate(pts_added**13.91)
pts_given_powered = m.Intermediate(pts_given**13.91)

objective = pts_added_powered / (pts_added_powered + pts_given_powered)

m.Maximize(objective)

#Add roster size constraint
m.Equation(m.sum(x) <= 15)
m.Equation(m.sum(x) >= 12)

#Add salary cap constraint
salaries_var = [m.Const(value=salaries[i]) for i in range(n)]
m.Equation(m.sum([salaries_var[i] * x[i] for i in range(n) if i not in fixed_players_indices]) <= 1.5)


#Add minimum salary constraint
m.Equation(m.sum([salaries_var[i] * x[i] for i in range(n)]) >= .9)

#Add play time constraint
#poss_var = [m.Const(value=poss[i]) for i in range(n)]
#m.Equation(m.sum([poss_var[i] * x[i] for i in range(n)]) >= 5*team_np)

#Add fixed players constraint
for idx in fixed_players_indices:
    m.Equation(x[idx] == 1)

m.options.SOLVER=1
m.solve(disp=True)
solution = [x[i].value[0] for i in range(n)]




[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
 ----------------------------------------------------------------
 APMonitor, Version 1.0.1
 APMonitor Optimization Suite
 ----------------------------------------------------------------
 
 
 --------- APM Model Size ------------
 Each time step contains
   Objects      :            4
   Constants    :          153
   Variables    :          467
   Intermediates:          616
   Connections  :          616
   Equations    :          927
   Residuals    :          311
 
 Number of state variables:            467
 Number of total equat

In [26]:
for i in range(len(solution)):
  if solution[i] == 1.0:
    print(refined_player_df.iloc[i])

Player    Paul George
c            0.371058
eo                1.2
ed                1.1
np          69.272727
Name: 10, dtype: object
Player    Jalen Smith
c            0.096986
eo              1.164
ed              1.185
np          37.568182
Name: 68, dtype: object
Player    Jrue Holiday
c             0.299689
eo               1.196
ed               1.098
np           68.339286
Name: 74, dtype: object
Player    Jose Alvarado
c              0.064917
eo                 1.18
ed                1.033
np            35.634146
Name: 92, dtype: object
Player    Eric Gordon
c            0.077677
eo              1.156
ed              1.123
np          62.098039
Name: 234, dtype: object
Player    Lamar Stevens
c               0.03558
eo                1.019
ed                1.016
np                22.75
Name: 287, dtype: object
Player    Bismack Biyombo
c                0.051473
eo                  1.084
ed                  1.128
np                   45.0
Name: 298, dtype: object
Player    Thad

In [27]:
objective_function(solution, refined_player_df)

0.5920209983940311