## Code For A Balanced Multiteam Formation Problem under Synergy, Superstar, and Preference Considerations

In [53]:
## translation dictionaries
from pid2pos_bref2nba_nba2bref_pid2name_name2pid import *
from fivethirtyeightDataDicts import *

In [54]:
from unidecode import unidecode
def drop_accent(item):
    """Return ``item`` passed through ``unidecode``, or ``item`` itself on failure.

    Values for which ``unidecode`` raises ``AttributeError`` are handed back
    untouched (presumably non-string cells such as NaN - confirm).
    """
    try:
        ascii_text = unidecode(item)
    except AttributeError:
        return item
    else:
        return ascii_text

## data
import numpy as np
import pandas as pd
import csv
from collections import Counter

## plotting packages
import plotly.graph_objects as go
import plotly.figure_factory as ff

## analysis
from scipy.stats.mstats import hmean
from sklearn.preprocessing import StandardScaler

## Appendix A - Figure 10

data from https://projects.fivethirtyeight.com/2020-nba-player-projections/

In [35]:
# Load the cluster assignments, normalise player names, and keep only the
# players whose name is a key of name2pid.
clust = pd.read_csv('cluster538_2020_full.csv').drop(columns=['Unnamed: 0']).dropna()
clust['name'] = clust['name'].apply(drop_accent)
boole = clust.name.isin(name2pid)
# .copy(): a column is added below, so work on an independent frame instead of
# a boolean-filtered slice (avoids SettingWithCopyWarning / ambiguous writes).
clust = clust[boole].copy()
clust['pid'] = clust['name'].replace(name2pid)
# player id -> cluster label
pid2clust = dict(zip(clust.pid,clust.cluster))

# Figure 10: horizontal bar chart of the number of players in each cluster.
counts = pd.DataFrame(clust.value_counts('cluster')).reset_index()
counts.columns = ['cluster', 'counts']

cluster_bars = go.Bar(
    x=counts['counts'],
    y=counts['cluster'],
    orientation='h',
    marker_color='navy',
    text=counts['counts'],
    textposition='auto',
    textangle=0,
)
fig = go.Figure(data=[cluster_bars])

fig.update_layout(xaxis_title="Count", font=dict(family="Computer Modern", size=13))
fig.update_yaxes(automargin=True, categoryorder='total descending')

fig.show()

## Calculate Cluster Synergy

data from https://www.kaggle.com/datasets/schmadam97/nba-playbyplay-data-20182019

In [12]:
# Load the 2019-20 play-by-play file (absolute path on the author's machine;
# adjust when running elsewhere) and tag every row with its season.
pbp = pd.read_csv('/u/au/sa/meganmuniz/env1/Research/TakeTwo/kagglePBP/NBA_PBP_2019-20.csv')
pbp['Season'] = '2019-20'
# reset_index() renumbers the kept rows 0..n-1 (the old labels move to an
# 'index' column); the following cells rely on "label + 1" meaning "next row".
pbp = pbp[(pbp.GameType=='regular')].reset_index() # regular season only

# print(pbp.columns)
# pbp.head()

In [13]:
## rebound to score ##
# Rows that record a rebounder (who rebounded, which season).
rebs = pbp[~(pbp.Rebounder.isna())][['Rebounder','Season']]
# The row immediately following each rebound.
# NOTE(review): this would presumably raise IndexError if the very last row of
# pbp is a rebound, and nothing here checks that the following row belongs to
# the same game or team - confirm that is acceptable.
rebTemp = pbp.iloc[pbp[~(pbp.Rebounder.isna())].index+1]
# Of those follow-up rows, keep the made shots and their shooter.
scoreReb = rebTemp[(rebTemp.ShotOutcome=='make')][['Shooter']]
# Keep only rebounds whose next row is a made shot, then pair the two side by side.
boole = (rebs.index+1).isin(list(scoreReb.index))
rebs = rebs[boole].reset_index()
reboundScore = pd.concat([rebs,scoreReb.reset_index()],axis=1)#.replace(pid2pbp)
# Player1 = rebounder, Player2 = shooter; 'asstIndex' holds the shot row's index here.
reboundScore.columns = ['rebIndex', 'Player1','Season', 'asstIndex', 'Player2']
# reboundScore

In [14]:
## rebound to assist ##
# Rows that record a rebounder (who rebounded, which season).
rebs = pbp[~(pbp.Rebounder.isna())][['Rebounder','Season']]
# The row immediately following each rebound.
# NOTE(review): same boundary / same-game caveats as the rebound-to-score cell
# (label + 1 may run past the last row) - confirm.
rebTemp = pbp.iloc[pbp[~(pbp.Rebounder.isna())].index+1]
# Of those follow-up rows, keep the ones that record an assister.
rebAsst = rebTemp[~(rebTemp.Assister.isna())][['Assister']]
# Keep only rebounds whose next row has an assist, then pair the two side by side.
boole = (rebs.index+1).isin(list(rebAsst.index))
rebs = rebs[boole].reset_index()
reboundAssist = pd.concat([rebs,rebAsst.reset_index()],axis=1)#.replace(pid2pbp)
# Player1 = rebounder, Player2 = assister; asstIndex = row index of the assisted play.
reboundAssist.columns = ['rebIndex', 'Player1','Season', 'asstIndex', 'Player2']
# reboundAssist

In [15]:
## steal to score ##
# Turnovers caused by a steal (who caused it, which season).
stls = pbp[(pbp.TurnoverCause=='steal')][['TurnoverCauser', 'Season']]
# The row immediately following each steal.
# NOTE(review): label + 1 may run past the last row, and the next row is not
# checked to be the same game/team - confirm.
stlTemp = pbp.iloc[stls.index+1]
# Of those follow-up rows, keep the made shots and their shooter.
scoreStl = stlTemp[(stlTemp.ShotOutcome=='make')][['Shooter']]
# Keep only steals whose next row is a made shot, then pair the two side by side.
boole = (stls.index+1).isin(list(scoreStl.index))
stls = stls[boole].reset_index()
stealScore = pd.concat([stls,scoreStl.reset_index()],axis=1)#.dropna()#replace(pid2pbp)
# Player1 = player credited with the steal, Player2 = shooter.
stealScore.columns = ['stlIndex', 'Player1', 'Season', 'shootIndex', 'Player2']
# stealScore

In [16]:
## steal to assist ##
# Turnovers caused by a steal (who caused it, which season).
stls = pbp[(pbp.TurnoverCause=='steal')][['TurnoverCauser', 'Season']]
# The row immediately following each steal.
# NOTE(review): label + 1 may run past the last row, and the next row is not
# checked to be the same game/team - confirm.
stlTemp = pbp.iloc[stls.index+1]
# Of those follow-up rows, keep the ones that record an assister.
stlAsst = stlTemp[~(stlTemp.Assister.isna())][['Assister']]
# Keep only steals whose next row has an assist, then pair the two side by side.
boole = (stls.index+1).isin(list(stlAsst.index))
stls = stls[boole].reset_index()
stealAssist = pd.concat([stls,stlAsst.reset_index()],axis=1)#.dropna()#replace(pid2pbp)
# Player1 = player credited with the steal, Player2 = assister;
# asstIndex = row index of the assisted play (used later to avoid double counting).
stealAssist.columns = ['stlIndex', 'Player1', 'Season', 'asstIndex', 'Player2']
# stealAssist

In [17]:
## assists ##
# Every assisted play, minus those already captured as steal->assist or
# rebound->assist so that they are not counted twice.
hasAssister = pbp.Assister.notna()
assist = pbp.loc[hasAssister, ['Assister', 'Shooter', 'Season']]
dropStl = assist.index.isin(stealAssist.asstIndex)
dropReb = assist.index.isin(reboundAssist.asstIndex)
assist = assist.loc[~(dropStl | dropReb)]
# Player1 = assister, Player2 = shooter
assist = assist.rename(columns={'Assister': 'Player1', 'Shooter': 'Player2'})
# assist#.replace(pid2pbp)

In [18]:
## count direct positive interactions ##
type1 = ['rebound', 'steal']
type2 = ['Score','Assist']

# Explicit name -> frame table; replaces eval() on a concatenated variable name.
interactionFrames = {
    'reboundScore': reboundScore,
    'reboundAssist': reboundAssist,
    'stealScore': stealScore,
    'stealAssist': stealAssist,
}

# One (pid1, pid2) tuple per observed interaction, in discovery order.
positiveList = []

for t1 in type1:
    for t2 in type2:
        df = interactionFrames[t1+t2]
        # a player following up his own rebound/steal is not a pair interaction
        df = df[(df.Player1 != df.Player2)]
        df = df.dropna()
        # Player strings are split on ' - ' into two columns (named name/pid
        # below), with values translated through bref2nba.
        temp1 = df.Player1.str.split(' - ',expand=True).replace(bref2nba)#[1]
        temp2 = df.Player2.str.split(' - ',expand=True).replace(bref2nba)#[1]
        newdf = pd.concat([temp1,temp2],axis=1)#.replace(pid2gpAll)
        newdf.columns = ['name1','pid1','name2','pid2']
        positiveList.extend(zip(newdf.pid1,newdf.pid2))


In [19]:
## add assists
# Same treatment as the rebound/steal frames: drop self-pairs and incomplete
# rows, split the player strings, and append the (pid1, pid2) pairs.
# dropna() is chained (not inplace) so it never writes into a slice of `assist`.
df = assist[(assist.Player1 != assist.Player2)].dropna()
temp1 = df.Player1.str.split(' - ',expand=True).replace(bref2nba)#[1]
temp2 = df.Player2.str.split(' - ',expand=True).replace(bref2nba)#[1]
newdf = pd.concat([temp1,temp2],axis=1)
newdf.columns = ['name1','pid1','name2','pid2']
positiveList.extend(zip(newdf.pid1,newdf.pid2))

In [23]:
## order pairs then use counter to create positiveInteractions
## take out if not in feasible combo (unrecognized)
positiveList_ = []
for pair in positiveList:
    try:
        # sort so (a, b) and (b, a) count as the same pair
        positiveList_.append(tuple(sorted(pair)))
    except TypeError:
        # the two ids cannot be ordered against each other (presumably one was
        # not translated to the same id type) - skip the pair
        continue


# (pid_low, pid_high) -> number of positive interactions
positiveInteractions = dict(Counter(positiveList_))
# positiveInteractions

In [24]:
# create df: pair to positive interaction
# One row per pair; from_dict is called on a throwaway empty DataFrame.
# NOTE(review): the next line expects reset_index() to yield a single 'index'
# column holding the (p1, p2) tuples - confirm for the pandas version in use.
archSyn = pd.DataFrame().from_dict(positiveInteractions,orient='index').reset_index()
# unpack each pair tuple into its two player-id columns
archSyn['p1'], archSyn['p2'] = zip(*archSyn['index'])
archSyn.dropna(inplace=True)

In [27]:
## get possessions per pair
newBase = pd.read_csv('/u/au/sa/meganmuniz/env1/Research/TakeTwo/leaguedashlineup/lineupStats_Advanced_2man_allSeasons(per100Poss).csv')
newBase.drop(columns = 'Unnamed: 0',inplace=True)

# Split GROUP_ID on '-', overwrite split column 3 with the season, and drop
# split column 0, leaving (p1, p2, szn).
# (presumably GROUP_ID looks like "-id1-id2-" - confirm)
base = newBase['GROUP_ID'].str.split('-',expand=True)
base[3] = newBase['Season'] #6
base = base[(base.columns[1:])]
base.columns = ['p1','p2','szn']
base['poss'] = newBase['POSS']
# .copy(): columns are reassigned below, so work on an independent frame
base = base[(base.szn=='2019-20')].copy()
base['p1'] = base['p1'].astype(int)
base['p2'] = base['p2'].astype(int)

# (pid_low, pid_high) -> POSS value for the 2019-20 season
baseList = [tuple(sorted(x)) for x in zip(base.p1,base.p2)]
possDict = dict(zip(baseList,base.poss))

## get positive interaction/poss for each indy pair using newbase,
posIntPerPoss = {}
for key,val in positiveInteractions.items():
    try:
        posIntPerPoss[key] = round(val / possDict[key], 4)
    except (KeyError, ZeroDivisionError):
        # pair has no possession record (or a zero count) - leave it out
        continue
# posIntPerPoss

In [37]:
# One row per player pair: the two players' archetypes (sorted) and the pair's
# positive interactions per possession.  Rows are collected in a list and the
# frame is built once: DataFrame.append was removed in pandas 2.0, and the
# former bare `except` would have swallowed that failure and left the frame empty.
interactionRows = []
for idx, row in archSyn.iterrows():
    tup = tuple(sorted(tuple((row.p1,row.p2))))
    try:
        a1 = pid2clust[row.p1]
        a2 = pid2clust[row.p2]
        if a1 == 'Singleton' or a2 == 'Singleton':
            continue
        arch1,arch2 = sorted(tuple([a1,a2]))
        value = posIntPerPoss[tup]
    except (KeyError, TypeError):
        # player without a cluster, pair without a per-possession value,
        # or archetype labels that cannot be ordered - skip the pair
        continue
    interactionRows.append({'Arch1': arch1, 'Arch2': arch2, 'Value': value})

interactions = pd.DataFrame(interactionRows, columns=['Arch1', 'Arch2', 'Value'])

# Key each archetype pair as "Arch1$Arch2" and aggregate its per-pair values
# with the harmonic mean.
interactions['lineupGroup'] = interactions['Arch1'] + '$' + interactions['Arch2']

groupedValues = interactions.groupby('lineupGroup').agg({'Value': hmean})
hmeanInters = groupedValues.reset_index()
sortedHmean = hmeanInters.sort_values(by='Value', ascending=False)

# synergy is reported as (harmonic mean * 100), rounded to 3 decimals
synergyScaled = (sortedHmean['Value'] * 100).round(3)
gp2synergy = dict(zip(sortedHmean.lineupGroup, synergyScaled))

# split the "$" key back into its two archetype names
new = sortedHmean['lineupGroup'].str.split('$', expand=True)
finalSynergy = pd.DataFrame({
    'Archetype 1': new[0],
    'Archetype 2': new[1],
    'Synergy': synergyScaled,
})

# (archetype 1, archetype 2) -> synergy
tup2syn = {
    (first, second): syn
    for first, second, syn in zip(
        finalSynergy['Archetype 1'], finalSynergy['Archetype 2'], finalSynergy['Synergy']
    )
}

finalSynergy


Unnamed: 0,Archetype 1,Archetype 2,Synergy
0,ALL-STAR,ALL-STAR,8.570
64,FUTURE ALL-STAR,FUTURE ALL-STAR,7.060
69,FUTURE ALL-STAR,MVP CANDIDATE,6.592
95,GREAT PROSPECT,OK PROSPECT,6.190
119,MVP CANDIDATE,OK PROSPECT,6.160
...,...,...,...
57,DEFENSIVE SPECIALIST,SCRAPPY VETERAN,1.550
54,DEFENSIVE SPECIALIST,OK PROSPECT,1.470
90,GOOD STARTER,SCRUB,1.426
78,GOOD PROSPECT,KEY ROLE PLAYER,1.330


In [40]:
## Appendix A - Figure 11
# Archetype-by-archetype synergy matrix drawn as an annotated heatmap.
synergyPivot  = finalSynergy.pivot_table(index='Archetype 1',
                    columns='Archetype 2',
                    values='Synergy')

# Order archetypes by how many missing cells their row has (fewest first);
# the same order is applied to columns and rows.
idx2nan = {idx: int(row.isna().sum()) for idx, row in synergyPivot.iterrows()}
colOrder = sorted(idx2nan, key=idx2nan.get)
synergyPivot = synergyPivot[colOrder]
synergyPivot = synergyPivot.reindex(colOrder)

# Cell labels: the value as text, blank where the pair has no synergy value.
# `not` replaces the former bitwise `~`, which is only a logical negation for
# numpy booleans.
z_text = [
    [str(h) if not np.isnan(h) else "" for h in arr]
    for arr in synergyPivot.values
]

fig = ff.create_annotated_heatmap(synergyPivot.values, x=colOrder, y=colOrder,annotation_text=z_text,showscale=True, colorscale='oxy')
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig['layout'].update(plot_bgcolor='white')
fig['layout']['yaxis']['autorange'] = "reversed"
fig['layout']['xaxis']['autorange'] = "reversed"
font_ = "Computer Modern"
fig.update_xaxes(tickangle=45)
fig.update_layout(
    font_family=font_,
    title_font_family=font_,
)
fig.show()

## Predict Draftee Values

data from https://www.basketball-reference.com/draft/NBA_{yr}.html

where `yr` ranges over the draft years 2001–2020

In [41]:
masterDraft = pd.read_csv('masterDraft.csv')

stat = 'WS/48'
# Mean of `stat` per pick number.  The column is selected before aggregating so
# only that column is averaged; averaging the whole frame raises on non-numeric
# columns in pandas >= 2.0.
dftRes = masterDraft.groupby('Pk')[stat].mean().reset_index()
# 'shift' moves the curve so that its minimum becomes at least 1
shift = abs(dftRes[stat].min())
dftRes['shift'] = dftRes[stat] + shift + 1

# inputs for the polynomial fit: pick number vs. mean stat
x = dftRes['Pk']
y = dftRes[stat]

In [42]:
## estimate ws draft curve
def polyfit(x, y, degree):
    """Fit a polynomial of the given degree to (x, y) and report its fit quality.

    Returns a dict with two entries:
      'polynomial'    - the fitted coefficients as a plain list, in the order
                        returned by ``np.polyfit``
      'determination' - regression sum of squares divided by total sum of
                        squares (r-squared)
    """
    coefficients = np.polyfit(x, y, degree)

    # fitted values and the mean of the observations
    fitted = np.polyval(coefficients, x)
    mean_y = np.sum(y) / len(y)

    explained = np.sum((fitted - mean_y) ** 2)
    total = np.sum((y - mean_y) ** 2)

    return {
        'polynomial': coefficients.tolist(),
        'determination': explained / total,
    }

#print(polyfit(x,y,3))

# Cubic fit of the mean stat against pick number; iterating the poly1d yields
# its four coefficients, highest power first, matching poly_func's (a, b, c, d).
a,b,c,d = np.poly1d(np.polyfit(x, y, 3))
def poly_func(x,a,b,c,d):
    """Evaluate the cubic a*x^3 + b*x^2 + c*x + d at x."""
    cubic_term = a * x**3
    quadratic_term = b * x**2
    linear_term = c * x
    return cubic_term + quadratic_term + linear_term + d
## final model, r^2 0.4358
# Displayed as the cell output: the coefficients and the r-squared of the cubic fit.
polyfit(x,y,3)

{'polynomial': [-3.7602667708170493e-07,
  4.3426787437222226e-05,
  -0.002590974341123658,
  0.10382855662540115],
 'determination': 0.43578622599882494}

In [48]:
## The draftee table below was produced by applying the fitted curve to
## estimate WS, matching against the 2020 draftees, and estimating salary;
## here it is simply read back from disk without the saved index column.
draft_df = pd.read_csv('masterDFT538.csv').drop(columns='Unnamed: 0')

## master data 

gathered from https://projects.fivethirtyeight.com/2020-nba-player-projections/ and https://www.basketball-reference.com/leagues/NBA_2020_advanced.html

In [50]:
## master data
# Read the combined player table from disk; per the note further down,
# master_df and draft_df were used to create the optimisation .dat files.
master_df = pd.read_csv('masterData538.csv')

## Optimization

draft_df and master_df were used to create .dat files

model in cooperativeTrade.mod

In [55]:
# ## programs written by John Cox & Jamie Grymes at Colorado School of Mines
# # to save, read, and plot AMPL solutions
from amplpy import AMPL, Environment
from exportAMPL import *
from plotAMPL import *
# ############## if wanting to recreate solves ##############

# # actual == initial case study, actual willingness
# # indifferent == all zero
# # rand1 == three teams
# # rand2 == preference to actual team but indifferent between next 3
# # rand3 == completely random

# sensitivityScenario = ['_indifferent', '_rand1', '_rand2', '_rand3']
# ampStrings = ['_minus20', '_minus10', '_plus10', '_plus20']
# # for scene in sensitivityScenario:
# for scene in ['Actual']+ampStrings+sensitivityScenario:
# #     textStr = f'cooperativeMod_538_fa_{scene}' 
#     textStr = f'cooperativeMod_538_fa{scene}'
#     modFile = 'cooperativeTradeFix.mod'
#     optStr = 'oneHour_mipfocus1'
#     datFile = f'gameTheory_{textStr}.dat'

#     solFile = f'/resfiles/{textStr}_{optStr}.sol'
#     logFile = f'/logfiles/{textStr}_{optStr}.txt'

#     options = f"option gurobi_options 'logfile {logFile} mipfocus 1 timelim 3600';"

#     ampl = AMPL()
#     ampl.read(modFile)
#     ampl.readData(datFile)
#     ampl.eval('option solver gurobi;')
#     ampl.eval(options)
#     ampl.solve()
#     sol = saveSolution(ampl,  solFile)
#     ampl.close()

In [57]:
############## else if reading in solutions ##############
# Scenario suffixes: the case study ('Actual'), four '_minus/_plus' variants,
# and four preference-sensitivity variants.  allSols[i] matches allScenarios[i].
sensitivityScenario = ['_indifferent', '_rand1', '_rand2', '_rand3']
ampStrings = ['_minus20', '_minus10', '_plus10', '_plus20']
allScenarios = ['Actual']+ampStrings+sensitivityScenario
allSols = []
for scene in allScenarios:
#     textStr = f'cooperativeMod_538_fa_{scene}'
    textStr = f'cooperativeMod_538_fa{scene}'
    # modFile and datFile are not used when only loading saved solutions
    modFile = 'cooperativeTrade.mod'
    optStr = 'oneHour_mipfocus1'
    datFile = f'gameTheory_{textStr}.dat'

    solFile = f'{textStr}_{optStr}.sol'
    # NOTE: `sol` stays bound to the last scenario after the loop, and later
    # cells read it directly (e.g. sol.T, sol.C, sol.v).
    sol = loadSolution(solFile)
    allSols.append(sol)

### begin analysis ###

In [59]:
# original teams
# team -> list of player ids read from sol.C[t].  `sol` is whatever solution
# was bound last by an earlier cell.
set_C = {}
for t in sol.T:
    for pid in sol.C[t].values[0].values:
        # setdefault replaces a bare try/except used only to create the list
        set_C.setdefault(t, []).append(pid)
historybref20 = historybref[historybref.Season == '2019-20']
def getTeamVal_fromActual_orig(df,team):
    """Return the team's mean WS/48 (times 100, rounded to 3 decimals).

    Only rows of ``df`` whose ``Tm`` equals ``team`` and whose ``pid`` is a key
    of ``pid2clust`` are averaged.
    """
    roster = df[df.Tm == team]
    clustered = roster[roster.pid.isin(pid2clust)]
    scaled_ws = clustered['WS/48'] * 100
    return round(np.mean(scaled_ws), 3)

# # synergy 
# Per original team: archetype counts (TAG), synergy of that mix (SYN), and
# mean individual value taken from the 2019-20 rows (VAL).
# NOTE: returnSynergy is defined in a later cell, so that cell must have been
# run before this one.
TAG_orig = {}
SYN_orig = {}
VAL_orig = {}
for team,lineup in set_C.items():
    # raises KeyError if a rostered player has no cluster
    lineup_clust = [pid2clust[p] for p in lineup]
    tempDict = Counter(lineup_clust)
    TAG_orig[team] = {k:v for k,v in tempDict.items() if v!=0}
    SYN_orig[team] = returnSynergy(TAG_orig[team])
    VAL_orig[team] = getTeamVal_fromActual_orig(historybref20,team)

# One row per team; the value columns are filled by replacing each team name
# with its synergy / value.
origDF = pd.DataFrame(columns=['team', 'origSynergy', 'origVal'])
origDF['team'] = VAL_orig.keys()
origDF['origSynergy'] = origDF['team'].copy().replace(SYN_orig)
origDF['origVal'] = origDF['team'].copy().replace(VAL_orig)

In [None]:
# get params from model 
# Each parameter is flattened to a DataFrame and turned into an id -> value dict.
v = pd.DataFrame(sol.v.reset_index())
pid2v = dict(zip(v['C_'],v['v'])) # pid to indy val, v

## changes based on scenario
# z = pd.DataFrame(sol.z.reset_index())
# pidTm2z = dict(zip(zip(z['F_'],z['T']),z['z'])) # (pid,tm) to pref, z

vBar = pd.DataFrame(sol.vBar.reset_index())
pid2vBar = dict(zip(vBar['S_'],vBar['vBar'])) # pid 2 econ val, vBar

m = pd.DataFrame(sol.m.reset_index())
# NOTE(review): the earlier trailing comment here ("econ val, vBar") was copied
# from the line above; what m represents is not shown in this file - confirm.
pid2m = dict(zip(m['C_'],m['m'])) # pid to model parameter m

In [None]:
### free agent perspective ###
# For every scenario: where does the model send each free agent, what is the
# preference value z of that (player, team) assignment, and does the
# destination match the team recorded in pid2actual?
correctFA = {}
allTemps = {}
for IDX,sol in enumerate(allSols):

    z = pd.DataFrame(sol.z.reset_index())
    pidTm2z = dict(zip(zip(z['F_'],z['T']),z['z'])) # (pid,tm) to pref, z

    ## where do free agents go? and what are their preferences
    tempFA = pd.DataFrame(sol.X[(sol.X!=0)]).reset_index()#.replace(pid2name)
    tempFA.columns = ['from', 'to', 'player', 'X']
    # .copy(): columns are added below, so work on an independent frame
    tempFA = tempFA[(tempFA['from']=='i_f')].copy()
    # Build each column in one pass.  The previous NaN-fill followed by
    # cell-by-cell .loc writes put strings into a float column, which newer
    # pandas deprecates.
    tempFA['pref'] = np.array(
        [pidTm2z[(player, team)] for player, team in zip(tempFA['player'], tempFA['to'])],
        dtype=float,
    )
    tempFA['actual'] = [pid2actual[player] for player in tempFA['player']]
    allTemps[allScenarios[IDX]] = tempFA
    isCorrect = tempFA['to'] == tempFA['actual']
    numFAcorrect = int(isCorrect.sum())
    correctFA[IDX] = tempFA[isCorrect].replace(pid2name)
    print(f'In scenario {allScenarios[IDX]}, there were {numFAcorrect} correct free agent assignments (out of {tempFA.shape[0]}).')

# how often each player was assigned correctly across scenarios
correctFAlist = []
for key,DF in correctFA.items():
    correctFAlist.extend(DF['player'].unique())
display(Counter(correctFAlist))


In [None]:
## for free agent discussion in sensitivity analysis
# Summarise the realised preference values for the 'rand' scenarios only.
for key, tempFA in allTemps.items():
    if 'rand' not in key:
        continue
    prefs = tempFA.pref
    avgPref, stdPref, totalPref = np.mean(prefs), np.std(prefs), sum(prefs)
    print(f'Scenario {key}:\nAvg Pref: {avgPref:.3f}, std Pref: {stdPref:.3f}, Total Pref (obj): {totalPref:.3f}')
    display(tempFA.value_counts('pref'))


In [None]:
## for draft analysis
# Error statistics for a single solution, comparing the model's draft slot
# with the actual one.
minError, maxError, avgError, stdError = [],[],[],[]
errorDist = []
# for IDX,sol in enumerate(allSols):
# NOTE(review): index 3 corresponds to allScenarios[3] ('_plus10'), not the
# 'Actual' case study at index 0 - confirm this is the intended solution.
sol = allSols[3]
tempDFT = pd.DataFrame(sol.Y[(sol.Y!=0)]).reset_index()
tempDFT.columns = ['Team', 'pid', 'ModelDraftNumber', 'Y']
tempDFT['Player'] = tempDFT['pid'].replace(pid2playerDft)
# actual pick number is taken as pid + 1 (presumably draftee ids are 0-based
# in actual draft order - confirm)
tempDFT['ActualDraftNumber'] = tempDFT['pid']+1
tempDFT['Error'] = tempDFT['ActualDraftNumber'] - tempDFT['ModelDraftNumber']
# all four statistics here are taken over the absolute error
minError_, maxError_, avgError_, stdError_ = abs(tempDFT['Error']).min(), abs(tempDFT['Error']).max(), abs(tempDFT['Error']).mean(), abs(tempDFT['Error']).std()
minError.append(minError_)
maxError.append(maxError_)
avgError.append(avgError_)
stdError.append(stdError_)
errorDist.append(tempDFT.sort_values(by='ActualDraftNumber')['Error']) # sorted by actual
    

In [None]:
## Appendix B - Table 6
# Emit the draft table as LaTeX, ordered by the model's draft number; uses
# whichever tempDFT the previous cell left behind.
print(tempDFT[['Player', 'ModelDraftNumber', 'ActualDraftNumber']].sort_values(by='ModelDraftNumber').to_latex(index=False))

In [None]:
## Table 4 - Draft Results Sensitivity Analysis
# show distribution of errors for draft positions
for IDX,sol in enumerate(allSols):
    # NOTE(review): these accumulators are re-created on every iteration, so
    # after the loop they only hold the last scenario; only the printed line
    # below reports every scenario.
    minError, maxError, avgError, stdError = [],[],[],[]
    errorDist = []
    # for IDX,sol in enumerate(allSols):

    tempDFT = pd.DataFrame(sol.Y[(sol.Y!=0)]).reset_index()
    tempDFT.columns = ['Team', 'pid', 'ModelDraftNumber', 'Y']
    tempDFT['Player'] = tempDFT['pid'].replace(pid2playerDft)
    tempDFT['ActualDraftNumber'] = tempDFT['pid']+1
    tempDFT['Error'] = tempDFT['ActualDraftNumber'] - tempDFT['ModelDraftNumber']
    # NOTE(review): unlike the single-solution cell above, min and mean here use
    # the signed error while max uses the absolute error - confirm intent.
    minError_, maxError_, avgError_, stdError_ = tempDFT['Error'].min(), abs(tempDFT['Error']).max(), tempDFT['Error'].mean(), abs(np.std(tempDFT['Error']))#.std()
    minError.append(minError_)
    maxError.append(maxError_)
    avgError.append(avgError_)
    stdError.append(stdError_)
    errorDist.append(tempDFT.sort_values(by='ActualDraftNumber')['Error']) # sorted by actual

#     fig = go.Figure()
#     for idx,dist in enumerate(errorDist):
#         # visualize
#         fig.add_trace(go.Box(y=dist,boxmean=True,#'sd' 
#                              name='',marker_color='lightseagreen'))

#     fig.update_layout(showlegend=False,
#                      font=dict(family="Computer Modern",size=14),
#                      title = f'Draft Error {allScenarios[IDX]}')

#     fig.show()
    # one line per scenario: (min, max, mean, std) as computed above
    print(f'{allScenarios[IDX]}: {minError_, maxError_, avgError_, stdError_}')

In [None]:
## calculations
## get all results for solutions
# scenario -> team -> list of archetype labels on the team's resulting roster.
# A player is on a team if he starts there with status 0 or is sent there with
# status 1.
master_allTeamGroupResults = {}
for IDX, sol in enumerate(allSols):
    allTeamGroupResults = {}
    Xdf = pd.DataFrame(sol.X).reset_index()
    Xdf.columns = ['from', 'to', 'player', 'status']
    for team in Xdf['to'].unique():
        stays = (Xdf['from']==team)&(Xdf['status']==0)
        arrives = (Xdf['to']==team)&(Xdf['status']==1)
        tmList = Xdf[stays|arrives]['player'].unique()
        # raises KeyError if a player has no cluster
        tmList = [pid2clust[pid] for pid in tmList]
        # setdefault replaces a bare try/except used only to create the list
        allTeamGroupResults.setdefault(team, []).extend(tmList)
    master_allTeamGroupResults[allScenarios[IDX]] = allTeamGroupResults


# scenario -> team -> {archetype: count}
master_teamAvgGroups = {}
for scene in allScenarios:
    allTeamGroupResults = master_allTeamGroupResults[scene]
    ## count on average (distribution) of clusters per team
    teamAvgGroups = {
        team: dict(Counter(allTeamGroupResults[team])) for team in allTeams
    } # team: cluster distribution

    ## remove zeros
    # teams left with no archetypes are omitted entirely, as before
    teamAvgGroups_copy = {}
    for team in allTeams:
        nonzero = {key: val for key, val in teamAvgGroups[team].items() if val != 0}
        if nonzero:
            teamAvgGroups_copy[team] = nonzero
    master_teamAvgGroups[scene] = teamAvgGroups_copy
                
# get b param vals
# (archetype name, archetype name) -> b_synergy, one entry per index pair with
# g1 not greater than g2; read from whichever `sol` was bound last.
b_syn = {}
for g1 in sol.G_:
    for g2 in sol.G_:
        if not (g1 > g2):
            g1Name = gpname1920[g1]
            g2Name = gpname1920[g2]
            b_syn[(g1Name, g2Name)] = sol.b_synergy[g1, g2]
        
# create function to calculate average synergy, given lineup
def returnSynergy(lineup):
    """Return the average synergy of a team, given its archetype counts.

    ``lineup`` maps archetype name -> number of players of that archetype
    (a dict or Counter).  For every unordered pair of distinct archetypes
    present, ``b * ln((n1 + 1) * (n2 + 1))`` is added, where ``b`` is looked up
    in ``b_syn`` under either key order.  The total is divided by the number of
    distinct archetypes and rounded to 3 decimals.

    An empty lineup yields 0.0 (it previously raised ZeroDivisionError).
    Raises KeyError when a pair is missing from ``b_syn`` in both orders.
    """
    # local import: `combinations` has no visible file-level import
    from itertools import combinations

    if not lineup:
        return 0.0

    val = 0
    for g1,g2 in combinations(lineup,2):
        if g1 == g2:
            # NOTE(review): unreachable as written - combinations() over the
            # keys of a mapping only yields pairs of distinct keys, so this
            # same-archetype penalty is never applied.  If it is intended,
            # iterate combinations_with_replacement instead; left untouched
            # because that would change every reported synergy value.
            val -= (1/b_syn[(g1,g1)])*np.exp(lineup[g1])
        elif (g1,g2) in b_syn:
            val += b_syn[(g1,g2)]*np.log((lineup[g1]+1)*(lineup[g2]+1))
        else:
            val += b_syn[(g2,g1)]*np.log((lineup[g1]+1)*(lineup[g2]+1))

    return round(val/len(lineup),3)

# create dict of team to avg synergy
# scenario -> team -> synergy of that team's archetype mix
master_teamSyn = {}
for scene in allScenarios:
    tag = master_teamAvgGroups[scene]
    teamSyn = {team: returnSynergy(tag[team]) for team in allTeams}
    master_teamSyn[scene] = teamSyn

## calculate team value from sol
def getTeamVal_fromSol(solution,team):
    """Return the mean individual value (rounded to 3 decimals) of the players
    the solution assigns to ``team`` through the draft (Y) or a move (X).

    Player ids are translated to values with ``pid2v``.
    """
    # DRAFT
    picks = pd.DataFrame(solution.Y[solution.Y != 0]).reset_index()
    picks.columns = ['Team', 'pid', 'ModelDraftNumber', 'Y']
    drafted = picks.loc[picks['Team'] == team, 'pid'].replace(pid2v)

    # TRADE + FA
    moves = pd.DataFrame(solution.X[solution.X != 0]).reset_index()
    moves.columns = ['from', 'to', 'pid', 'X']
    incoming = moves.loc[moves['to'] == team, 'pid'].replace(pid2v)

    vals = list(drafted) + list(incoming)
    return round(np.mean(vals), 3)

## calculate team val, synergy from actual (historybref)
historybref21 = historybref[historybref.Season == '2020-21']
def getTeamVal_fromActual(df,team):
    """Return ``(v, s)`` for ``team`` from the rows of ``df``.

    ``v`` is the mean WS/48 (times 100, rounded to 3 decimals) over the team's
    players that have a cluster; ``s`` is the synergy of their archetype counts.
    """
    roster = df[df.Tm == team]
    roster = roster[roster.pid.isin(pid2clust)]
    v = round(np.mean(roster['WS/48'] * 100), 3)
    archetypeCounts = Counter(roster.pid.replace(pid2clust))
    s = returnSynergy(archetypeCounts)
    return v, s

objFcnVals = {} # just individual val, synergy calculated separately
# team -> list of model team values, one per solution in allSols order
holder = {}
for sol in allSols:
    for TM in allTeams:
        # setdefault replaces a bare try/except that also swallowed (and then
        # re-ran) any failure inside getTeamVal_fromSol
        holder.setdefault(TM, []).append(getTeamVal_fromSol(sol,TM))
# average across all scenarios
for TM in allTeams:
    objFcnVals[TM] = round(np.mean(holder[TM]),2)
            

In [None]:
# One row per (team, scenario): model value/synergy next to the value/synergy
# computed from the 2020-21 rows.  Rows are collected in a list and the frame
# is built once, since DataFrame.append was removed in pandas 2.0.
comparisonRows = []
for TM,vals in holder.items():
    # the actual figures do not depend on the scenario, so compute them once per team
    v_prime,s_prime = getTeamVal_fromActual(historybref21,TM)
    for IDX,scene in enumerate(allScenarios):
        v = vals[IDX]
        s = master_teamSyn[scene][TM]
        comparisonRows.append({
            'team':TM, 'model_v':v, 'actual_v':v_prime, 'model_syn':s, 'actual_syn':s_prime, 'scenario':scene
        })

comparisonDF = pd.DataFrame(
    comparisonRows,
    columns=['team', 'model_v', 'actual_v', 'model_syn', 'actual_syn', 'scenario'],
)

comparisonDF.head()

In [None]:
# Display order of the scenario labels on the x axis of the comparison plots.
order = ['Case Study', 
         r'$\mathbf{z}^\text{zero}$',r'$\mathbf{z}^\text{top3}$',r'$\mathbf{z}^\text{top4}$',
        r'$\mathbf{z}^\text{rand}$',r'$\mathbf{r}^\text{-20}$',r'$\mathbf{r}^\text{-10}$',
         r'$\mathbf{r}^\text{+10}$',r'$\mathbf{r}^\text{+20}$'
        ]

# Scenario suffix (as used in the solution file names) -> LaTeX label used in
# the figures; every value here also appears in `order`.
allScenarios_replace = {
    'Actual': 'Case Study',
 '_minus20': r'$\mathbf{r}^\text{-20}$',
 '_minus10': r'$\mathbf{r}^\text{-10}$',
 '_plus10': r'$\mathbf{r}^\text{+10}$',
 '_plus20': r'$\mathbf{r}^\text{+20}$',
 '_indifferent': r'$\mathbf{z}^\text{zero}$',
 '_rand1': r'$\mathbf{z}^\text{top3}$',
 '_rand2': r'$\mathbf{z}^\text{top4}$',
 '_rand3': r'$\mathbf{z}^\text{rand}$'
}

In [None]:
# Swap scenario suffixes for their display labels.  replace() is applied to the
# whole frame, so any cell equal to a suffix would be replaced, not only 'scenario'.
comparisonDF = comparisonDF.replace(allScenarios_replace)

In [None]:
## Figures 8 and 9 
# Box plots of team-level individual value and synergy: the original rosters,
# the model's result per scenario, and the actual following season.
#
# Fix: the two reference boxes had their data swapped relative to their labels.
# origDF is computed from historybref20 (Season == '2019-20') and
# comparisonDF.actual_* from historybref21 (Season == '2020-21'), so each label
# is now paired with the matching data.
fig = go.Figure()
fig.add_trace(go.Box(y=origDF.origVal,name='2019-20'))


fig.add_trace(go.Box(y=comparisonDF.model_v,x=comparisonDF.scenario))

fig.add_trace(go.Box(y=comparisonDF.actual_v,name='2020-21'))

fig.update_layout(showlegend=False,
                 font=dict(family="Computer Modern",size=14),
                 title = 'Average Individual Value',
                  yaxis_title='Win Shares',
                 boxmode='group')
fig.update_xaxes(categoryorder='array', categoryarray= order)
fig.show()

fig = go.Figure()
# fig.add_trace(go.Histogram(x=comparisonDF.model_syn))
fig.add_trace(go.Box(y=origDF.origSynergy,name='2019-20'))
fig.add_trace(go.Box(y=comparisonDF.model_syn,x=comparisonDF.scenario))

fig.add_trace(go.Box(y=comparisonDF.actual_syn,name='2020-21'))

fig.update_layout(showlegend=False,
                 font=dict(family="Computer Modern",size=14),
                 title = 'Average Synergy Value',
                  yaxis_title='Positive Interactions',
                 boxmode='group')
fig.update_xaxes(categoryorder='array', categoryarray= order)
fig.show()

In [None]:
## added for sensitivity analysis, just make table of comparisons 
# One row per scenario with league-wide means of model vs. actual value and
# synergy.  Rows are collected in a list and the frame is built once, since
# DataFrame.append was removed in pandas 2.0.
sensitivityRows = []
for scene in comparisonDF.scenario.unique():
    temp = comparisonDF[(comparisonDF.scenario==scene)]
    sensitivityRows.append({
        'scenario': scene,
        'v model': np.mean(temp.model_v),
        'v actual': np.mean(temp.actual_v),
        'synergy model': np.mean(temp.model_syn),
        'synergy actual': np.mean(temp.actual_syn),
    })

sensitivity = pd.DataFrame(
    sensitivityRows,
    columns=['scenario', 'v model', 'v actual', 'synergy model', 'synergy actual'],
)

# relative improvement of the model over the actual figures
sensitivity['v%'] = (sensitivity['v model']-sensitivity['v actual'])/sensitivity['v actual']
sensitivity['syn%'] = (sensitivity['synergy model']-sensitivity['synergy actual'])/sensitivity['synergy actual']


In [None]:
## Figure 7
# Rows 1-4 of the sensitivity table.  NOTE: this relies on the row order, which
# follows the order in which scenarios first appear in comparisonDF (the four
# '_minus/_plus' scenarios directly after the case study).
trd = sensitivity.loc[1:4]

fig = go.Figure()
fig = fig.add_trace(go.Scatter(x=trd['scenario'], y=trd['v%']*100,name='individual value',marker_color='rgba(152, 0, 0, .8)'))
# red channel corrected from 260 to 255: 260 is outside the 0-255 channel range
fig = fig.add_trace(go.Scatter(x=trd['scenario'], y=trd['syn%']*100,name='synergy',marker_color='rgba(255, 199, 0, 1)'))

fig.update_layout(font=dict(family="Computer Modern",size=14),
                  yaxis_title='Percent Improvement',
                  xaxis_title='Scenario',
                  title='League-Wide Average Improvement'
                 )

fig.show()

In [None]:
## economic value of a team's actual roster (historybref)
historybref21 = historybref[historybref.Season == '2020-21']
def getEconVal_actual(df,team): # Tm, pid
    """Return ``(count, total)`` of the vBar values of ``team``'s players in ``df``.

    Only players whose id is a key of ``pid2vBar`` contribute; each player id
    is counted once.
    """
    team_pids = set(df.loc[df['Tm'] == team, 'pid'])
    SSlineup = [pid2vBar[pid] for pid in team_pids if pid in pid2vBar]
    return len(SSlineup), sum(SSlineup)

Xdf = pd.DataFrame(sol.X).reset_index()
Xdf.columns = ['from', 'to', 'player', 'status']
def getEconVal_model(Xdf,team): # player
    """Count and total the pid2vBar values of a team's players under a solution.

    ``Xdf`` must have 'from', 'to', 'player' and 'status' columns. A row counts
    towards ``team`` when either ``from == team`` with ``status == 0`` or
    ``to == team`` with ``status == 1`` (presumably "not moved away" and
    "moved in" respectively -- confirm against the model definition).
    Player ids are de-duplicated, and ids missing from the module-level
    ``pid2vBar`` mapping are ignored.

    Returns a ``(count, total)`` tuple: how many distinct ids had a pid2vBar
    entry, and the sum of those entries (``0`` when there are none).
    """
    from_team_status0 = (Xdf['from'] == team) & (Xdf['status'] == 0)
    to_team_status1 = (Xdf['to'] == team) & (Xdf['status'] == 1)
    roster = set(Xdf.loc[from_team_status0 | to_team_status1, 'player'])
    values = [pid2vBar[pid] for pid in roster if pid in pid2vBar]
    return len(values), sum(values)

# masterEcon maps the position of each solution in allSols to a DataFrame with
# one row per team and the columns team / actualEcon / modelEcon.

# The "actual" total depends only on historybref21 and the team, not on the
# solution, so it is computed once per team instead of once per (solution, team).
actualEconByTeam = {
    team: getEconVal_actual(historybref21, team)[1] for team in allTeams
}

masterEcon = {}
for IDX,sol in enumerate(allSols):
    # Long-format view of this solution's X (index levels become columns).
    Xdf = pd.DataFrame(sol.X).reset_index()
    Xdf.columns = ['from', 'to', 'player', 'status']

    # Rows are collected first and converted once: DataFrame.append was
    # removed in pandas 2.0.
    econ_rows = []
    for team in allTeams:
        _,yModel = getEconVal_model(Xdf,team)
        econ_rows.append(
            {'team':team,'actualEcon':actualEconByTeam[team],'modelEcon':yModel}
        )
    econDF = pd.DataFrame(econ_rows, columns=['team', 'actualEcon', 'modelEcon'])
    masterEcon[IDX] = econDF


In [None]:
## economic values plots ##
# For every entry of masterEcon: draw the per-team actualEcon ('Before Optimal')
# and modelEcon ('Optimal') bars, overlay a horizontal line at the mean of each
# series, show the figure, and print the relative change of the model mean
# over the actual mean.
for IDX in masterEcon:
    tempDF = masterEcon[IDX]

    fig = go.Figure(data=[
        go.Bar(name='Before Optimal', x=tempDF.team, y=tempDF.actualEcon,opacity=0.8),
        go.Bar(name='Optimal', x=tempDF.team, y=tempDF.modelEcon,opacity=0.5),
    ])

    mean1 = np.mean(tempDF.actualEcon)
    mean2 = np.mean(tempDF.modelEcon)
    # Blue line = actual mean, orange line = model mean. The x range of
    # -0.5 .. n-0.5 presumably makes each line span all n bar positions.
    for level, colour in ((mean1, "blue"), (mean2, "orange")):
        fig.add_shape(
            go.layout.Shape(
                type="line",
                x0=-.5,
                y0=level,
                x1=len(tempDF)-.5,
                y1=level,
                line=dict(color=colour, width=4),
            ))

    # The suffix already starts with ", ", so it is appended directly to the
    # title; the previous "Results {titleStr}" form rendered a stray space
    # before the comma (and a trailing space for the 'Actual' case).
    if allScenarios[IDX] == 'Actual':
        titleStr = ''
    else:
        titleStr = f', Scenario {allScenarios[IDX]}'
    fig.update_layout(font=dict(family="Computer Modern",size=14),
                      yaxis_title='Average Value',
                      title=f'Economic Value Results{titleStr}'
                     )
    fig.update_xaxes(
        tickangle=45,
    )
    fig.show()
    print((mean2-mean1)/mean1)

In [None]:
## value plots ## (only grab the values from here)
# For every scenario in comparisonDF: draw the per-team actual_v
# ('Before Optimal') and model_v ('Optimal') bars, overlay a horizontal line at
# the mean of each series, show the figure, and print the relative change of
# the model mean over the actual mean.
for scene in comparisonDF.scenario.unique():
    tempDF = comparisonDF[(comparisonDF.scenario==scene)]

    fig = go.Figure(data=[
        go.Bar(name='Before Optimal', x=tempDF.team, y=tempDF.actual_v,opacity=0.8),
        go.Bar(name='Optimal', x=tempDF.team, y=tempDF.model_v,opacity=0.5),
    ])

    mean1 = np.mean(tempDF.actual_v)
    mean2 = np.mean(tempDF.model_v)
    # Blue line = actual mean, orange line = model mean. The x range of
    # -0.5 .. n-0.5 presumably makes each line span all n bar positions.
    for level, colour in ((mean1, "blue"), (mean2, "orange")):
        fig.add_shape(
            go.layout.Shape(
                type="line",
                x0=-.5,
                y0=level,
                x1=len(tempDF)-.5,
                y1=level,
                line=dict(color=colour, width=4),
            ))

    # The suffix already starts with ", ", so it is appended directly to the
    # title; the previous "Results {titleStr}" form rendered a stray space
    # before the comma (and a trailing space for the 'Actual' case).
    if scene == 'Actual':
        titleStr = ''
    else:
        titleStr = f', Scenario {scene}'
    fig.update_layout(font=dict(family="Computer Modern",size=14),
                      yaxis_title='Average Value',
                      title=f'Individual Value Results{titleStr}'
                     )
    fig.update_xaxes(
        tickangle=45,
    )
    fig.show()
    print((mean2-mean1)/mean1)

In [None]:
## synergy plots ## (only grab the values from here)
# For every scenario in comparisonDF: draw the per-team actual_syn
# ('Before Optimal') and model_syn ('Optimal') bars, overlay a horizontal line
# at the mean of each series, show the figure, and print the relative change
# of the model mean over the actual mean.
for scene in comparisonDF.scenario.unique():
    tempDF = comparisonDF[(comparisonDF.scenario==scene)]

    fig = go.Figure(data=[
        go.Bar(name='Before Optimal', x=tempDF.team, y=tempDF.actual_syn,opacity=0.8),
        go.Bar(name='Optimal', x=tempDF.team, y=tempDF.model_syn,opacity=0.5),
    ])

    mean1 = np.mean(tempDF.actual_syn)
    mean2 = np.mean(tempDF.model_syn)
    # Blue line = actual mean, orange line = model mean. The x range of
    # -0.5 .. n-0.5 presumably makes each line span all n bar positions.
    for level, colour in ((mean1, "blue"), (mean2, "orange")):
        fig.add_shape(
            go.layout.Shape(
                type="line",
                x0=-.5,
                y0=level,
                x1=len(tempDF)-.5,
                y1=level,
                line=dict(color=colour, width=4),
            ))

    # The suffix already starts with ", ", so it is appended directly to the
    # title; the previous "Results {titleStr}" form rendered a stray space
    # before the comma (and a trailing space for the 'Actual' case).
    if scene == 'Actual':
        titleStr = ''
    else:
        titleStr = f', Scenario {scene}'
    fig.update_layout(font=dict(family="Computer Modern",size=14),
                      yaxis_title='Average Value',
                      title=f'Synergy Value Results{titleStr}'
                     )
    fig.update_xaxes(
        tickangle=45,
    )
    fig.show()
    print((mean2-mean1)/mean1)

In [None]:
## Table 5 - Trade Results Sensitivity Analysis
## check trades for fairness scenarios
# For each of the first five scenarios: list the X == 1 rows of the solution
# (excluding those whose origin is 'i_f'), group them by team pair, and print
# summary statistics of the absolute value / money imbalance per pair next to
# the solution's rv / rm attributes.
fairnessScenarios = allScenarios[:5]
for IDX,scene in enumerate(fairnessScenarios):
    sol = allSols[IDX]
    trades = pd.DataFrame(sol.X[(sol.X==1)]).reset_index()
    trades.columns = ['from_team', 'to_team', 'player', 'X']
    # Renumber after filtering: tempTrades below is built from plain lists and
    # therefore indexed 0..n-1, and its index values are reused as row labels
    # for `trades`. Without the reset the two only line up when every dropped
    # 'i_f' row happens to sit at the end of the frame.
    trades = trades[(trades.from_team!='i_f')].reset_index(drop=True)

    # Each row as a (from, to) tuple and as its reversed (to, from) tuple.
    oneWay = list(zip(trades.from_team,trades.to_team))
    otherWay = list(zip(trades.to_team,trades.from_team))
    tempTrades = pd.DataFrame([oneWay,otherWay]).T
    tempTrades.columns=['front','back']

    # For every directed pair seen, the rows moving a player in either
    # direction between those two teams.
    # NOTE(review): when players move both ways, (A, B) and (B, A) are both
    # keys and select the same rows, so that pair contributes the same
    # difference twice to the statistics below -- confirm this is intended.
    tradeList = {}
    for trade in tempTrades.front.unique():
        tradeList[trade] = tempTrades[(tempTrades.front==trade)|(tempTrades.back==trade)].index.values

    # Look up every player's value / money once per scenario instead of
    # re-running a whole-frame replace for every pair.
    playerV = trades['player'].replace(pid2v)
    playerM = trades['player'].replace(pid2m)

    differencesV = {}
    differencesM = {}
    for key in tradeList:
        fromTeam = trades.loc[tradeList[key], 'from_team']
        sent = fromTeam.index[fromTeam == key[0]]      # rows leaving key[0]
        returned = fromTeam.index[fromTeam == key[1]]  # rows leaving key[1]
        from_val = playerV.loc[sent].sum()
        to_val = playerV.loc[returned].sum()
        differencesV[key] = abs(from_val-to_val) #abs
        from_mon = playerM.loc[sent].sum()
        to_mon = playerM.loc[returned].sum()
        differencesM[key] = abs(from_mon-to_mon) #abs
    print(f'------------------\n{scene}\n------------------')
    print(f'Value: {sol.rv}')
    print(pd.DataFrame([differencesV.values()]).T.describe(),'\n')
    print(f'Money: {sol.rm}')
    print(pd.DataFrame([differencesM.values()]).T.describe(),'\n')

In [None]:
## Figure 3 -- Competitive Balance Results
# Per-entry bars of minmax['ub'] ('Before Optimal') and minmax['rhs']
# ('Optimal') against minmax['j in T'], with a horizontal black line at the
# first unique value of minmax['lhs'].
fig = go.Figure(data=[
    go.Bar(x=minmax['j in T'], y=minmax['ub'], name='Before Optimal', opacity=0.8),
    go.Bar(x=minmax['j in T'], y=minmax['rhs'], name='Optimal', opacity=0.5),
])

zval = minmax['lhs'].unique()[0]
z_line = dict(color="black", width=4)
fig.add_shape(
    go.layout.Shape(
        type="line",
        x0=-.5,
        x1=len(minmax['j in T'])-.5,
        y0=zval,
        y1=zval,
        line=z_line,
    )
)
# Placeholder trace with a single blank point; presumably only there so the
# black line gets a '$Z$' legend entry.
fig.add_trace(go.Scatter(x=[''], y=[''], name=r'$Z$', marker=dict(color='black')))

fig.update_layout(
    title='Competitive Balance Results',
    yaxis_title='Max-Min Constraint Value',
    font=dict(family="Computer Modern", size=14),
)
fig.update_xaxes(tickangle=45)
fig.show()

In [None]:
## percentage Improvements over Z (min team value)
# For each solution, print its scenario label and the relative change of the
# solution's first Z value over the smallest 'ub' entry of its coop1 table.
for IDX,sol in enumerate(allSols):
    minmax = sol.coop1.reset_index()
    # Stored on the frame but not used by the print below.
    minmax['diff'] = minmax['rhs'] - minmax['ub']
    lowest_ub = min(minmax['ub'])
    relative_gain = (sol.Z.values[0] - lowest_ub) / lowest_ub
    print(allScenarios[IDX], relative_gain)

In [None]:
# Figure 2 (Trade Counts by Group) -- plus all scenario results for discussion
# One horizontal bar chart per scenario: the 'pid' column of that scenario's
# tradeDict entry is passed through pid2clust and the resulting labels are
# tallied with Counter.
for scene in allScenarios:
    dist = Counter(tradeDict[scene].replace(pid2clust).pid)
    fig = go.Figure(data=[
        go.Bar(
            y=list(dist.keys()),
            x=list(dist.values()),
            orientation='h',
            marker_color='navy',
            text=list(dist.values()),
            textposition='auto',
            textangle=0,
        )
    ])
    fig.update_layout(
        yaxis={'automargin': True, 'categoryorder': 'total descending'},
        font=dict(family="Computer Modern", size=14),
        xaxis_title='Count',
        title=f'Number of Trades by Group {allScenarios_replace[scene]}',
    )
    fig.show()