In [None]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
from sklearn.linear_model import LinearRegression,Ridge,ElasticNet,ElasticNetCV,LassoCV,SGDRegressor,RidgeCV
from collections import defaultdict
import json
import statsmodels.api as sm

from sklearn.preprocessing import StandardScaler, PolynomialFeatures
import lightgbm as lgb
import os, sys

import fnmatch
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names, so use whichever spelling this install provides.
plt.style.use('seaborn-white' if 'seaborn-white' in plt.style.available
              else 'seaborn-v0_8-white')

In [None]:
# Load the league export and index it into the lookup tables used below.
file = 'big_data.json'
# Context manager closes the handle promptly; the previous
# json.load(open(...)) left it open until garbage collection.
with open(file, 'rt', encoding='utf-8-sig') as fh:
    data = json.load(fh)

player_ratings = {}                 # (pid, season) -> {rating name: int value}, 'season' excluded
player_picks = {}                   # pid -> (overall pick number, draft year)
replacement_ovrs = []               # ovr in the season before a player's salary changes to 750
player_born = {}                    # pid -> birth year
player_seasons = defaultdict(int)   # pid -> number of rating rows
team_rosters = defaultdict(list)    # declared here but not filled by this cell
contracts = {}                      # (pid, season) -> (amount, following seasons at the same amount)
draft_picks = defaultdict(list)     # (original tid, draft year) -> [overall pick numbers]
draft_salaries = {}                 # overall pick number -> salary in the season after the draft

team_players = defaultdict(set)     # (tid, season) -> {pid}
for p in data['players']:
    pid = p['pid']
    draft = p['draft']
    player_born[pid] = p['born']['year']

    # Overall pick number assuming 30 picks per round; negative results
    # (round 0 or less) are clipped to 0.
    dp = max(draft['pick'] + (draft['round'] - 1) * 30, 0)
    player_picks[pid] = (dp, draft['year'])
    dtid = draft['originalTid']
    if dtid >= 0:
        draft_picks[(dtid, draft['year'])].append(dp)

    for r in p['ratings']:
        player_seasons[pid] += 1
        # Keep only values whose type is exactly int (bools and floats are dropped).
        player_ratings[(pid, r['season'])] = {
            k: v for k, v in r.items() if type(v) == int and k != 'season'
        }
    for r in p['stats']:
        team_players[(r['tid'], r['season'])].add(pid)

    td = {row['season']: row['amount'] for row in p['salaries']}

    # Seasons after which the salary changes, mapped to the new amount.
    new_c = {s: td[s + 1] for s in td if s + 1 in td and td[s + 1] != td[s]}
    for s, c in new_c.items():
        if c == 750:
            replacement_ovrs.append(player_ratings[(pid, s)]['ovr'])

    # For every salaried season, count how many directly following seasons
    # carry the same amount.
    for season, amount in td.items():
        years_left = 0
        nxt = season + 1
        while nxt in td and td[nxt] == amount:
            years_left += 1
            nxt += 1
        contracts[(pid, season)] = (amount, years_left)

    # Salary in the season after the draft, remembered per pick number
    # (later players with the same pick number overwrite earlier ones).
    first_season = draft['year'] + 1
    if first_season in td:
        draft_salaries[dp] = td[first_season]

#for p in data['players']:
#    for r in p['stats']:
#        if r['min'] > 0:
#            pass

In [None]:
# Salary recorded for overall picks 1..60, keyed by zero-based index
# (key i holds the entry for pick i+1). Raises KeyError if any of those
# picks is missing from draft_salaries.
{i:draft_salaries[i+1] for i in range(60)}

In [None]:
# Distribution of the ovr values collected in replacement_ovrs.
plt.hist(replacement_ovrs)
# RL: mean of that sample; a later cell looks up the MOV at round(RL).
RL = np.mean(replacement_ovrs)
RL

In [None]:
# (tid, season) -> average point margin per game, taken from the
# non-playoff stat rows that have at least one game played.
team_movs = {
    (team['tid'], row['season']): (row['pts'] - row['oppPts']) / row['gp']
    for team in data['teams']
    for row in team['stats']
    if not row['playoffs'] and row['gp'] > 0
}


In [None]:
# won_cup: (tid, season) -> 1.0 when playoffRoundsWon == 4, else 0.0.
# season_ratings: season -> [(wins, MOV)] for the same team-seasons.
# Only seasons with at least 82 games played are kept.
won_cup = {}
season_ratings = defaultdict(list)

for team in data['teams']:
    tid = team['tid']
    full_seasons = (rec for rec in team['seasons'] if rec['gp'] >= 82)
    for rec in full_seasons:
        key = (tid, rec['season'])
        won_cup[key] = float(rec['playoffRoundsWon'] == 4)
        season_ratings[rec['season']].append((rec['won'], team_movs[key]))


In [None]:
# Logistic regression of the title flag on regular-season MOV.
won_cup2 = np.array([[team_movs[key], flag] for key, flag in won_cup.items()])
mov_col, title_col = won_cup2[:, 0], won_cup2[:, 1]
plt.scatter(mov_col, title_col)

# add_constant puts the intercept column first, so params are (intercept, slope).
title_fit = sm.Logit(title_col, sm.add_constant(mov_col)).fit()
cB, cA = title_fit.params

xp = np.linspace(-10, 20)
win_p = 1.0 / (1 + np.exp(-xp * cA - cB))
plt.plot(xp, win_p, c='r')
cA, cB

In [None]:
# Within each season sort the (wins, MOV) tuples ascending and pair every
# team's zero-based position in that ordering with its MOV.
mov_to_pos = [
    (rank, mov_val)
    for season_rows in season_ratings.values()
    for rank, (_wins, mov_val) in enumerate(sorted(season_rows))
]
arr = np.array(mov_to_pos)
rank_col, mov_col = arr[:, 0], arr[:, 1]
# Linear fit of position on MOV; the scatter shows position on x, MOV on y.
clf3 = sm.OLS(rank_col, sm.add_constant(mov_col)).fit()
plt.scatter(rank_col, mov_col)
clf3.summary()

In [None]:
# Logistic fit of the normalised standings position on MOV.
# The largest position observed replaces the literal 29, so the target
# stays inside [0, 1] for leagues that do not have exactly 30 teams.
max_pos = arr[:, 0].max()
clf3 = sm.Logit(arr[:, 0] / max_pos, sm.add_constant(arr[:, 1])).fit()

# Draw the curve from the coefficients just fitted rather than from
# numbers copied out of an earlier run, so plot and summary always agree.
b0, b1 = clf3.params  # (intercept, slope): add_constant prepends the constant
xp = np.linspace(-15, 15)
yp = 1 / (1 + np.exp(-(b0 + b1 * xp)))
plt.plot(xp, max_pos * yp)

clf3.summary()


In [None]:
# lag (1..4 seasons) -> [[MOV in season s, MOV in season s+lag]] for every
# team that has both seasons in team_movs.
offset_years = defaultdict(list)
for (tid, s), mov_now in team_movs.items():
    for lag in range(1, 5):
        mov_later = team_movs.get((tid, s + lag))
        if mov_later is not None:
            offset_years[lag].append([mov_now, mov_later])

In [None]:
# One figure per lag: current MOV (x) against MOV `lag` seasons later (y),
# with the least-squares line for predicting the later value.
for k, pairs in offset_years.items():
    arr = np.array(pairs)
    mov_now, mov_later = arr[:, 0], arr[:, 1]
    plt.figure()
    plt.scatter(mov_now, mov_later)
    plt.title(k)
    # Regress the y-axis quantity on the x-axis quantity. The previous code
    # fitted x on y and then drew that line as if it were y on x, which
    # only coincides when both columns have the same variance.
    clf3 = sm.OLS(mov_later, sm.add_constant(mov_now)).fit()
    xp = np.linspace(-15, 15)
    plt.plot(xp, clf3.params[0] + xp * clf3.params[1], c='r')
    # R^2 is the same in either direction; params are (intercept, slope).
    print(k, clf3.rsquared, clf3.params,)

In [None]:
# clf3 is reassigned on every pass of the preceding loop, so this shows
# only the fit from the final iteration.
clf3.summary()

In [None]:
# (tid, season) -> the ten highest ovr ratings on that roster, descending.
team_ovrs = {
    key: sorted(
        (player_ratings[(pid, key[1])]['ovr'] for pid in roster),
        reverse=True,
    )[:10]
    for key, roster in team_players.items()
}

In [None]:
# Spot check for one hard-coded (tid, season) key. team_ovrs values are
# already sorted descending and cut to 10 entries, so the sort and slice
# here do not change the result.
sorted(team_ovrs[(23,2023)],reverse=True)[:10]

In [None]:
# Design rows (top-10 ovr lists) and targets (MOV), in team_movs key order.
Xs = [team_ovrs[key] for key in team_movs]
ys = list(team_movs.values())

In [None]:
import statsmodels.api as sm

In [None]:
# OLS of team MOV on a constant plus the sorted ovr slots in Xs.
# Assumes every row of Xs has the same length (10) - TODO confirm no team
# has fewer than 10 rated players.
res = sm.OLS(ys,sm.add_constant(Xs)).fit()

In [None]:
import scipy.optimize as opt
def best_fit_func_exp(x):
    """Residual norm of the exponential slot-weight model.

    x = (rate, scale, shift). Slot j (0..9) gets weight
    scale * exp(rate * j); the prediction for a team is the weighted sum of
    its row in the global Xs minus shift. Returns the Euclidean norm of
    prediction minus the global ys.
    """
    rate, scale, shift = x[0], x[1], x[2]
    slot_weights = np.exp(rate * np.arange(10))
    predicted = (Xs @ slot_weights) * scale - shift
    return np.linalg.norm(predicted - ys)
# Seed the search from a straight-line fit to log(OLS slot coefficients):
# slope -> decay rate, exp(intercept) -> scale. np.log yields NaN if any
# coefficient in res.params[1:] is negative.
a,b = np.polyfit(np.arange(10),np.log(np.array(res.params[1:])),1)
# The third start value (-125) is a literal initial guess for the shift.
res2 = opt.minimize(best_fit_func_exp,[a,np.exp(b),-125],method='Nelder-Mead')
print(res2.x)

In [None]:
# Per-slot OLS coefficients against the fitted exponential curve.
plt.plot(res.params[1:])
plt.plot( np.exp(res2.x[0]*np.arange(10))*res2.x[1])
plt.xlabel('value of Nth best player')
plt.grid(True)
# Relabel ticks 0..9 as 1..10; assigning to _ keeps the return value out
# of the cell output.
_ = plt.xticks(np.arange(10),np.arange(10)+1)

In [None]:
# Actual team MOV against the OLS in-sample fitted values
# (res.predict() with no argument uses the training design matrix).
plt.figure(figsize=(5,5))
plt.scatter(ys,res.predict(),alpha=0.1,s=10)
plt.xlabel('true MOV')
plt.ylabel('predicted MOV')
plt.xlim(-15,15)
plt.ylim(-15,15)
plt.grid(True)

In [None]:
# statsmodels summary table for the slot-wise OLS fit.
res.summary()

In [None]:
# (mean squared residual of the OLS fit, mean of ys squared). The second
# number is the squared error of predicting 0 for every team.
((res.predict()-np.array(ys))**2).mean(), (np.array(ys)**2).mean()

In [None]:
def get_rating(x):
    """Mean squared error of the tanh-sum team model for parameters x.

    x is read as three consecutive groups of ``len(x)//3`` values:
    centres, weights and widths. It is rounded to 2 decimals, then the
    centres and widths are multiplied by 10 (the optimiser works in ovr/10
    units). Each player contributes
    ``weight * (tanh((ovr - centre) / width) + 1)`` for every term; a team's
    score is the sum over its players and terms. Scores are mean-centred
    and compared with the global ys. Reads the globals Xs and ys; the
    caller's x is not modified.

    The scaled slices are derived from the group size. The earlier literal
    ``x[:4]`` / ``x[-4:]`` was only correct for 12-parameter vectors and
    silently scaled weights as well for any other length.
    """
    offset = len(x)//3
    x = np.round(np.asarray(x, dtype=float), 2)
    x[:offset] *= 10
    x[2*offset:3*offset] *= 10
    centers = x[:offset]
    gains = x[offset:2*offset]
    widths = x[2*offset:3*offset]

    vec = []
    for row in Xs:
        # Rows are handled one at a time so rosters of differing length work.
        ovr = np.asarray(row, dtype=float)
        bumps = np.tanh((ovr[:, None] - centers) / widths) + 1.0  # (players, terms)
        vec.append((gains * bumps.sum(axis=0)).sum())
    vec = np.array(vec)
    vec -= vec.mean()
    return ((vec-ys)**2).mean()
#es.best.x

# Successive hand-entered starting vectors. Each assignment replaces the
# previous one, so only the last (12 values) is used below.
iv = [78.2,62.3,40.4, 4.73,1.63,0.85, 13.9,8.8,9.5]
iv = [72.67,61.14,39.29, 4.27,1.03,0.80, 13.26,9.45,8.9]
iv = [73.4, 59.3, 53.9, 36.7,  3.9,  2. , -0.8,  1.3, 11.7, 10.7, 13.7,
        10.5]
iv = [ 7.59,  6.1 ,  5.59,  4.18,  4.26,  2.09, -0.89,  1.03,  1.27,
         0.95,  1.47,  0.94]
# Loss of the tanh model at the starting vector.
r2 = get_rating(iv)
#plt.scatter(ys,r2[1],alpha=0.1,s=10)
r2

In [None]:
import scipy.optimize as opt
import cma

# CMA-ES started at iv with initial step size 0.02.
# NOTE(review): 'maxfevals': 1 limits the run to a minimal evaluation
# budget, so es.best will sit close to the start point - presumably set
# to skip the long optimisation; raise it to actually search.
es = cma.CMAEvolutionStrategy(iv, 0.02, {'tolx':1e-6,'tolfun':1e-6,'maxfevals':1})
es.optimize(get_rating)

In [None]:
# Loss at the best point recorded by the CMA-ES run.
get_rating(es.best.x)

In [None]:
# Best point rounded to 2 decimals, shown together with its loss.
tv = np.round(es.best.x,2)
tv,get_rating(tv)

In [None]:
# Convert the hand-entered start vector iv (not the optimiser result) to
# the scale get_rating uses internally: first and last four entries x10.
# NOTE(review): iv is rebound to the scaled array, so running this cell
# twice scales twice, and cells that expect the unscaled iv (the
# get_rating / CMA-ES cells) must run before this one.
iv = np.array(iv)
iv[:4] *=10
iv[-4:] *=10

In [None]:
def get_rating_vec(x):
    """Raw (un-centred) tanh-model score for every row of the global Xs.

    x holds three consecutive groups of ``len(x)//3`` values - centres,
    weights, widths - already in ovr units (no rounding or x10 scaling is
    applied here). Returns a 1-D array with one score per row.
    """
    n = len(x) // 3
    centers, gains, widths = x[:n], x[n:2 * n], x[2 * n:3 * n]

    scores = []
    for roster in Xs:
        acc = 0
        for center, gain, width in zip(centers, gains, widths):
            acc += gain * sum(np.tanh((ovr - center) / width) + 1.0 for ovr in roster)
        scores.append(acc)
    return np.array(scores)
# Actual MOV against the tanh-model score, centred on its mean.
plt.figure(figsize=(5,5))
vt = get_rating_vec(iv)
# vtm is the mean team score divided by 10; get_mov subtracts it once per
# player, so ten players together remove the full mean.
vtm = vt.mean()/10
plt.scatter(ys,vt-vtm*10,alpha=0.1,s=10)
#plt.scatter(ys,res.predict(),alpha=0.1,s=10)

plt.xlabel('true MOV')
plt.ylabel('predicted MOV')
plt.xlim(-15,15)
plt.ylim(-15,15)
plt.grid(True)
# Constant term of the team-level model (negative of the mean score).
print(-vtm*10)

In [None]:
def get_mov(x):
    """MOV contribution of a single player with overall rating x.

    Uses the global parameter vector iv (centres, weights, widths in three
    equal groups) and starts from -vtm, the per-player share of the mean
    team score.
    """
    n = len(iv) // 3
    total = -vtm
    for center, gain, width in zip(iv[:n], iv[n:2 * n], iv[2 * n:3 * n]):
        total += gain * (np.tanh((x - center) / width) + 1.0)
    return total
# Per-player MOV as a function of ovr over the range 20..90.
xp = np.linspace(20,90)
yp = [get_mov(_) for _ in xp]
plt.plot(xp,yp)
plt.grid(True)
print(vtm)

In [None]:
# Show the parameter vector currently bound to iv.
iv

In [None]:
# NR: number of ovr anchors (buckets) in the discrete model below.
NR = 6
# The first iv2 has 8 entries and is immediately replaced; only the second
# (12 = 2*NR values: NR anchors in ovr/10 units, then NR bucket weights) is used.
iv2 = [67.6,62.8,54.8,43.8,  6.3,3.9,2.5,1.5]
iv2 = [ 7.09,  6.39,  5.79,  5.29,  4.26,  3.64,  5.08,  2.51,  1.1 ,
         0.2 , -0.59, -1.24]
def get_rating2(x):
    """Mean squared error of the nearest-anchor bucket model against ys.

    x has the layout described in get_rating2vec (anchors in ovr/10 units
    followed by bucket weights). Reads the globals NR, Xs and ys.
    """
    return ((get_rating2vec(x) - ys) ** 2).mean()


def get_rating2vec(x):
    """Mean-centred team scores of the nearest-anchor bucket model.

    x[:NR] are the ovr anchors in ovr/10 units and x[NR:2*NR] the weight of
    each bucket. Every player in a row of the global Xs is assigned to the
    anchor closest to their ovr (ties go to the lower index) and the team
    score is the sum of the assigned weights. The caller's x is not
    modified.

    The anchors are multiplied by 10 here, exactly as get_rating2 has always
    done. Previously this function skipped the scaling, so passing the same
    vector to both gave predictions that did not match the reported loss.
    """
    x = np.array(x).astype(float)
    x[:NR] *= 10
    anchors = x[:NR].reshape((NR, 1))
    bucket_weights = x[NR:]

    vec = []
    for row in Xs:
        nearest = np.argmin((row - anchors) ** 2, axis=0)
        vec.append(bucket_weights[nearest].sum())
    vec = np.array(vec)
    vec -= vec.mean()
    return vec
#es.best.x

# r2: loss of the bucket model at iv2; r2v: output of get_rating2vec for
# the same vector.
r2 = get_rating2(iv2)
r2v = get_rating2vec(iv2)
#plt.scatter(ys,r2[1],alpha=0.1,s=10)
r2

In [None]:
# get_rating2vec subtracts the mean before returning, so this is expected
# to be zero up to floating-point error.
r2v.mean()

In [None]:
import scipy.optimize as opt
import cma

# CMA-ES for the bucket model, started at iv2 with initial step size 0.05.
# NOTE(review): 'maxfevals': 1 limits the run to a minimal evaluation
# budget - raise it to actually search.
es = cma.CMAEvolutionStrategy(iv2, 0.05, {'tolx':1e-6,'tolfun':1e-6,'maxfevals':1})
es.optimize(get_rating2)

In [None]:
# Best point of the run (3 decimals) and its loss.
np.round(es.best.x,3),get_rating2(es.best.x)

In [None]:
# Split the hand-entered iv2 (not es.best.x) into ovr anchors and bucket
# weights; anchors are stored in ovr/10 units, hence the x10.
iv2 = np.array(iv2)
ovrs = iv2[:NR]*10
weights = iv2[NR:]
# (rounded anchor, weight) pairs for inspection.
[(round(a),b) for a,b in zip(ovrs,weights)]

In [None]:
# (overall pick, seasons since the draft year) -> index of the ovr anchor
# nearest to the player's ovr, one entry per rating row.
pick_res = defaultdict(list)
for (pid, season), ratings in player_ratings.items():
    pick_no, draft_year = player_picks[pid]
    nearest_bucket = np.argmin((ratings['ovr'] - ovrs) ** 2)
    pick_res[(pick_no, season - draft_year)].append(nearest_bucket)

In [None]:
# probs[pick, year, bucket]: share of observations for that pick and year
# that fall in each bucket. Cells with no observations stay all-zero.
probs = np.zeros((61, 5, NR))
for p in range(61):
    for y in range(5):
        buckets = pick_res[(p, y)]
        if not buckets:
            continue
        probs[p, y, :] = [buckets.count(b) / len(buckets) for b in range(NR)]

In [None]:
# Manual switch between two definitions of draft_value. With `if True`
# only the first branch runs: the expected bucket weight for every
# (pick, year), keeping every player regardless of value.
if True:
    draft_value = [(probs[p] * weights).sum(1) for p in range(61)]
else:
    # don't resign bad players!
    # Alternative (currently disabled): clip negative contributions to 0
    # from year index 3 on for picks 1-30 and from year index 2 on for
    # later picks, then sum over buckets.
    draft_valuea = np.array([(probs[p] * weights) for p in range(61)])
#plt.imshow(draft_valuea.sum(2), aspect='auto',cmap='Greys_r')

    draft_valuea[1:31,3:,:] = np.maximum(draft_valuea[1:31,3:,:],0)
    draft_valuea[31:,2:,:] = np.maximum(draft_valuea[31:,2:,:],0)
    #draft_valuea = np.maximum(draft_valuea,0)
    draft_value = draft_valuea.sum(2)
    pick = 20
    plt.imshow(draft_valuea[pick],aspect='auto',cmap='Greys_r',vmin=-1,vmax=1)
    plt.colorbar()
    # NOTE(review): this tuple sits inside the else block, so it is
    # presumably not echoed as cell output - confirm if it is needed.
    np.round(draft_valuea[pick],2),np.round(probs[pick],2),weights

In [None]:
# Show the per-pick, per-year draft values computed above.
draft_value

In [None]:
# Replacement-level MOV, entered as a literal; the trailing comment keeps
# the bucket-based expression it stands in for.
r_lvl = -1.23#iv2[NR+np.argmin(abs(iv2[:NR]*10 - RL))]
# Gap between the last bucket weight and the replacement level.
r_lvliv2 = iv2[-1] - r_lvl
r_lvliv2,r_lvl,iv2[-1]

In [None]:
# Switches the global matplotlib style; every later figure uses it too.
plt.style.use('fivethirtyeight')
# Heat map of draft value relative to the last bucket weight iv2[-1]
# (rows: pick 0..60, columns: year index 0..4).
plt.imshow(np.array(draft_value)-iv2[-1], aspect='auto',cmap='Greys_r')
plt.xlabel('years since drafted')
plt.ylabel('draft pick')
plt.title('draft value')
plt.colorbar()
plt.grid(False)
#plt.ylim(30,0)

In [None]:
# Five-year value above r_lvl per pick (row 0 dropped), then the mean over
# the first 30 picks and over the remaining picks.
a = (np.array(draft_value)-r_lvl).sum(1)[1:]
a[:30].mean(),a[30:].mean()

In [None]:
# Five-year sum of draft value per pick (row 0 dropped).
# NOTE(review): the baseline subtracted here is iv2[-1], not r_lvl, while
# the y-label reads "MOVaR" - confirm which baseline is intended.
plt.plot((np.array(draft_value)-iv2[-1]).sum(1)[1:])
plt.xlabel('pick')
plt.ylabel('draft pick MOVaR')
plt.title('5year value')

In [None]:
# Draft value as an array with row 0 (pick number 0, i.e. pick numbers
# that were clipped to 0 when loading) removed, so row j is pick j+1.
tA = np.array(draft_value)#-weights[-1]
tA = tA[1:]#np.roll(tA,-1,0)

In [None]:
# One figure per year index 0..3 (titled year 1..4): value relative to
# iv2[-1] by pick. The fifth year column of tA is not plotted here.
for i in range(4):
    plt.figure()
    plt.xlabel('pick')
    plt.ylabel('MOV')
    plt.title('year {}'.format(i+1))
    plt.plot(tA[:,i]-iv2[-1])

In [None]:
# For each year column of tA fit amp * exp(-rate * idx**power) to the value
# above iv2[-1], where idx = 0..59 is the row index of tA.
# coeffs[i] holds the fitted (rate, amp, power) for year index i.
coeffs = []
for i in range(5):
    x = [1, 1, 1]

    def testX(x):
        # Reads i from the enclosing loop; safe because fmin runs before
        # the loop advances.
        rate, amp, power = x[0], x[1], x[2]
        fitted = amp * np.exp(-rate * np.arange(60) ** power)
        return np.linalg.norm(fitted - tA[:, i] + iv2[-1])

    coeffs.append(opt.fmin(testX, x))

In [None]:
# Fitted curve parameters as plain lists, one per year index.
[list(_) for _ in coeffs]

In [None]:
# Offset between the last bucket weight and the replacement level.
r_lvliv2

In [None]:
# Fitted curves for all five years on one axis, last year first. Adding
# r_lvliv2 (= iv2[-1] - r_lvl) expresses them relative to r_lvl.
# NOTE(review): the curves are evaluated on 61 points although they were
# fitted on indices 0..59.
for i,x in enumerate(coeffs[::-1]):
    #plt.figure()
    plt.plot( x[1]*np.exp(-x[0]*np.arange(61)**x[2]) + r_lvliv2,label='year {}'.format(5-i))
plt.legend()
# Dashed zero line marks the replacement level.
plt.plot([0,60],[0,0],c='k',lw=1.5,ls='--')
plt.xlabel('draft pick')
plt.ylabel('MOV above replacement')

In [None]:
# One figure per year, last year first: raw tA column against its fitted
# curve shifted back by iv2[-1].
# NOTE(review): tA has 60 rows while the curve is drawn on 61 points, and
# neither series is offset by r_lvl although the y-label says
# "above replacement".
for i,x in enumerate(coeffs[::-1]):
    plt.figure()
    plt.plot(tA[:,4-i])
    plt.plot(x[1]*np.exp(-x[0]*np.arange(61)**x[2])+iv2[-1])
    plt.title('year {}'.format(5-i))
    plt.xlabel('draft pick')
    plt.ylabel('MOV above replacement')

In [None]:
# Five-year total of (draft value - r_lvl) for each row of tA.
plt.plot((tA-r_lvl).sum(1))
plt.xlabel('draft pick')
plt.ylabel('5 year pick value\nabove replacement')
plt.title('keeping all players')

In [None]:
# Compare the two player-value models over ovr 30..80: the tanh curve
# (get_mov) and the weight of the nearest ovr anchor.
xp = np.linspace(30,80)
yp = [get_mov(_) for _ in xp]
plt.plot(xp,yp,label='smooth')
# ovrs[:,None] broadcasts anchors against xp; argmin over axis 0 picks the
# closest anchor for each x.
plt.plot(xp,weights[np.argmin(abs(xp-ovrs[:,None]),0)],label='discrete')
plt.xlabel('ovr')
plt.ylabel('MOV')
plt.legend()

In [None]:
# Lookup table ovr (0..100) -> per-player MOV from the tanh model.
mov = {ovr: get_mov(ovr) for ovr in np.arange(101)}
# MOV at the rounded replacement-level ovr.
np.round(mov[int(round(RL))], 2)

In [None]:
# age -> list of one-season ovr changes (next season minus this season),
# using only players with at least 14 rating rows.
age_loop = defaultdict(list)
for (pid, s), current in player_ratings.items():
    if player_seasons[pid] >= 14:
        age = s - player_born[pid]
        following = player_ratings.get((pid, s + 1))
        if following is not None:
            age_loop[age].append(following['ovr'] - current['ovr'])

In [None]:
from scipy.stats import trim_mean
# age -> trimmed mean (1% cut from each tail) of the one-season ovr
# change, for ages below 36 only.
age_shift = {k:trim_mean(age_loop[k],.01) for k in sorted(age_loop.keys()) if k < 36}

In [None]:
# Age curve rounded to one decimal for display.
{k:round(v,1) for k,v in age_shift.items()}

In [None]:
# Seasons present in team_movs, without the five most recent ones, so the
# multi-year look-ahead below stays inside the data.
valid_seasons = sorted({season for _tid, season in team_movs})[:-5]
# Largest one-season ovr change in the age curve.
max_shift = max(age_shift.values())

In [None]:
# ovr_salaries: ovr -> contract amounts seen at that ovr.
# sal_ovrd:     contract amount -> ovr values seen at that amount.
ovr_salaries = defaultdict(list)
sal_ovrd = defaultdict(list)
for key, ratings in player_ratings.items():
    if key in contracts:
        amount = contracts[key][0]
        ovr_salaries[ratings['ovr']].append(amount)
        sal_ovrd[amount].append(ratings['ovr'])

# One row per distinct amount: [amount, MOV above r_lvl at the mean ovr].
sal_ovr = np.array([
    [amount, mov[round(np.mean(ovr_list))] - r_lvl]
    for amount, ovr_list in sal_ovrd.items()
])
sal_ovr[:, 0] /= 30000  # salary expressed in units of 30000

# Through-the-origin fit: MOV above replacement per 30000 of salary.
sO = sm.OLS(sal_ovr[:, 1], sal_ovr[:, 0]).fit()
plt.scatter(sal_ovr[:, 0], sal_ovr[:, 1])
plt.scatter(sal_ovr[:, 0], sO.predict())
sA = sO.params[0]
sA

In [None]:
# Build, for every team-season in valid_seasons and every look-ahead
# i = 0..4, one feature row [projected payroll, projected roster MOV] in
# Xs[i] and the team's actual MOV in season s+i in ys[i].
# NOTE: this rebinds Xs / ys (lists in the earlier model-fitting cells) to
# dicts keyed by look-ahead, so those earlier cells cannot be re-run after
# this one without rebuilding Xs / ys first.

ROSTER_SLOTS = 10     # players counted towards a team's projected MOV
MIN_CONTRACT = 750    # salary charged for every unfilled roster slot
RESIGN_CHANCE = 0.85  # weight given to players whose contract has run out
# NOTE(review): sA was fitted per 30000 of salary, yet the re-sign estimate
# is scaled by 90000 - confirm the intended unit.
RESIGN_SCALE = 90000


def _projected_mov(pid, season, years_ahead):
    """MOV of player pid after ageing years_ahead seasons from season."""
    age = season - player_born[pid]
    ovr = player_ratings[(pid, season)]['ovr']
    for j in range(years_ahead):
        # NOTE(review): ages missing from age_shift (including 36+) fall
        # back to max_shift, the largest improvement - confirm intent.
        ovr += age_shift.get(age + j, max_shift)
    return mov[round(np.clip(ovr, 0, 100))]


Xs = defaultdict(list)
ys = defaultdict(list)
for tid, s in team_movs:
    if s not in valid_seasons:
        continue
    pars = defaultdict(list)  # look-ahead -> projected player MOVs
    tss = defaultdict(int)    # look-ahead -> projected payroll

    # Players on the roster in season s.
    for i in range(5):
        for pid in team_players[(tid, s)]:
            if (pid, s) not in contracts:
                continue
            amount, years_left = contracts[(pid, s)]
            mov_p = _projected_mov(pid, s, i)
            if years_left >= i:
                # Still under the current contract in season s+i.
                tss[i] += amount
                pars[i].append(mov_p)
            elif mov_p > 0:
                # Contract expired: assume a re-sign with RESIGN_CHANCE at
                # a salary proportional to projected MOV.
                est_con = min(1, mov_p / sA) * RESIGN_SCALE
                tss[i] += est_con * RESIGN_CHANCE
                pars[i].append(RESIGN_CHANCE * mov_p)

    # Draft picks the team makes in seasons s+1 .. s+4.
    for i in range(1, 5):
        for p in draft_picks[(tid, s + i)]:
            dsal = draft_salaries[p]
            # The value curves were fitted on row index 0..59 for picks
            # 1..60, so pick p is evaluated at index p-1 (previously p,
            # which used the next pick's value).
            pick_idx = max(p - 1, 0)
            for k, j in enumerate(range(i, 5)):
                tss[j] += dsal
                x = coeffs[k]
                pars[j].append(iv2[-1] + x[1] * np.exp(-x[0] * (pick_idx ** x[2])))

    # Collapse to the ten best projected players, filling gaps with
    # replacement-level players on minimum contracts.
    for i in range(5):
        tmov = team_movs[(tid, s + i)]
        play = [v for v in pars[i] if v >= r_lvl]
        lp = len(play)
        # Never negative: with more than ten qualifying players the old
        # (10 - lp) * 750 term subtracted salary from the payroll.
        empty_slots = max(ROSTER_SLOTS - lp, 0)
        play = sorted(play + empty_slots * [r_lvl], reverse=True)[:ROSTER_SLOTS]
        Xs[i].append([tss[i] + empty_slots * MIN_CONTRACT, sum(play)])
        ys[i].append(tmov)


In [None]:
# Distribution of the current-season (look-ahead 0) roster MOV feature.
plt.hist(np.array(Xs[0])[:,1])

In [None]:
# For each look-ahead: OLS of actual MOV on a constant, projected payroll
# and projected roster MOV; prints R^2 and the rounded coefficients.
for i in range(5):
    pred = sm.OLS(ys[i],sm.add_constant(Xs[i])).fit()
    print(i,pred.rsquared,np.round(pred.params,2))

In [None]:
# Mean contract amount at each ovr value.
SAa = np.array([[k,np.mean(v)] for k,v in ovr_salaries.items()])
plt.scatter(SAa[:,0],SAa[:,1])

In [None]:
# For ovr above 45: mean contract amount (yp) and 1000 x MOV above r_lvl
# (yp2) on the same axis.
xp = sorted([k for k in ovr_salaries.keys() if k > 45])
yp = [np.mean(ovr_salaries[k]) for k in xp]
yp2 = [1000*(mov[k]-r_lvl) for k in xp]

plt.plot(xp,yp)
plt.plot(xp,yp2)


In [None]:
# Value over replacement per unit of salary as a function of ovr, with a
# hand-set 0.11 * log(ovr - 43) reference curve.
vvec = np.array(yp2)/np.array(yp)
plt.plot(xp,vvec)
plt.plot(xp,0.11*np.log(np.array(xp)-43))
plt.xlabel('ovr')
plt.ylabel('value over replacement per $1M')
def log_fit(x):
    """Residual norm of x[0] * log(ovr - 43) against vvec.

    Reads the globals xp and vvec; every ovr in xp is above 45, so the
    logarithm's argument is positive.
    """
    return np.linalg.norm(x[0]*np.log(np.array(xp)-43)-vvec)
# Call fmin from the public scipy.optimize namespace (as the draft-curve
# fit does); scipy.optimize.optimize is a deprecated alias module.
opt.fmin(log_fit,[1])

In [None]:
# Scratch expression (float floor-division); the result is not stored.
0.4//0.2

In [None]:
# Robust regression (no intercept) of actual MOV on projected roster MOV
# and the MOV attributed to unused cap space, per look-ahead.
# yp[i] holds the in-sample predictions for look-ahead i.
yp = {}
for i in range(5):
    v = np.array(Xs[i])
    payroll, roster_mov = v[:, 0], v[:, 1]
    # Share of a 90000 cap that is still free (0 when over the cap).
    cap_space = np.maximum(90000 - payroll, 0) / 90000
    ot = 1.0 / 3.0  # not used by the fit below
    mov_from_cap = cap_space * sA
    RV = np.array([roster_mov, mov_from_cap]).T
    pred = sm.RLM(ys[i], RV).fit()
    print(i, ":", np.round(pred.params, 2), ",")
    yp[i] = pred.predict()

In [None]:
# Distribution of the look-ahead-0 predictions from the robust fit.
plt.hist(yp[0])

In [None]:
# Distribution of the look-ahead-0 roster MOV feature, for comparison.
plt.hist(np.array(Xs[0])[:,1])

In [None]:
# One figure per look-ahead: actual MOV (x) against predicted MOV (y).
for i in range(5):
    plt.figure()
    plt.scatter(ys[i],yp[i])
