In [None]:
import json
import os
import sys
import fnmatch


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Three-letter team abbreviations, 30 entries in total. Note that this list
# spells three clubs as 'BRK', 'CHO' and 'PHO'.
teams = [
    'MIL', 'TOR', 'PHI', 'BOS', 'IND', 'BRK', 'ORL', 'DET', 'CHO', 'MIA',
    'WAS', 'ATL', 'CHI', 'CLE', 'NYK', 'GSW', 'DEN', 'POR', 'HOU', 'UTA',
    'OKC', 'SAS', 'LAC', 'SAC', 'LAL', 'MIN', 'MEM', 'NOP', 'DAL', 'PHO',
]

In [None]:
def load_roster(filename):
    """Load a roster JSON file and normalise its player-name fields.

    Every player that carries a non-empty combined ``name`` field has it
    replaced by ``firstName`` (the first whitespace-separated token) and
    ``lastName`` (the remaining tokens joined with single spaces).  Players
    without a ``name`` key, or whose name is empty or whitespace-only, are
    left untouched.  The top-level ``version`` field is overwritten with 32.

    Parameters
    ----------
    filename : str or path-like
        Path of the roster JSON file.

    Returns
    -------
    (players, base) : tuple
        ``base`` is the whole decoded document (mutated as described above)
        and ``players`` is the very same list object as ``base['players']``.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    KeyError
        If the document has no top-level ``players`` entry.
    """
    # The context manager closes the handle even when decoding fails; the
    # file is opened in binary mode so json.load detects the encoding itself.
    with open(filename, 'rb') as fh:
        base = json.load(fh)

    for player in base['players']:
        name = player.get('name')
        if not name:
            continue
        parts = name.split()
        if not parts:
            # Whitespace-only name: nothing to split, so leave it untouched
            # instead of failing on parts[0].
            continue
        player['firstName'] = parts[0]
        player['lastName'] = ' '.join(parts[1:])
        del player['name']

    players = base['players']
    base['version'] = 32

    return players, base

In [None]:
# Load the roster that is going to be re-normalised.
players, base = load_roster('2019-20.NBA.Roster.json')

# Map each team abbreviation found in the roster to its team id.
abbrev_to_tid = {}
for team in base['teams']:
    abbrev_to_tid[team['abbrev']] = team['tid']

# Register the alternative spellings that the `teams` list uses for three
# clubs, pointing them at the tid of the roster's own abbreviation.
for alias, roster_abbrev in (('PHO', 'PHX'), ('CHO', 'CHA'), ('BRK', 'BKN')):
    abbrev_to_tid[alias] = abbrev_to_tid[roster_abbrev]

len(players)

In [None]:
# Both selections read from base['players'] (the list load_roster returned as
# `players`) rather than from the `players` name this cell rebinds, so
# re-running the cell gives the same two lists every time.
# NOTE(review): the meaning of the individual negative tid values is not
# visible in this notebook - confirm against the roster format.
players_discard = [p for p in base['players'] if p['tid'] > -3]

# Build the membership set once instead of once per player.
_kept_tids = {-1, -2, -4, -5}
players = [p for p in base['players'] if p['tid'] in _kept_tids]


In [None]:
# Table of the first ratings entry of every player in players_discard, plus an
# 'age' column, used as the "base" distribution later on.
current_ratings = [_['ratings'][0] for _ in players_discard]
for cr, p in zip(current_ratings, players_discard):
    # NOTE: `cr` is the very dict stored in the roster, so this also writes an
    # 'age' key into p['ratings'][0] itself.
    # 2019 is a hard-coded reference year (presumably the roster's season -
    # TODO confirm).
    cr['age'] = 2019 - p['born']['year']
current_ratings = pd.DataFrame(current_ratings)
if 'ovr' in current_ratings.columns:
    # `axis` may no longer be passed positionally to DataFrame.drop
    # (removed in pandas 2.0); name the columns explicitly instead.
    current_ratings = current_ratings.drop(columns=['ovr'])


In [None]:
# Pool the players of every *.json roster in roster_samples/, visiting the
# files in sorted name order and echoing each file name as it is read.
old_players = []
for file in fnmatch.filter(sorted(os.listdir('roster_samples')), '*.json'):
    print(file)
    players2, _ = load_roster(os.path.join('roster_samples', file))
    old_players.extend(players2)

In [None]:
# Table of the first ratings entry of every sampled player with tid > -3, plus
# an 'age' column, used as the "simulated" distribution later on.
players2_discard = [_ for _ in old_players if _['tid'] > -3]
old_ratings = [_['ratings'][0] for _ in players2_discard]
for cr, p in zip(old_ratings, players2_discard):
    # 2044 is a hard-coded reference year (presumably the season the sample
    # rosters were exported at - TODO confirm).
    cr['age'] = 2044 - p['born']['year']
old_ratings = pd.DataFrame(old_ratings)
if 'ovr' in old_ratings.columns:
    # Guarded like the other ratings cells, and using the keyword form because
    # DataFrame.drop no longer accepts `axis` positionally (pandas 2.0).
    old_ratings = old_ratings.drop(columns=['ovr'])

In [None]:
import scipy.stats

print(len(players))

# hack for 3pt: a bell-shaped weight that equals 8 at a rating of 0, decays
# with width 15 and is floored at 1; yp[r] is the weight for integer rating r.
xp = np.linspace(0, 100, 101)
yp = np.exp(-xp**2 / 15**2)
yp = yp / (yp[0] / 8)
yp = np.clip(yp, 1, 10000)
plt.plot(yp)

# remove tp fix for normal leagues
#if len(players) > 100:
#    yp = np.ones_like(yp)

# For every rating column: overlay the two histograms in a fresh figure and
# record (mean, std) of the simulated and of the base distribution.
stats_correct = {}
for key in current_ratings.columns:
    orate = old_ratings[key]
    crate = current_ratings[key]
    if key == 'tp':
        # Reshape the base three-point ratings with the weight curve before
        # measuring them; the values are used as indices into yp.
        crate = crate.apply(lambda x: (x + yp[x] / 2) * yp[x])

    plt.figure()
    for series, label in ((crate, 'base'), (orate, 'simulated')):
        plt.hist(series, 20, label=label, alpha=0.5, density=True)

    # tmean/tstd are called without limits, so nothing is trimmed.
    stats_correct[key] = {
        name: (scipy.stats.tmean(values), scipy.stats.tstd(values))
        for name, values in (('sim', orate), ('base', crate))
    }

    plt.legend()
    plt.title(key)

In [None]:
# Manual adjustments to the measured statistics: shrink every base mean and
# widen every simulated std.  The cell rewrites stats_correct in place, so
# running it again applies the factors a second time.
std_multi = 1.2
mean_multi = 0.9
for key in stats_correct:
    base_mean, base_std = stats_correct[key]['base']
    sim_mean, sim_std = stats_correct[key]['sim']
    stats_correct[key]['base'] = (base_mean * mean_multi, base_std)
    stats_correct[key]['sim'] = (sim_mean, sim_std * std_multi)

# Three-point ratings get an additional 1.2x on their simulated std.
tp_mean, tp_std = stats_correct['tp']['sim']
stats_correct['tp']['sim'] = (tp_mean, tp_std * 1.2)
stats_correct

In [None]:
# Rewrite, in place, the first ratings entry of every player in the roster:
# each rating with an entry in stats_correct is z-scored against the 'base'
# statistics, re-expressed with the 'sim' statistics, clipped to 0..100 and
# rounded to an int.
for player in base['players']:
    ratings = player['ratings'][0]
    for key in ratings:
        if key not in stats_correct:
            continue
        cv = ratings[key]
        if key == 'tp':
            # Same weight-curve reshaping that was applied to the base 'tp'
            # column when its statistics were measured.
            cv = (cv + yp[cv] / 2) * yp[cv]
        from_mean, from_std = stats_correct[key]['base']
        to_mean, to_std = stats_correct[key]['sim']
        z = (cv - from_mean) / from_std
        ratings[key] = int(round(np.clip(z * to_std + to_mean, 0, 100)))

In [None]:
# Rebuild the ratings table from the roster as it stands now, so it can be
# compared with the table captured earlier in current_ratings.
players_discard = [_ for _ in base['players'] if _['tid'] > -3]
current_ratings2 = [_['ratings'][0] for _ in players_discard]
for cr, p in zip(current_ratings2, players_discard):
    # Writes 'age' into the roster's own ratings dict (cr is not a copy).
    cr['age'] = 2019 - p['born']['year']
current_ratings2 = pd.DataFrame(current_ratings2)
if 'ovr' in current_ratings2.columns:
    # Keyword form: DataFrame.drop no longer accepts `axis` positionally
    # (removed in pandas 2.0).
    current_ratings2 = current_ratings2.drop(columns=['ovr'])

# One 4x8 grid: subplot columns 1-4 show simulated vs. base, columns 5-8 show
# simulated vs. corrected.  Ratings fill each half column by column, so the
# layout only has room for 16 rating columns per half.
plt.figure(figsize=(24,12))
for frame, label, col_offset in ((current_ratings, 'base', 0),
                                 (current_ratings2, 'corrected', 4)):
    for idx, key in enumerate(current_ratings.columns):
        orate, crate = old_ratings[key], frame[key]
        w, h = idx//4, idx%4
        plt.subplot(4, 8, (1 + 8*h + w + col_offset))
        plt.hist(orate, 20, label='simulated', alpha=0.5, density=True)
        plt.hist(crate, 20, label=label, alpha=0.5, density=True)

        plt.legend()
        plt.title(key)
plt.tight_layout()
plt.savefig('plot.png', edgecolor='w', facecolor='w')

In [None]:
# Write the modified roster to normed.json, pretty-printed with sorted keys.
# The encoder's chunks are streamed straight into the file.
with open('normed.json', 'wt') as fp:
    fp.writelines(json.JSONEncoder(indent=4, sort_keys=True).iterencode(base))