In [None]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict

In [None]:
# Rating keys that make up a player's feature vector. The order matters:
# every vector built from this list uses the same positions.
rate_to_use = [
    'stre', 'spd', 'jmp', 'endu', 'ins',
    'dnk', 'ft', 'fg', 'tp', 'oiq',
    'diq', 'drb', 'pss', 'reb', 'hgt',
]

In [None]:
# Load the two input files. `sim` supplies the players used to fit the model
# below; `data` supplies the per-player bios looked up by srID later on.
# The context managers close each file handle as soon as parsing is done
# instead of leaving it open until garbage collection.
with open('newresign.json', 'rb') as sim_file:
    sim = json.load(sim_file)
with open('real-player.json', 'rb') as data_file:
    data = json.load(data_file)

In [None]:
# Walk the simulated players and collect, for everyone drafted in 2021 or
# later:
#   * X / y          - rating vectors and the matching 'ovr' (regression data)
#   * picks          - rating vectors grouped by overall pick number
#   * picks_by_year  - rating vectors of picks numbered below 31, by draft year
picks = defaultdict(list)
picks_by_year = defaultdict(list)
X, y = [], []
i = 0  # number of players kept
for p in sim['players']:
    draft_info = p['draft']
    year = draft_info['year']
    if year < 2021:
        # Earlier draft classes are left out of the sample.
        continue
    # Overall pick number, counting 30 selections per round.
    pick = 30 * (draft_info['round'] - 1) + draft_info['pick']
    first_ratings = p['ratings'][0]
    vec = np.array([first_ratings[k] for k in rate_to_use])
    X.append(vec)
    y.append(first_ratings['ovr'])
    picks[pick].append(vec)
    if pick < 31:
        picks_by_year[year].append(vec)
    i += 1

In [None]:
import statsmodels.api as sm

# Ordinary least squares of 'ovr' (y) on the rating vectors (X) plus a
# constant term; a plain linear fit is good enough for this purpose.
design_matrix = sm.add_constant(X)
clf = sm.OLS(y, design_matrix).fit()

In [None]:
def _predicted_ovrs(vectors):
    """Return the fitted model's prediction for each rating vector in ``vectors``."""
    # The leading 1 lines up with the constant column used when fitting.
    return [clf.predict([1] + list(vec)) for vec in vectors]


# Rating vectors per pick number, converted to a NumPy array.
picks_a = {slot: np.array(vecs) for slot, vecs in picks.items()}
# Mean and standard deviation of the predicted overall per pick number.
picks_o = {slot: np.mean(_predicted_ovrs(vecs)) for slot, vecs in picks.items()}
picks_v = {slot: np.std(_predicted_ovrs(vecs)) for slot, vecs in picks.items()}
# Mean predicted overall of the picks collected for each draft year.
picks_yr = {yr: np.mean(_predicted_ovrs(vecs)) for yr, vecs in picks_by_year.items()}

# Spread of those yearly means; used later to scale the per-draft-year noise.
avg_v = np.std(list(picks_yr.values()))

In [None]:
# For every pick number with more than one sample, scale each rating vector
# (row) so that its entries sum to 1.
picks_norm = {}
for slot, vecs in picks.items():
    if len(vecs) > 1:
        stacked = np.array(vecs)
        picks_norm[slot] = stacked / stacked.sum(1, keepdims=True)

In [None]:
# Load the roster whose player ratings are rewritten below. The context
# manager closes the file handle as soon as parsing is done.
with open('1947_preseason.json', 'rb') as roster_file:
    roster = json.load(roster_file)

In [None]:
# Rescale each matched roster player's ratings so that the fitted model's
# predicted overall equals the average predicted overall of simulated players
# taken at the same pick number, plus one random offset shared by everyone in
# the same draft year. The relative mix of a player's ratings is preserved;
# only the overall scale changes.
#
# NOTE(review): np.random is not seeded anywhere in view, so each run draws
# different offsets.
draft_noise = {}  # draft year -> offset shared by that draft class
# Derived fields removed so the game recomputes them on import.
derived_keys = ('ovr', 'pot', 'fuzz', 'skills', 'pos')
for p in roster['players']:
    # Only players that can be matched to a bio entry are touched.
    if 'srID' not in p:
        continue
    srId = p['srID']
    if srId not in data['bios']:
        continue
    bio = data['bios'][srId]
    pick = (bio['draftRound']-1)*30 + bio['draftPick']
    ratings = p['ratings'][0]
    vec = np.array([ratings[k] for k in rate_to_use])
    vecN = vec/vec.sum()
    if pick not in picks_norm:
        # No usable simulated sample for this pick number: fall back to
        # bucket -30 (what the pick formula gives for round 0, pick 0).
        pick = -30
    # old version, use simulated samples
    #diff = np.linalg.norm(picks_norm[pick]- vecN,axis=1)
    #diff_idx = np.argsort(diff)[:3]
    #stats = np.mean(picks_a[pick][diff_idx],0)

    # each draft year gets noise
    year = bio['draftYear']
    if year not in draft_noise:
        draft_noise[year] = np.random.randn()*avg_v
    noise = draft_noise[year]
    # each pick gets noise
    #noise = np.random.randn()*picks_v[pick]
    target_ovr = picks_o[pick] + noise
    # Solve  intercept + coef . (n * vecN) = target_ovr  for the scale n.
    n = (target_ovr - clf.params[0])/np.sum(clf.params[1:]*vecN)
    stats = vecN*n

    # let the game recompute these on import
    # pop() instead of del: a key that is already absent (for example when
    # this cell is re-run on the same roster) no longer raises KeyError.
    for key in derived_keys:
        ratings.pop(key, None)

    # Write the rescaled ratings back as integers clamped to 0..100.
    for k,v in zip(rate_to_use,stats):
        ratings[k] = int(round(np.clip(v,0,100)))

In [None]:
# Save the modified roster to a new file, leaving the input file untouched.
with open('1947_var2.json', 'wt') as out_file:
    json.dump(roster, out_file)