In [None]:
import json
import pandas as pd
import sklearn.linear_model as linear
import matplotlib.pyplot as plt
import numpy as np
# Rating-name -> weight table. In the cells visible here only the KEYS are
# consumed (they define the rating feature columns); the weight values are
# presumably the game's overall-rating formula -- confirm before relying on them.
ovr_weights = dict(
    diq=0.093,
    dnk=0.0424,
    drb=0.0968,
    endu=0.0075,
    fg=-0.0093,
    ft=0.049,
    hgt=0.225,
    ins=-0.0143,
    jmp=0.0505,
    oiq=0.0971,
    pss=0.0657,
    reb=0.0534,
    spd=0.156,
    stre=0.0962,
    tp=0.105,
)
# Rating feature columns, in alphabetical order.
cols = sorted(ovr_weights)

# Per-season stat fields pulled from every player's stat rows, in this order.
s_cols = [
    'ast', 'astp', 'blk', 'blkp', 'dbpm', 'drb', 'drbp', 'drtg', 'dws', 'ewa',
    'fg', 'fgAtRim', 'fgLowPost', 'fgMidRange',
    'fga', 'fgaAtRim', 'fgaLowPost', 'fgaMidRange',
    'ft', 'fta', 'gp', 'gs', 'min', 'minAvailable',
    'obpm', 'orb', 'orbp', 'ortg', 'ows', 'per', 'pf', 'pm', 'pts',
    'stl', 'stlp', 'td', 'tov', 'tp', 'tpa', 'trbp', 'usgp', 'vorp',
]


In [None]:
from collections import defaultdict

# Per-player-season samples: three parallel lists, one entry per kept season.
stats_X = []
ratings_X = []
names = []

# Team-season accumulators keyed by (season, tid).
t_stats = defaultdict(lambda: np.zeros(len(s_cols)))    # summed stat vectors
t_ratings = defaultdict(lambda: np.zeros(len(cols)))    # minutes-weighted rating sums
t_minutes = defaultdict(int)                            # summed player minutes

# Column positions looked up once instead of once per player-season.
min_idx = s_cols.index('min')
fga_idx, fta_idx, tov_idx, orb_idx = (s_cols.index(c) for c in ('fga', 'fta', 'tov', 'orb'))

# (file, MIN_YEAR): seasons before MIN_YEAR still feed the team accumulators
# but are excluded from the per-player samples.
for file, MIN_YEAR in [('BBGM_2019_All_Stats.json', 2000), ('BBGM_AutoSim_20.json', 0)]:
    with open(file, 'rb') as fp:
        data = json.load(fp)

    for p in data['players']:
        # Regular-season rows only. Several rows for one season collapse to the
        # last one because the dicts are keyed by season.
        regular = [row for row in p['stats'] if not row['playoffs']]
        stats = {row['season']: np.array([row[c] for c in s_cols]) for row in regular}
        tids = {row['season']: row['tid'] for row in regular}
        ratings = {row['season']: np.array([row[c] for c in cols]) for row in p['ratings']}

        for yr, season_stats in stats.items():
            k = (yr, tids[yr])
            t_stats[k] = t_stats[k] + season_stats
            t_minutes[k] = t_minutes[k] + season_stats[min_idx]

        for yr, season_ratings in ratings.items():
            # A rated season with no regular-season stat row contributes nothing.
            if yr not in stats:
                continue
            k = (yr, tids[yr])
            t_ratings[k] = t_ratings[k] + stats[yr][min_idx] * season_ratings

            if yr < MIN_YEAR:
                continue
            stats_X.append(stats[yr])
            ratings_X.append(season_ratings)
            names.append(p['firstName'] + ' ' + p['lastName'] + ' ' + str(yr))

# Team-season summaries, computed once after every file has been accumulated.
# Pace = (fga + 0.44*fta + tov - orb) per summed player-minute; the +1 in the
# denominator presumably guards against a zero-minute team-season.
t_pace = {
    k: (v[fga_idx] + 0.44*v[fta_idx] + v[tov_idx] - v[orb_idx]) / (1 + t_minutes[k])
    for k, v in t_stats.items()
}
# Minutes-weighted average ratings, only for team-seasons with > 2000 minutes.
t_avg_rating = {
    k: v / (1 + t_minutes[k])
    for k, v in t_ratings.items()
    if t_minutes[k] > 2000
}

In [None]:
# Regress team pace on the minutes-weighted average ratings of each
# team-season that passed the minutes filter (the keys of t_avg_rating).
pace_reg_X = [t_avg_rating[team_key] for team_key in t_avg_rating]
pace_reg_Y = [t_pace[team_key] for team_key in t_avg_rating]

# Non-negative coefficients, no intercept; the L1/L2 mix is chosen by 10-fold CV.
clf_pace = linear.ElasticNetCV(
    fit_intercept=False,
    positive=True,
    l1_ratio=[.1,.3,.5,.7,.9,.95,.99,1],
    cv=10,
)
clf_pace.fit(pace_reg_X,pace_reg_Y)

# Report coefficients relative to the largest one, hiding those below 5% of it.
S = abs(clf_pace.coef_).max()
print(np.round(clf_pace.intercept_/S,2))
for rating_name, coef in zip(cols,clf_pace.coef_):
    relative = coef/S
    # Written as "not <" so a NaN ratio (S == 0) is still printed.
    if not (abs(relative) < 0.05):
        print(rating_name,np.round(relative,2))
print()

In [None]:
# In-sample check of the pace model: predicted vs. observed team pace.
fig, ax = plt.subplots()
ax.scatter(clf_pace.predict(pace_reg_X), pace_reg_Y)
# Trailing semicolon suppresses the repr of the returned artist list.
ax.set(xlabel='Predicted pace', ylabel='Observed pace', title='Team pace: model fit');

In [None]:
# One row per player-season: rating columns first, then stats prefixed 's_'.
df = pd.DataFrame(np.hstack([ratings_X,stats_X]),columns = cols + ['s_' + c for c in s_cols])
df.index = names
# Keep seasons with more than 850 minutes. The explicit copy makes the
# column assignments below write to an independent frame rather than a
# filtered slice (avoids pandas' SettingWithCopyWarning).
df = df[df['s_min'] > 850].copy()

# Free-throw rate (fta per fga, denominator floored at 1) and shooting
# attempts counted as fga + 0.44*fta.
df['s_ftr'] = df['s_fta']/np.maximum(df['s_fga'],1)
df['s_tsa'] = df['s_fta']*0.44 + df['s_fga']

# Per-36-minute versions of the counting stats, stored with an 'm' suffix.
for stat in ['ast','orb','drb','tov','stl','blk','pf','tsa']:
    pstats = 's_'+stat
    df[pstats +'m'] = 36*df[pstats]/df['s_min']
df['s_trbm'] = df['s_orbm'] + df['s_drbm']

In [None]:
# Sanity check: the second element of the shape should equal len(cols),
# i.e. every rating column is present exactly once in df.
df[cols].shape,len(cols)

In [None]:
# Per-36 rate stats plus free-throw rate and per-36 shooting attempts.
target_cols = ['s_trbm','s_drbm','s_orbm','s_astm','s_tovm','s_stlm','s_blkm','s_pfm',
               's_ftr','s_tsam']

# Exponents applied to the target before fitting (y ** po).
powers = [1/10,1/8,1/7,1/6,1/5,1/4,1/3,1/2,0.75,1.0,1.25,1.5,1.75,2.0]
# l1_ratio grid handed to ElasticNetCV; [1] means a pure L1 (lasso) penalty.
l1_ratios = [1]


def _fit_power_sweep(X, y, weights, fit_intercept, positive):
    """Fit one ElasticNetCV per exponent in ``powers`` on ``y ** po``.

    Parameters
    ----------
    X : 2-D float array of rating features.
    y : 1-D float array holding the untransformed target.
    weights : per-row sample weights used for both fitting and scoring.
    fit_intercept, positive : forwarded unchanged to ``ElasticNetCV``.

    Returns
    -------
    list of ``(score, po, fitted_model)`` tuples in the order of ``powers``,
    where ``score`` is the weighted in-sample ``model.score`` on the
    transformed target.
    """
    fitted = []
    for po in powers:
        clf = linear.ElasticNetCV(fit_intercept=fit_intercept,positive=positive,
                                  l1_ratio=l1_ratios,cv=10)
        y_pow = y**po
        clf.fit(X,y_pow,sample_weight=weights)
        fitted.append((clf.score(X,y_pow,sample_weight=weights),po,clf))
    return fitted


# The feature matrix is identical for every target, so build it once.
inX = np.array(df[cols]).astype(np.float64)

# model_results[target] = (models_simple, models_worse):
#   models_simple -- no intercept, coefficients constrained to be >= 0
#   models_worse  -- intercept fitted, coefficients unconstrained in sign
model_results = {}
for target_col in target_cols:
    outY = np.array(df[target_col]).astype(np.float64)
    models_simple = _fit_power_sweep(inX,outY,df.s_min,fit_intercept=False,positive=True)
    models_worse = _fit_power_sweep(inX,outY,df.s_min,fit_intercept=True,positive=False)
    model_results[target_col] = (models_simple,models_worse)

In [None]:
# For each target keep the best constrained fit unless the unconstrained fit
# scores at least 0.08 higher; the targets listed below always keep the
# constrained fit regardless of the gap.
always_constrained = ('s_pfm','s_ftr','s_stlm')

model_choice = {}
for key, (constrained_fits, free_fits) in model_results.items():
    # Tuples are (score, power, model); the last element after sorting is the best score.
    best_constrained = sorted(constrained_fits)[-1]
    best_free = sorted(free_fits)[-1]

    worse = best_constrained[0] - best_free[0]
    print(key,worse)

    keep_constrained = worse > -0.08 or key in always_constrained
    model_choice[key] = best_constrained if keep_constrained else best_free

In [None]:

# Manual override for assists: use the third-best constrained fit by score
# instead of the top one, and display the full ranking for inspection.
astm_ranked = sorted(model_results['s_astm'][0])
model_choice['s_astm'] = astm_ranked[-3]
astm_ranked


In [None]:
# Print each chosen model: target name and power, the scaled intercept, then
# every coefficient that is at least 8% of the largest one (all divided by
# the largest absolute coefficient).
for k, chosen in model_choice.items():
    power, clf = chosen[1], chosen[2]
    print(k,power)
    S = abs(clf.coef_).max()
    print(np.round(clf.intercept_/S,2))
    for rating_name, coef in zip(cols,clf.coef_):
        relative = coef/S
        # Written as "not <" so a NaN ratio (S == 0) is still printed.
        if not (abs(relative) < 0.08):
            print(rating_name,np.round(relative,2))
    print()

In [None]:
# Refit the foul-rate target with no intercept and sign-unconstrained
# coefficients (a combination neither sweep in model_results uses), then
# report the best-scoring fit. The fit runs before the report so this works
# from a fresh kernel.
target_col = 's_pfm'
inX = np.array(df[cols]).astype(np.float64)
outY = np.array(df[target_col]).astype(np.float64)

res_m = []
for po in [1/10,1/8,1/7,1/6,1/5,1/4,1/3,1/2,0.75,1.0,1.25,1.5,1.75,2.0]:
    clf = linear.ElasticNetCV(fit_intercept=False,positive=False,l1_ratio=l1_ratios,cv=10)
    y_pow = outY**po
    clf.fit(inX,y_pow,sample_weight=df.s_min)
    res_m.append((clf.score(inX,y_pow,sample_weight=df.s_min),po,clf))

# Best (score, power, model) tuple from the refit.
k = target_col
v = sorted(res_m)[-1]
clf = v[2]
print(k,v[1])
# NOTE(review): the 390/150 rescaling factor is unexplained -- confirm intent.
S = abs(clf.coef_).max()*(390/150)
print(np.round(clf.intercept_/S,2))
for c,r in zip(cols,clf.coef_):
    if abs(r/S) < 0.05:
        continue
    print(c,np.round(r/S,2))
print()

# Side-by-side: best refit vs. the model previously selected for this target.
sorted(res_m)[-1],model_choice[target_col]

In [None]:
# Highest-scoring (score, power, model) tuple from the s_pfm refit sweep.
sorted(res_m)[-1]

In [None]:
# Best fit from the no-intercept, non-negative sweep (index 0 of model_results)
# for whatever target_col currently holds.
sorted(model_results[target_col][0])[-1]

In [None]:
# Best fit from the intercept, sign-unconstrained sweep (index 1 of
# model_results) for whatever target_col currently holds.
sorted(model_results[target_col][1])[-1]

In [None]:
# Scratch arithmetic: reciprocal of 1.75 (one of the target exponents tried
# above); presumably the exponent that undoes a y**1.75 transform -- confirm.
1/1.75