In [None]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
from sklearn import linear_model, preprocessing


In [None]:
# Read the exported history file as raw bytes and decode it as JSON.
# json.loads accepts bytes directly, so no explicit text decoding is needed.
with open('history.json', 'rb') as fp:
    data = json.loads(fp.read())

In [None]:
# Index the team records by their 'tid' so a team can be looked up directly.
# If two records share a tid, the later one wins.
teams = dict((team['tid'], team) for team in data['teams'])

In [None]:
# Fraction of games won, keyed first by team id and then by season.
# Seasons with fewer than one game played are left out, so the ratio is
# always defined.
team_seasons = {
    tid: {
        s['season']: int(s['won']) / int(s['gp'])
        for s in t['seasons']
        if int(s['gp']) >= 1
    }
    for tid, t in teams.items()
}

In [None]:
def _team_year_totals():
    """Return a fresh tid -> season -> running total mapping.

    Every unseen (tid, season) entry starts at 0.01 rather than 0.
    NOTE(review): presumably so that later divisions by these totals never
    hit an exact zero - confirm.
    """
    return defaultdict(lambda: defaultdict(lambda: 0.01))


# Per-team, per-season sums of the players' 'ewa', 'ows' + 'dws' and 'dws' fields.
team_year_ewa = _team_year_totals()
team_year_ws = _team_year_totals()
team_year_dws = _team_year_totals()

min_min = 1917*0.5 # 1917 min for MVP
# Both bounds are exclusive: only seasons strictly between them are used.
minYear = 1980
maxYear = 2021

award_types = set()
for p in data['players']:
    # Collect every distinct award type string seen on any player.
    award_types.update(won['type'] for won in p['awards'])
    for a in p['stats']:
        qualifies = (
            not a['playoffs']
            and a['season'] > minYear
            and a['min'] > min_min
            and a['season'] < maxYear
        )
        if not qualifies:
            continue
        team_year_ewa[a['tid']][a['season']] += a['ewa']
        team_year_ws[a['tid']][a['season']] += a['ows'] + a['dws']
        team_year_dws[a['tid']][a['season']] += a['dws']

In [None]:
# Show the distinct award type strings collected from the player records.
award_types

In [None]:
# One entry per award to model:
#   name   - substring matched against each award's 'type' string
#   bench  - only keep rows where fewer than half of the games were starts
#   rookie - only consider the first entry of each player's stats list
#   stats  - feature columns fed to the logistic regression
awards = [
    {
        'name': 'First Team All-League',
        'bench': False,
        'rookie': False,
        'stats': ['winp', 'ewa', 'vorp', 'frac_ws'],
    },
    {
        'name': 'Sixth',
        'bench': True,
        'rookie': False,
        'stats': ['winp', 'pts_g', 'ewa', 'vorp', 'ws'],
    },
    {
        'name': 'Rookie of',
        'bench': False,
        'rookie': True,
        'stats': ['ewa', 'vorp', 'pts_g'],
    },
    {
        'name': 'First Team All-Defensive',
        'bench': False,
        'rookie': False,
        'stats': ['dws', 'blk_g', 'stl_g'],
    },
]


def _eligible_rows(rows):
    """Keep non-playoff rows with season strictly inside (minYear, maxYear)
    and more than min_min minutes."""
    return [
        row for row in rows
        if not row['playoffs']
        and row['season'] > minYear
        and row['min'] > min_min
        and row['season'] < maxYear
    ]


for award in awards:
    res = []
    for p in data['players']:
        # Seasons (inside the year window) in which this player won the award.
        years = {
            won['season']
            for won in p['awards']
            if award['name'] in won['type']
            and won['season'] > minYear
            and won['season'] < maxYear
        }

        candidate_rows = p['stats'][:1] if award['rookie'] else p['stats']
        stats = _eligible_rows(candidate_rows)
        if award['bench']:
            stats = [
                row for row in stats
                if row['gp'] > 0 and row['gs'] / row['gp'] < 0.5
            ]

        # The stat dicts are annotated in place, so these keys also end up on
        # the rows inside `data`. The assignment order fixes the column order
        # of the DataFrame built below.
        for a in stats:
            team_id = a['tid']
            season = a['season']
            a['wonAward'] = int(season in years)
            a['frac_ewa'] = a['ewa'] / team_year_ewa[team_id][season]
            a['frac_ws'] = (a['ows'] + a['dws']) / team_year_ws[team_id][season]
            a['frac_dws'] = a['dws'] / team_year_dws[team_id][season]

            a['winp'] = team_seasons[team_id][season]
            a['name'] = p['firstName'] + ' ' + p['lastName']
        res.extend(stats)

    df = pd.DataFrame(res)
    df['ws'] = df['ows'] + df['dws']
    # Per-game rates; the divisor is floored at 1 so zero games cannot divide by zero.
    games_played = np.maximum(df['gp'], 1)
    for stat in ('pts', 'stl', 'blk'):
        df[stat + '_g'] = df[stat] / games_played

    # Feature matrix: everything except the label, numeric columns only,
    # then just the columns configured for this award.
    df2 = df.drop(columns=['wonAward'])
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    df3 = df2.select_dtypes(include=numerics)
    x2 = df3[award['stats']]

    # Standardise each feature before fitting.
    feature_std = x2.std(0)
    x3 = (x2 - x2.mean(0)) / feature_std
    clf = linear_model.LogisticRegressionCV(Cs=100, cv=3, max_iter=250)
    clf.fit(x3, df['wonAward'])

    print(award['name'])
    # Undo the standardisation on the coefficients, scale them by the largest
    # one, and print the reciprocals.
    C = clf.coef_.ravel() / feature_std
    C = C / C.max()
    print(round(1 / C, 1))
    print()