In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
def kl_mvn(m0, S0, m1, S1):
    """
    Kullback-Leibler divergence KL( N(m0, S0) || N(m1, S1) ) between two
    multivariate Gaussians.  Divergence is expressed in nats.

    Full covariance matrices are supported (they need not be diagonal).

    Parameters
    ----------
    m0, m1 : array-like, shape (N,)
        Mean vectors of the two Gaussians (1-D only).
    S0, S1 : array-like, shape (N, N)
        Covariance matrices of the two Gaussians.  pandas Series/DataFrames
        are accepted and are used positionally (labels are ignored).

    Returns
    -------
    float
        The divergence.  nan if det(S0) and det(S1) have opposite signs,
        inf if S0 is singular.

    Raises
    ------
    numpy.linalg.LinAlgError
        If S1 is (exactly) singular.

    From wikipedia
    KL( (m0, S0) || (m1, S1))
         = .5 * ( tr(S1^{-1} S0) + log |S1|/|S0| +
                  (m1 - m0)^T S1^{-1} (m1 - m0) - N )
    """
    m0 = np.asarray(m0, dtype=float)
    m1 = np.asarray(m1, dtype=float)
    S0 = np.asarray(S0, dtype=float)
    S1 = np.asarray(S1, dtype=float)

    N = m0.shape[0]
    diff = m1 - m0

    # Solve linear systems against S1 instead of forming its inverse explicitly.
    tr_term = np.trace(np.linalg.solve(S1, S0))
    quad_term = diff @ np.linalg.solve(S1, diff)

    # log(det(S1)/det(S0)) computed from log-determinants: the raw determinants
    # overflow/underflow quickly as N grows, which turned the ratio into nan/inf.
    sign0, logdet0 = np.linalg.slogdet(S0)
    sign1, logdet1 = np.linalg.slogdet(S1)
    if sign0 * sign1 < 0:
        # A negative determinant ratio has no real logarithm.
        det_term = np.nan
    else:
        det_term = logdet1 - logdet0

    return float(.5 * (tr_term + det_term + quad_term - N))

In [None]:
# Load the simulated players and keep only the position label plus the
# contiguous run of rating columns from 'Hgt' through 'PAc' (inclusive).
gm = pd.read_csv('simmed_beta.csv')
all_columns = gm.columns.tolist()
start_idx = all_columns.index('Hgt')
end_idx = all_columns.index('PAc')
rating_cols = all_columns[start_idx:end_idx + 1]
gm = gm[['Pos', *rating_cols]]


In [None]:

# Jitter every rating with unit-variance Gaussian noise.
# NOTE(review): presumably this keeps the per-position covariances non-singular
# when ratings are integers -- confirm.
# A locally seeded generator makes the cell reproducible on every run.
rng = np.random.default_rng(0)
# Cast to float and build a new frame rather than writing floats into
# (possibly integer) columns in place.
ratings = gm.iloc[:, 1:].astype(float)
ratings = ratings + rng.normal(0, 1, size=ratings.shape)
gm = pd.concat([gm.iloc[:, :1], ratings], axis=1)

# Drop kickers and punters, then fit one Gaussian (mean, covariance) per position.
gm = gm[~gm.Pos.isin(['K','P'])]
gmg = gm.groupby('Pos')
mean = gmg[rating_cols].mean()
cov = gmg[rating_cols].cov()


In [None]:
# Pairwise divergences: tmpD[pos][pos2] = KL(pos || pos2) between the
# per-position Gaussians; the diagonal is fixed at 0.
tmpD = {}
for pos in mean.index:
    # Source distribution is invariant over the inner loop.
    src_mean, src_cov = mean.loc[pos], cov.loc[pos]
    tmpDD = {}
    for pos2 in mean.index:
        if pos == pos2:
            # Self-divergence is 0 by definition; skip the computation.
            tmpDD[pos2] = 0
        else:
            tmpDD[pos2] = kl_mvn(src_mean, src_cov, mean.loc[pos2], cov.loc[pos2])
    tmpD[pos] = tmpDD

In [None]:
from matplotlib.colors import LogNorm
import matplotlib.cm

# Keep the divergences under a dedicated name; `tmpD` is overwritten by later cells.
tmpD_BBGM = tmpD
# plt.get_cmap is available on old and new Matplotlib, whereas
# matplotlib.cm.get_cmap was deprecated in 3.7 and removed in 3.9.
cmap = plt.get_cmap('inferno_r')

# Heat map on a log colour scale (the zero diagonal is masked by LogNorm).
kl_frame = pd.DataFrame(tmpD)
plt.imshow(kl_frame, norm=LogNorm(), cmap=cmap)
# Label ticks from the plotted frame itself so labels always match the data.
plt.xticks(np.arange(len(kl_frame.columns)), kl_frame.columns)
plt.yticks(np.arange(len(kl_frame.index)), kl_frame.index)
plt.title('BBGM log of KL')

In [None]:
# Same pipeline as for 'simmed_beta.csv', applied to 'simmed_stats.csv':
# load -> jitter -> per-position Gaussians -> pairwise KL -> heat map.
from matplotlib.colors import LogNorm
import matplotlib.cm

gm = pd.read_csv('simmed_stats.csv')
all_columns = list(gm.columns)
start_idx = all_columns.index('Hgt')
end_idx = all_columns.index('PAc')
rating_cols = all_columns[start_idx:end_idx+1]
gm = gm[['Pos'] + rating_cols]

# Unit-variance Gaussian jitter from a locally seeded generator (reproducible).
# Ratings are cast to float and a new frame is built instead of writing floats
# into (possibly integer) columns in place.
rng = np.random.default_rng(0)
ratings = gm[rating_cols].astype(float)
ratings = ratings + rng.normal(0, 1, size=ratings.shape)
gm = pd.concat([gm[['Pos']], ratings], axis=1)

# Drop kickers and punters, then fit one Gaussian per position.
gm = gm[~gm.Pos.isin(['K','P'])]
gmg = gm.groupby('Pos')
mean = gmg[rating_cols].mean()
cov = gmg[rating_cols].cov()

# tmpD[pos][pos2] = KL(pos || pos2); the diagonal is fixed at 0.
tmpD = {}
for pos in mean.index:
    src_mean, src_cov = mean.loc[pos], cov.loc[pos]
    tmpDD = {}
    for pos2 in mean.index:
        if pos == pos2:
            tmpDD[pos2] = 0
        else:
            tmpDD[pos2] = kl_mvn(src_mean, src_cov, mean.loc[pos2], cov.loc[pos2])
    tmpD[pos] = tmpDD

tmpD_BBGM_og = tmpD
# matplotlib.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap works everywhere.
cmap = plt.get_cmap('inferno_r')

kl_frame = pd.DataFrame(tmpD)
plt.imshow(kl_frame, norm=LogNorm(), cmap=cmap)
plt.xticks(np.arange(len(kl_frame.columns)), kl_frame.columns)
plt.yticks(np.arange(len(kl_frame.index)), kl_frame.index)
plt.title('BBGM log of KL')

In [None]:
gm = pd.read_csv('madden.csv')
# Drop the last three columns of the file.
gm = gm.iloc[:,:-3]

# Coarse position label -> list of fine-grained labels that are folded into it.
fix_map = {'DL':['RE','LE','DT','DE'],
           'OL':['LT','RT','C','LG','RG','OG','OT'],
           'S':['SS','FS','DB'],
           'RB':['HB','FB'],
           'LB':['LOLB','ROLB','MLB','ILB','OLB','EDGE'],
          }

# Explicit copy: the frame is modified below, and assigning into a filtered
# slice would otherwise trigger pandas' SettingWithCopy warning.
gm = gm[~gm.position.isin(['K','P'])].copy()
for new, old in fix_map.items():
    gm.loc[gm.position.isin(old),'position'] = new
gm.position.unique()


In [None]:
# Rating columns: every column whose name contains 'rating', except those
# containing 'ovr '.
# NOTE(review): 'ovr ' has a trailing space -- confirm it matches the real column name.
madden_rating_cols = [c for c in gm.columns if 'rating' in c and 'ovr ' not in c]

# Jitter the ratings with unit Gaussian noise from a locally seeded generator.
# Selecting the rating columns by name (rather than iloc[:, 1:]) does not rely
# on 'position' being the first column, and building a new frame avoids
# assigning into a slice of `gm`.
rng = np.random.default_rng(0)
madden_ratings = gm[madden_rating_cols].astype(float)
madden_ratings = madden_ratings + rng.normal(0, 1, size=madden_ratings.shape)
tdf = pd.concat([gm[['position']], madden_ratings], axis=1)

# One Gaussian (mean, covariance) per position.
gmg = tdf.groupby('position')
mean = gmg.mean()
cov = gmg.cov()
#cov.loc['LT']

In [None]:
# fix singular matrix
#sing_pos = ['K','P']
#for pos in sing_pos:   
#    cov.loc[pos]=np.array(np.identity(mean.shape[1])*1 +  cov.loc[pos])

In [None]:
# Pairwise divergences: tmpD[pos][pos2] = KL(pos || pos2) between the
# per-position Gaussians; the diagonal is fixed at 0.
tmpD = {}
for pos in mean.index:
    # Source distribution is invariant over the inner loop.
    src_mean, src_cov = mean.loc[pos], cov.loc[pos]
    tmpDD = {}
    for pos2 in mean.index:
        if pos == pos2:
            # Self-divergence is 0 by definition; skip the computation.
            tmpDD[pos2] = 0
        else:
            tmpDD[pos2] = kl_mvn(src_mean, src_cov, mean.loc[pos2], cov.loc[pos2])
    tmpD[pos] = tmpDD

In [None]:
from matplotlib.colors import LogNorm
import matplotlib.cm

# Keep the Madden divergences under their own name; `tmpD` is reused later.
madden_tmpD = tmpD

# Heat map of the pairwise KL matrix on a log colour scale, using the
# colormap already bound to `cmap`.
positions = mean.index
tick_locs = np.arange(len(positions))
kl_frame = pd.DataFrame(tmpD)
plt.imshow(kl_frame, norm=LogNorm(), cmap=cmap)
plt.xticks(tick_locs, positions)
plt.yticks(tick_locs, positions)
plt.title('madden log of KL')

In [None]:
# Stack the last table found in each year's combine page.
combine_files = ['2017_combine.htm', '2018_combine.htm', '2019_combine.htm']
combine = pd.concat([pd.read_html(path)[-1] for path in combine_files])
combine = combine[['Pos','Ht','Wt','40yd','Vertical','Bench','Broad Jump','3Cone','Shuttle']].fillna('')

# Drop rows where any column from 'Wt' onwards is purely alphabetic.
# NOTE(review): presumably these are header rows repeated inside the table -- confirm.
# Series.map is used per column instead of the deprecated DataFrame.applymap,
# and `axis` is passed by keyword because it is keyword-only in pandas >= 2.0
# (`.any(1)` raises TypeError there).
is_alpha = combine.iloc[:,2:].apply(lambda col: col.map(lambda x: x.isalpha()))
combine = combine[~is_alpha.any(axis=1)].copy()

# Height strings of the form '<feet>-<inches>' -> total inches.
combine['Ht'] = combine['Ht'].map(lambda x: 12*int(x.split('-')[0]) + int(x.split('-')[1]))

# Everything except the position label becomes numeric; unparsable cells
# (e.g. the '' placeholders from fillna) become NaN.
cols = [_ for _ in combine.columns if _ != 'Pos']
combine[cols] = combine[cols].apply(pd.to_numeric, errors='coerce', axis=1)

In [None]:

# Drop kickers, punters, long snappers and rows with no position.  Take an
# explicit copy because the frame is modified below; assigning into a
# filtered slice would otherwise trigger pandas' SettingWithCopy warning.
combine = combine[~combine.Pos.isin(['K','P','LS',''])].copy()
# Fold fine-grained positions into the coarse groups defined by `fix_map`.
for new, old in fix_map.items():
    combine.loc[combine.Pos.isin(old),'Pos'] = new
combine.Pos.unique()

In [None]:
# Overwrite every QB's bench value with a draw from N(16, 5).
# NOTE(review): presumably QBs have no bench data, which would leave their
# covariance undefined -- confirm the choice of mean 16 / sd 5.
# A locally seeded generator keeps the imputed values reproducible.
rng = np.random.default_rng(0)
is_qb = combine.Pos == 'QB'
combine.loc[is_qb,'Bench'] = rng.normal(16, 5, size=int(is_qb.sum()))
print(combine)

In [None]:
# Fit one Gaussian per position to the combine measurements.  The groupby
# must be rebuilt from `combine` here; otherwise `gmg` still refers to the
# grouping created for the previous data set.
gmg = combine.groupby('Pos')
mean = gmg.mean()
cov = gmg.cov()
print(gmg.count())

# tmpD[pos][pos2] = KL(pos || pos2); the diagonal is fixed at 0.
tmpD = {}
for pos in mean.index:
    # Source distribution is invariant over the inner loop.
    src_mean, src_cov = mean.loc[pos], cov.loc[pos]
    tmpDD = {}
    for pos2 in mean.index:
        if pos == pos2:
            # Self-divergence is 0 by definition; skip the computation.
            tmpDD[pos2] = 0
        else:
            tmpDD[pos2] = kl_mvn(src_mean, src_cov, mean.loc[pos2], cov.loc[pos2])
    tmpD[pos] = tmpDD

In [None]:
# Displays the current values of the loop variables `pos` and `pos2`.
# NOTE(review): looks like a debugging leftover (e.g. to see which pair a
# failing KL computation stopped on) -- consider removing.
pos,pos2

In [None]:
from matplotlib.colors import LogNorm
import matplotlib.cm

# Heat map of the pairwise KL matrix on a log colour scale, using the
# colormap already bound to `cmap`.
positions = mean.index
tick_locs = np.arange(len(positions))
kl_frame = pd.DataFrame(tmpD)
plt.imshow(kl_frame, norm=LogNorm(), cmap=cmap)
plt.xticks(tick_locs, positions)
plt.yticks(tick_locs, positions)
plt.title('combine log of KL')

In [None]:
# Apply the style before the figure is created so it affects this figure.
# 'seaborn-white' was renamed 'seaborn-v0_8-white' in Matplotlib 3.6 and the
# old name was later removed, so pick whichever name this installation provides.
style_name = next(
    (s for s in ('seaborn-v0_8-white', 'seaborn-white') if s in plt.style.available),
    'default',
)
plt.style.use(style_name)

plt.figure(dpi=120,figsize=(8,4))
# matplotlib.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap works everywhere.
cmap = plt.get_cmap('inferno_r')

# (divergence dict, panel title) for each of the three side-by-side heat maps.
panels = [
    (tmpD_BBGM, 'FBGM beta log of KL'),
    (tmpD_BBGM_og, 'FBGM log of KL'),
    (tmpD, 'combine log of KL'),
]
for panel_no, (kl_dict, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 3, panel_no)
    d1 = pd.DataFrame(kl_dict)
    plt.imshow(d1, norm=LogNorm(), cmap=cmap)
    # Tick labels come from the plotted frame itself: `mean.index` belongs to
    # whichever data set was processed last and need not match every panel.
    plt.xticks(np.arange(len(d1.columns)), d1.columns)
    plt.yticks(np.arange(len(d1.index)), d1.index)
    plt.title(panel_title)
plt.tight_layout()

In [None]:
# Element-wise ratio of BBGM to Madden divergences (aligned on position labels;
# the 0/0 diagonal and any unmatched positions become NaN).
d1 = pd.DataFrame(tmpD_BBGM)/pd.DataFrame(madden_tmpD)
# matplotlib.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap works everywhere.
cmap = plt.get_cmap('RdBu_r')

# Centre the diverging colormap on ratio == 1 by using log-symmetric limits,
# so red really means ratio > 1 and blue ratio < 1 as the title states.
ratio_values = d1.to_numpy(dtype=float)
positive = ratio_values[np.isfinite(ratio_values) & (ratio_values > 0)]
spread = np.exp(np.abs(np.log(positive)).max()) if positive.size else 1.0
if np.isfinite(spread) and spread > 1:
    ratio_norm = LogNorm(vmin=1 / spread, vmax=spread)
else:
    # Nothing to centre on (no valid ratios, or all exactly 1): autoscale.
    ratio_norm = LogNorm()

plt.imshow(d1, norm=ratio_norm, cmap=cmap)
# Tick labels come from the plotted frame so they always match its rows/columns.
plt.xticks(np.arange(len(d1.columns)), d1.columns)
plt.yticks(np.arange(len(d1.index)), d1.index)
plt.title('BBGM Ratings vs. Madden\nRed: BBGM too different, Blue: BBGM too similar')
plt.tight_layout()