In [None]:
import pandas as pd
from pandas import DataFrame
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.insert(1, '..')
from cfb_api_key import api_key
from cfb_tools.CfbDataManager import CfbDataManager
from math import e

In [None]:
# Relative path to the games CSV that feeds the rest of the notebook.
csv_name = '../data/cfbd/games.csv'
# Load the file through the project's CfbDataManager; later cells query `df`
# via `df.view(season=...)`, so it is not a plain pandas DataFrame.
df = CfbDataManager.from_file(csv_name)

In [None]:
from scipy.stats import  norm

def logistic_cdf(x, power=2*e, scale=1., loc=0.):
    """Logistic CDF with a configurable base.

    Evaluates ``1 / (1 + power**(-(x - loc) / scale))`` in a numerically
    stable way.

    Parameters
    ----------
    x : scalar or array-like supporting arithmetic
        Point(s) at which to evaluate the CDF.
    power : float, optional
        Base of the exponential term (default ``2*e``).
    scale : float, optional
        Horizontal stretch applied to ``x - loc``.
    loc : float, optional
        Location of the midpoint, where the CDF equals 0.5.

    Returns
    -------
    Same shape as ``x``; values in ``[0, 1]``.
    """
    # power**(-t) == exp(-z) with z = ln(power) * t.  The division is done
    # first so a zero scale with plain Python floats still raises
    # ZeroDivisionError, as the direct formula did.
    z = (x - loc) / scale * np.log(power)
    # exp(-|z|) lies in (0, 1]: it may underflow to 0 but can never overflow.
    # The direct formula overflowed in the far left tail (OverflowError for
    # Python floats, inf plus a RuntimeWarning for arrays).
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);   z < 0: e^z / (1 + e^z) -- the same function.
    return np.where(z >= 0, 1., decay) / (1. + decay)
def logistic_pdf(x, power=2*e, scale=1., loc=0.):
    """Density of the custom-base logistic distribution.

    This is the derivative with respect to ``x`` of
    ``1 / (1 + power**(-(x - loc) / scale))``, i.e.
    ``ln(power) / scale * u / (1 + u)**2`` with
    ``u = power**(-(x - loc) / scale)``.

    Parameters
    ----------
    x : scalar or array-like supporting arithmetic
        Point(s) at which to evaluate the density.
    power : float, optional
        Base of the exponential term (default ``2*e``).
    scale : float, optional
        Horizontal stretch applied to ``x - loc``.
    loc : float, optional
        Location of the peak of the density.

    Returns
    -------
    Same shape as ``x``.
    """
    log_power = np.log(power)
    z = (x - loc) / scale * log_power
    # u / (1 + u)**2 is unchanged under u -> 1/u, so it can be evaluated with
    # u = exp(-|z|), which never overflows.  The direct form produced
    # inf * 0 = nan for arrays (and OverflowError for Python floats) in the
    # far left tail.
    decay = np.exp(-np.abs(z))
    return log_power / scale * decay / (1. + decay)**2

# power=5.0868 gives the best fit of the logistic pdf to a normal distribution pdf;
# power=5.479 gives the best fit for the cdf.
class logistic:
    """Custom-base logistic distribution with stored default parameters.

    Holds ``power``, ``loc`` and ``scale`` and forwards them to the
    module-level ``logistic_cdf`` / ``logistic_pdf`` functions.  Any of the
    three can be overridden per call through keyword arguments.
    """

    def __init__(self, power=5.0868, loc=0., scale=1.):
        self.power, self.loc, self.scale = power, loc, scale

    def params(self):
        """Return the stored parameters as a new keyword dictionary."""
        return dict(power=self.power, loc=self.loc, scale=self.scale)

    def cdf(self, x, **kwargs):
        """Evaluate ``logistic_cdf`` at ``x``; ``kwargs`` override stored parameters."""
        merged = self.params()
        merged.update(kwargs)
        return logistic_cdf(x, **merged)

    def pdf(self, x, **kwargs):
        """Evaluate ``logistic_pdf`` at ``x``; ``kwargs`` override stored parameters."""
        merged = self.params()
        merged.update(kwargs)
        return logistic_pdf(x, **merged)


# Shared instance with the default parameters, used by the comparison plots.
log = logistic()

In [None]:
# Overlay the normal and logistic CDFs at the same loc/scale to compare shapes.
x = np.linspace(-10,10,1000)
y_norm = norm.cdf(x,loc=0,scale=2.5)
y_logistic = log.cdf(x,loc=0,scale=2.5)

# Explicit figure/axes instead of implicit pyplot state, with labels so the
# figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(x, y_norm, label='normal cdf')
ax.plot(x, y_logistic, label='logistic cdf')
ax.set_xlabel('x')
ax.set_ylabel('cumulative probability')
ax.set_title('normal vs. logistic cdf (loc=0, scale=2.5)')
ax.legend();

In [None]:
# Overlay the normal and logistic PDFs at the same loc/scale to compare shapes.
x = np.linspace(-10,10,1000)
y_norm = norm.pdf(x,loc=0,scale=2.5)
y_logistic = log.pdf(x,loc=0,scale=2.5)

# Explicit figure/axes instead of implicit pyplot state, with labels so the
# figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(x, y_norm, label='normal pdf')
ax.plot(x, y_logistic, label='logistic pdf')
ax.set_xlabel('x')
ax.set_ylabel('probability density')
ax.set_title('normal vs. logistic pdf (loc=0, scale=2.5)')
ax.legend();

In [None]:
# Rating table for the 2020 season: every team starts at 0.
# (Despite the name, `team_list` is a dict mapping team -> rating.)
team_list = {team:0 for team in df.view(season=2020).teams()}
# Games of the 2020 season; the update loop iterates its rows and reads
# home_team / away_team / home_points / away_points from each one.
games = df.view(season=2020).teams_and_scores()


def win_prob(team1, team2):
    """Predicted probability that the side rated ``team1`` beats the side rated ``team2``.

    Both arguments are ratings (numbers), not team names.  The probability is
    ``logistic_cdf`` of the rating difference with ``scale=17`` and the
    function's default ``power``.
    """
    return logistic_cdf(team1-team2, scale=17)

In [None]:
# Run one pass of Elo-style updates over `games`, mutating `team_list` in place.
# NOTE: the ratings are updated cumulatively, so re-running this cell applies
# the games a second time on top of the current ratings; re-run the cell that
# initialises `team_list` first to start from scratch.
verbose = False
kFactor = 17.  # maximum rating change from a single game
n_games = games.shape[0]
for count, (i, row) in enumerate(games.iterrows()):
    # Coarse progress indicator.
    if count % 1000 == 0:
        print("{}/{}".format(count, n_games))

    if verbose: print(row)

    # A game without a recorded score has no result.  Without this guard the
    # comparison below is False for NaN and the home team would be charged
    # with a loss.
    if pd.isna(row.home_points) or pd.isna(row.away_points):
        continue

    away_elo = team_list[row.away_team]
    home_elo = team_list[row.home_team]
    pred_win_prob = win_prob(home_elo, away_elo)

    home_wins = row.home_points > row.away_points
    # Actual result from the home side's perspective: 1 win, 0.5 tie, 0 loss.
    if row.home_points == row.away_points:
        home_result = 0.5
    else:
        home_result = float(home_wins)

    # Zero-sum update: whatever the home team gains, the away team loses.
    update_factor = kFactor*(home_result - pred_win_prob)
    team_list[row.away_team] -= update_factor
    team_list[row.home_team] += update_factor
    if verbose:
        print("home win probability: {} home won? {} update factor: {}".format(pred_win_prob,home_wins,update_factor))
        print("new home elo: {} new away elo: {}".format(team_list[row.home_team],team_list[row.away_team]))

In [None]:
# Team names ordered from highest to lowest rating; equal ratings keep their
# original dict order.
sorted_teams = sorted(team_list, key=team_list.get, reverse=True)

In [None]:
# Print the 25 highest-rated teams (or all of them if there are fewer) with a
# 1-based rank.
for i, team in enumerate(sorted_teams[:25]):
    print("#{} {}: {:.2f}".format(i+1, team,team_list[team]))

In [None]:
# Distribution of the final ratings.  The dict view is materialised as a list
# so the histogram receives a plain sequence.  Ratings outside the bin edges
# [-50, 50] are not counted.
fig, ax = plt.subplots()
ax.hist(list(team_list.values()), bins=np.linspace(-50,50,20))
ax.set_xlabel('final power ratings')
ax.set_ylabel('number of teams')
ax.set_title('2020 elo histogram');

In [None]:
# Rating gap between Alabama and LSU; a positive value means Alabama is rated higher.
team_list['Alabama']-team_list['LSU']