**Implements Elo ratings**

An Elo rating measures relative ability in head-to-head competition, where two players or two teams play each other to decide a winner and a loser.

My resources for this were:
* [FiveThirtyEight_Elo_ratings](https://www.kaggle.com/lpkirwin/fivethirtyeight-elo-ratings)
* [Elo rating system on Wikipedia](https://en.wikipedia.org/wiki/Elo_rating_system)

In [None]:
import numpy as np
import pandas as pd

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

from sklearn.metrics import log_loss

In [None]:
# Scale factor applied to every rating update (see elo_update).
K = 20.0
# Rating points added to the home side's rating before predicting a game.
HOME_ADVANTAGE = 100.0

In [None]:
# Load the regular-season compact results and preview the first rows.
results_path = "../input/mens-march-mania-2022/MDataFiles_Stage1/MRegularSeasonCompactResults.csv"
rs = pd.read_csv(results_path)
rs.head()

In [None]:
# Every team that appears at least once as a winner or as a loser.
team_ids = set(rs.WTeamID) | set(rs.LTeamID)
len(team_ids)

In [None]:
# Every team starts from the same rating of 1500.
elo_dict = dict.fromkeys(team_ids, 1500)

In [None]:
# Winning margin in points for each game (winner's score minus loser's score).
rs['margin'] = rs['WScore'].sub(rs['LScore'])

In [None]:
def elo_pred(elo1, elo2):
    """Return the predicted probability that the side rated ``elo1`` wins.

    Logistic curve in base 10 with a 400-point scale: equal ratings give 0.5,
    and a 400-point lead gives 10-to-1 odds.
    """
    exponent = -(elo1 - elo2) / 400.
    return 1. / (10. ** exponent + 1.)

def expected_margin(elo_diff):
    """Return the linear term ``7.5 + 0.006 * elo_diff``.

    ``elo_update`` divides by this value to scale its margin multiplier.
    """
    slope = 0.006
    intercept = 7.5
    return intercept + slope * elo_diff

def elo_update(w_elo, l_elo, margin):
    """Return ``(pred, update)`` for one game.

    Args:
        w_elo: Rating of the winning side going into the game.
        l_elo: Rating of the losing side going into the game.
        margin: Points by which the winner won.

    Returns:
        Tuple of the winner's predicted win probability and the number of
        rating points to move from the loser to the winner.
    """
    win_prob = elo_pred(w_elo, l_elo)
    rating_gap = w_elo - l_elo
    # Larger margins increase the update; a larger rating gap in the winner's
    # favour increases the divisor and so reduces it.
    margin_factor = ((margin + 3.) ** 0.8) / expected_margin(rating_gap)
    # The less expected the win (low win_prob), the bigger the shift.
    delta = K * margin_factor * (1 - win_prob)
    return win_prob, delta

In [None]:
# The rating lists built later are attached to rs by position, so the index must be 0..n-1 in order.
assert np.array_equal(rs.index.values, np.arange(rs.shape[0])), "Index is out of order."

In [None]:
# Replay every game in row order, recording the pre-game win probability of
# the eventual winner and both teams' ratings after the game.
preds = []
w_elo = []
l_elo = []

for game in rs.itertuples():
    winner, loser = game.WTeamID, game.LTeamID

    # WLoc "H" gives the bonus to the winner, "A" gives it to the loser;
    # any other value gives neither side a bonus.
    w_bonus = HOME_ADVANTAGE if game.WLoc == "H" else 0.
    l_bonus = HOME_ADVANTAGE if game.WLoc == "A" else 0.

    # The bonus only shifts the ratings used for this game's prediction;
    # the stored ratings move by `delta` alone.
    win_prob, delta = elo_update(
        elo_dict[winner] + w_bonus,
        elo_dict[loser] + l_bonus,
        game.margin,
    )
    elo_dict[winner] += delta
    elo_dict[loser] -= delta

    preds.append(win_prob)
    w_elo.append(elo_dict[winner])
    l_elo.append(elo_dict[loser])

In [None]:
# Attach the post-game ratings of the winner and loser to each game row.
rs['w_elo'], rs['l_elo'] = w_elo, l_elo

In [None]:
# Preview the last five games with their rating columns.
rs.tail(n=5)

In [None]:
# Mean negative log-likelihood of the predictions (each pred is the probability given to the actual winner).
np.mean(np.negative(np.log(preds)))

In [None]:
def final_elo_per_season(df, team_id):
    """Return one team's rating from its last game of each season.

    For every season in which the team appears, the row with the highest
    ``DayNum`` is selected and the rating is read from ``w_elo`` if the team
    won that game, or from ``l_elo`` if it lost.

    Args:
        df: Game-level frame with columns ``Season``, ``DayNum``, ``WTeamID``,
            ``LTeamID``, ``w_elo`` and ``l_elo``. It is not modified.
        team_id: Value matched against ``WTeamID`` and ``LTeamID``.

    Returns:
        DataFrame with columns ``team_id``, ``season`` and ``season_elo``,
        one row per season, carrying the index labels of the selected game
        rows. Empty if the team never appears in ``df``.
    """
    played = (df.WTeamID == team_id) | (df.LTeamID == team_id)
    # Filter before doing any work so only this team's rows are materialised.
    # The chained, non-inplace calls each return a new frame, so the caller's
    # data is never written to and no chained-assignment warning can fire.
    last_games = (
        df.loc[played, :]
        .sort_values(['Season', 'DayNum'])
        .drop_duplicates(['Season'], keep='last')
    )
    lost_last = last_games.LTeamID == team_id
    # Take l_elo where the team lost its final game, w_elo otherwise. A
    # vectorised select keeps the column numeric rather than object dtype.
    season_elo = last_games.l_elo.where(lost_last, last_games.w_elo)
    return pd.DataFrame({
        'team_id': team_id,
        'season': last_games.Season,
        'season_elo': season_elo,
    })

In [None]:
# Build one end-of-season table per team, then stack them into a single frame.
df_list = [
    final_elo_per_season(rs, team)
    for team in team_ids
]
season_elos = pd.concat(df_list)

In [None]:
# Preview the first five rows of the stacked end-of-season ratings.
season_elos.head(n=5)