In [1]:
import torch

from data_management.data_acquisition import DataAcquisition
from data_management.data_transformation import DataTransformation
from data_management import FROM_CSV
import pandas as pd
from datetime import timedelta
from datetime import datetime
from models.ratings import EloGrad, EloManual, EloAutoGrad
import numpy as np

  "class": algorithms.Blowfish,


In [2]:
# Load the raw match table from CSV and parse the timestamp column.
da = DataAcquisition()
df = da.get_data(FROM_CSV, fname="../resources/other_leagues.csv")
df['DT'] = pd.to_datetime(df['DT'], format="%Y-%m-%d %H:%M:%S")

# NOTE(review): this instance is built on the *unfiltered* frame and is not
# referenced again in the visible cells — confirm whether it is still needed.
data_transform = DataTransformation(df, timedelta(days=365))

# Keep every league except EuroLeague and EuroCup. reset_index() (without
# drop=True) renumbers the rows and keeps the old index as an "index" column.
df = df[~df['League'].isin(['EuroLeague', 'EuroCup'])].reset_index()

# Build the snapshot dataset used by all training cells below.
transform = DataTransformation(df, timedelta(days=365))
dataset = transform.get_dataset(node_f_extract=False, edge_f_one_hot=True)

team_count = transform.num_teams

2024-02-26 14:05:03.144 | INFO     | data_management._data_saving_loading:load_data_csv:70 - 21100 rows loaded from ../resources/other_leagues.csv


### Manual Elo with NN pass

In [3]:
# dummy dataset: four rounds of three matches between teams A, B and C,
# each round 366 days older than the previous one.

delta = timedelta(days=365)
delta2 = timedelta(days=366)
now = datetime.now()

data = pd.DataFrame({
    'DT': [stamp
           for stamp in (now, now - delta2, now - 2 * delta2, now - 3 * delta2)
           for _ in range(3)],
    'Home': ['A', 'B', 'C'] * 4,
    'Away': ['B', 'C', 'A', 'C', 'A', 'B'] * 2,
    'Winner': ['home', 'away', 'away', 'home', 'away', 'away',
               'home', 'home', 'away', 'home', 'away', 'home'],
    'Home_points': [10, 5, 15, 12, 15, 6, 20, 10, 10, 14, 3, 12],
    'Away_points': [4, 8, 17, 10, 16, 14, 18, 9, 15, 0, 11, 4],
    'League': ['liga'] * 12,
})

# Same 365-day window as the real dataset uses.
transform2 = DataTransformation(data, delta)
dts = transform2.get_dataset(edge_f_one_hot=True)

def train_elo_manual(train_dataset, model, epochs: int = 100, verbose: bool = False):
    """Run `model` over every match of `train_dataset`, one match at a time.

    For each match the model is called on the (home, away) column of
    ``snapshot.edge_index``, the target is ``argmax(edge_attr[m]) / 2`` and the
    model's own ``backward([target, point_diff])`` is invoked to update ratings.

    Args:
        train_dataset: iterable of snapshots exposing ``edge_index``,
            ``edge_attr`` and ``match_points``.
        model: object exposing ``train()``, ``__call__(match)``, ``rating``,
            ``home``, ``away`` and ``backward(result)``.
        epochs: number of passes over ``train_dataset``.
        verbose: print loss, accuracy and the first ratings after each epoch.

    Returns:
        Tuple ``(training_accuracy, E_HS, elos, pts)``:
        per-epoch accuracy in percent (``np.ndarray``; ``0.0`` for an epoch
        without matches), the list of model outputs, the list of
        ``((home_before, away_before), (home_after, away_after))`` rating
        snapshots, and the list of
        ``((home_points, away_points), (model.home, model.away))``.
    """
    torch.set_printoptions(precision=8)
    training_accuracy = []

    E_HS = []
    elos = []
    pts = []

    def _rating_snapshot():
        # Indexing `model.rating` yields a view that shares storage with it, so an
        # in-place rating update would silently rewrite earlier snapshots.
        # Cloning freezes the values at the time of the call.
        return (model.rating[model.home].detach().clone(),
                model.rating[model.away].detach().clone())

    for epoch in range(epochs):
        model.train()
        # `loss` is accumulated as a plain float so no autograd graph is kept alive.
        accuracy, loss, count = 0, 0., 0
        for snapshot in train_dataset:
            # pass through network has to be only one by one in order to compute elo correctly
            matches = snapshot.edge_index
            match_points = snapshot.match_points
            for m in range(matches.shape[1]):
                match = matches[:, m]

                y_hat = model(match)
                y = snapshot.edge_attr[m, :]  # edge weight encodes the match outcome

                E_HS.append(y_hat)

                elos_b4 = _rating_snapshot()

                target = (torch.argmax(y) / 2.).detach()
                prediction = y_hat
                accuracy += 1 if abs(target - prediction) < 0.33 else 0

                loss += float(torch.mean((prediction - target) ** 2))

                point_diff = torch.abs(match_points[m, 0] - match_points[m, 1]).detach()
                result = [target, point_diff]

                model.backward(result)

                elos.append((elos_b4, _rating_snapshot()))
                pts.append(((match_points[m, 0], match_points[m, 1]), (model.home, model.away)))

            count += matches.shape[1]

        # Guard against an empty dataset instead of raising ZeroDivisionError.
        accuracy_pct = accuracy / count * 100 if count else 0.0

        if verbose:
            rating = model.rating[:5] if len(model.rating) >= 5 else model.rating
            print(f'[TRN] '
                  f' Epoch: {epoch}, training loss: {loss:.3f}, '
                  f'training accuracy: {accuracy_pct:.2f}% \n'
                  f'ratings (first 5): {rating}')
        training_accuracy.append(accuracy_pct)

    return np.array(training_accuracy), E_HS, elos, pts


def compute_elo(matches: pd.DataFrame, mapping, elo_base: int = 1000, gamma: float = 2, c: float = 3, d: float = 500, k: float = 3, verbose: bool = False):
    """Replay `matches` in row order and compute Elo ratings with plain numpy.

    Each row needs the columns ``Home``, ``Away``, ``Winner``, ``Home_points``
    and ``Away_points``. The expected home score is
    ``1 / (1 + c ** ((elo_away - elo_home) / d))`` and the rating shift is
    ``k * (1 + |point difference|) ** gamma * (S_h - E_h)``, added to the home
    team and subtracted from the away team.

    Args:
        matches: match table, processed from first to last row.
        mapping: team name -> index into the rating vector.
        elo_base: starting rating of every team.
        gamma, c, d, k: constants of the formulas above.
        verbose: print the rating vector after every match.

    Returns:
        ``(ratings, E_HS, elos)``: final rating array, the expected home score
        of each match, and per match
        ``((home_before, away_before), (home_after, away_after))``.
    """
    ratings = np.full((len(mapping),), elo_base, dtype=float)
    # Actual home score: win -> 1, loss -> 0, anything else counts as a draw.
    home_score_by_winner = {'home': 1., 'away': 0.}
    expected_scores = []
    rating_trace = []

    for step, row in enumerate(matches.to_dict('records')):
        home_idx = mapping[row['Home']]
        away_idx = mapping[row['Away']]

        exponent = (ratings[away_idx] - ratings[home_idx]) / d
        expected_home = 1 / (1 + np.power(c, exponent))
        actual_home = home_score_by_winner.get(row['Winner'], 1/2)
        expected_scores.append(expected_home)

        before = (ratings[home_idx], ratings[away_idx])

        margin = abs(row['Home_points'] - row['Away_points'])
        shift = k * ((1 + margin)**gamma) * (actual_home - expected_home)
        ratings[home_idx] += shift
        ratings[away_idx] -= shift

        rating_trace.append((before, (ratings[home_idx], ratings[away_idx])))

        if verbose:
            print(f"iteration {step}, rating: {ratings}, E_H = {expected_home}; {actual_home}")

    return np.array(ratings), expected_scores, rating_trace
    
    
# Sanity run on the dummy dataset: reference Elo first, then one epoch of the model.
print('Computed elo Dummy: ', compute_elo(data, transform2.team_mapping)[0])
eloDummy = EloManual(team_count=transform2.num_teams)
acc_dummy = train_elo_manual(dts, eloDummy, epochs=1, verbose=True)

print('\n----------------------------------------\n')

# Same comparison on the real dataset: reference implementation ...
computed, E_HS_comp, elos_comp = compute_elo(
    df, transform.team_mapping, elo_base=1000, gamma=2., c=3., d=500., k=3.
)

print()
# ... versus a single silent epoch of EloManual.
elo = EloManual(team_count=transform.num_teams)
acc_late, E_HS_net, elos_net, pts = train_elo_manual(dataset, elo, epochs=1, verbose=False)

# Compare the reference ratings with the ratings held by the EloManual model.
err = False
for i in range(len(computed)):
    cmp = float(computed[transform.team_mapping[transform.inv_team_mapping[i]]])
    net = float(elo.rating[transform.team_mapping[transform.inv_team_mapping[i]]])
    # Use the absolute difference: a signed check would miss every team whose
    # network rating is *larger* than the reference one.
    if abs(cmp - net) > 0.1:
        print(rf'ERROR on index {i}:: {cmp} / {net}')
        err = True
    if i < 5:
        print(f'{i}:: computed: {cmp:10.3f} || net: {net:10.3f}')
if not err:
    print("...")
    print("[SUCCESS]: Computed elo is the same as Elo from NN")

# Debug helper: set print_diff to True to walk the two traces side by side and
# stop at the first match whose expected home score differs.
print_diff = False
if print_diff: 
    # index -> team key, inverse of transform.team_mapping
    inv_map = {v: k for k, v in transform.team_mapping.items()}
    for i in range(len(E_HS_net)):
        # exact equality between the model output and the reference value
        eq = E_HS_net[i] == E_HS_comp[i]
        
        # expected scores plus the *after* ratings (home :: away) of both implementations
        str_i = f'{i}:: net: {E_HS_net[i]:2.2f}, comp: {E_HS_comp[i]:2.2f};;; elo net / comp: {elos_net[i][1][0]:10.2f} / {elos_comp[i][1][0]:10.2f} :: {elos_net[i][1][1]:10.2f} / {elos_comp[i][1][1]:10.2f}'
        
        # NOTE(review): indexing df with the same i assumes the dataset iterates matches in df row order — confirm.
        print(str_i if eq else '----------> ' + str_i + f''' {df.iloc[i]["Home"]}  home_pts: {df.iloc[i]["Home_points"]}, {df.iloc[i]["Away"]}  away_pts: {df.iloc[i]["Away_points"]};;;;;;;;; pts net: {pts[i][0][0]}, {pts[i][0][1]}''')
        if not eq:
            # on the first mismatch also show the *before* ratings and the teams, then stop
            print(f'elo before: {elos_net[i][0][0]:10.2f} / {elos_comp[i][0][0]:10.2f} :: {elos_net[i][0][1]:10.2f} / {elos_comp[i][0][1]:10.2f}')
            print(f'Teams: home {inv_map[int(pts[i][1][0])]} away {inv_map[int(pts[i][1][1])]}')
            break 


Computed elo Dummy:  [1450.41466185  783.32864878  766.25668937]
[TRN]  Epoch: 0, training loss: 2.543, training accuracy: 16.67% 
ratings (first 5): Parameter containing:
tensor([1450.41466185,  783.32864878,  766.25668937], dtype=torch.float64,
       requires_grad=True)

----------------------------------------
0:: computed:   1379.688 || net:   1379.688
1:: computed:   2976.967 || net:   2976.967
2:: computed:    384.481 || net:    384.481
3:: computed:    571.470 || net:    571.470
4:: computed:   1309.883 || net:   1309.883
...
[SUCCESS]: Computed elo is the same as Elo from NN


### Elo with gradient

First, let's check our analytical backward pass against numerically approximated gradients.

In [4]:
from torch.autograd import gradcheck
from models.ratings._elo._grad import elo_function
# gradcheck takes a tuple of tensors as input, checks whether the gradients
# evaluated analytically at these tensors are close enough to numerical
# approximations, and returns True if they all satisfy this condition.
# Double precision is used for all four random inputs. The tuple is not
# called `input`, so the builtin of that name stays usable in later cells.
grad_inputs = tuple(
    torch.randn(1, dtype=torch.double, requires_grad=True) for _ in range(4)
)
test = gradcheck(elo_function, grad_inputs, eps=1e-6, atol=1e-4)
print(test)

True


Define the training function for the gradient-based Elo models.

In [5]:
from models.loss import WeightedMSELoss
from torch.optim import Adam

def train_elo_grad(train_dataset, model, epochs: int = 1, verbose: bool = False, lr:float = -1, clip_grad: bool = False):
    """Train a gradient-based Elo `model` with Adam, one match per optimizer step.

    For each match the model is called on the (home, away) column of
    ``snapshot.edge_index``; the loss is
    ``WeightedMSELoss()(y, y_hat, (point_diff + 1) ** model.gamma)`` where ``y``
    is the match's ``edge_attr`` row.

    Args:
        train_dataset: iterable of snapshots exposing ``edge_index``,
            ``edge_attr`` and ``match_points``.
        model: module exposing ``parameters()``, ``train()``, ``rating``,
            ``home``, ``away``, ``gamma`` and (when ``lr == -1``) ``k``.
        epochs: number of passes over ``train_dataset``.
        verbose: print loss, accuracy and the first ratings after each epoch.
        lr: Adam learning rate; the sentinel ``-1`` means "use ``model.k``".
        clip_grad: clip the gradient norm to 1 before every optimizer step.

    Returns:
        Tuple ``(training_accuracy, E_HS, elos, pts)``:
        per-epoch accuracy in percent (``np.ndarray``; ``0.0`` for an epoch
        without matches), the detached model outputs, the list of
        ``((home_before, away_before), (home_after, away_after))`` rating
        snapshots, and the list of
        ``((home_points, away_points), (model.home, model.away))``.
    """
    torch.set_printoptions(precision=8)
    training_accuracy = []
    loss_fn = WeightedMSELoss()

    lr = model.k if lr == -1 else lr

    optim = Adam(model.parameters(), lr=lr)

    E_HS = []
    elos = []
    pts = []

    def _rating_snapshot():
        # Indexing `model.rating` yields a view sharing storage with the parameter,
        # which optim.step() updates in place. Without the clone, the "before"
        # values would show the ratings *after* the step.
        return (model.rating[model.home].detach().clone(),
                model.rating[model.away].detach().clone())

    for epoch in range(epochs):
        model.train()
        # `loss` is the sum of the per-match losses of this epoch (plain float).
        accuracy, loss, count = 0, 0., 0
        for snapshot in train_dataset:
            # pass through network has to be only one by one in order to compute elo correctly
            matches = snapshot.edge_index
            match_points = snapshot.match_points
            for m in range(matches.shape[1]):
                optim.zero_grad()

                match = matches[:, m]

                y_hat = model(match)
                y = snapshot.edge_attr[m, :]  # edge weight encodes the match outcome
                # NOTE(review): gradients w.r.t. the target row look unnecessary;
                # kept because WeightedMSELoss is not visible here — confirm.
                y.requires_grad = True
                # Store a detached output so the autograd graph of each match can be freed.
                E_HS.append(y_hat.detach())

                elos_b4 = _rating_snapshot()

                target = (torch.argmax(y) / 2.).detach()
                prediction = y_hat
                accuracy += 1 if abs(target - prediction) < 0.33 else 0

                point_diff = torch.abs(match_points[m, 0] - match_points[m, 1])
                match_loss = loss_fn(y, y_hat, (point_diff + 1) ** model.gamma)

                match_loss.backward()

                if clip_grad:
                    # Clip gradients to prevent explosion
                    # This should be used when training c, d hyper params as well
                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

                optim.step()

                loss += float(match_loss.detach())

                elos.append((elos_b4, _rating_snapshot()))
                pts.append(((match_points[m, 0], match_points[m, 1]), (model.home, model.away)))

            count += matches.shape[1]

        # Guard against an empty dataset instead of raising ZeroDivisionError.
        accuracy_pct = accuracy / count * 100 if count else 0.0

        if verbose:
            rating = model.rating[:5] if len(model.rating) >= 5 else model.rating
            print(f'[TRN] '
                  f' Epoch: {epoch}, training loss: {loss:.3f}, '
                  f'training accuracy: {accuracy_pct:.2f}% \n'
                  f'ratings (first 5): {rating}')
        training_accuracy.append(accuracy_pct)

    return np.array(training_accuracy), E_HS, elos, pts



Now, let's train Elo with the analytical backward pass.

In [6]:
# EloGrad with cd_grad switched off, trained with the defaults of train_elo_grad
# (one epoch, lr taken from model.k, no gradient clipping).
elo_grad = EloGrad(team_count, cd_grad=False)
acc_grad, ehs_grad, elos_grad, pts_grad = train_elo_grad(
    train_dataset=dataset, model=elo_grad, clip_grad=False
)

And Elo with autograd (backward pass derived by PyTorch's automatic differentiation).

In [7]:
# EloAutoGrad with cd_grad switched off, trained with the same settings as
# the EloGrad run so the resulting ratings can be compared.
elo_auto = EloAutoGrad(team_count, cd_grad=False)
acc_auto, ehs_auto, elos_auto, pts_auto = train_elo_grad(
    train_dataset=dataset, model=elo_auto, clip_grad=False
)

And finally, compare whether these two produce the same ratings.
___________________

In [8]:
# Compare the ratings of the EloAutoGrad and EloGrad models team by team.
err = False
eps = 1e-3
for i in range(len(elo_auto.rating)):
    auto = float(elo_auto.rating[transform.team_mapping[transform.inv_team_mapping[i]]])
    grad = float(elo_grad.rating[transform.team_mapping[transform.inv_team_mapping[i]]])
    # Use the absolute difference: a signed check would miss every team whose
    # EloGrad rating is *larger* than the EloAutoGrad one.
    if abs(auto - grad) > eps:
        print(rf'ERROR on index {i}:: {auto} / {grad}')
        err = True
    if i < 5:
        print(f'{i}:: computed: {auto:10.3f} || net: {grad:10.3f}')
if not err:
    print("...")
    print("[SUCCESS]: All elo ratings computed analytically and numerically are the SAME")

0:: computed:    984.510 || net:    984.510
1:: computed:   1006.767 || net:   1006.767
2:: computed:    950.290 || net:    950.290
3:: computed:    969.327 || net:    969.327
4:: computed:   1059.368 || net:   1059.368
...
[SUCCESS]: All elo ratings computed analytically and numerically are the SAME


Now let's see what happens when we let the model learn the c and d hyperparameters as well.

In [9]:
# EloAutoGrad with cd_grad switched on; explicit learning rate and gradient clipping.
elo_auto2 = EloAutoGrad(team_count, cd_grad=True)
acc_auto2, ehs_auto2, elos_auto2, pts_auto2 = train_elo_grad(
    train_dataset=dataset, model=elo_auto2, lr=1e-1, clip_grad=True
)

In [10]:
# EloGrad with cd_grad switched on; same learning rate and clipping as the EloAutoGrad run.
elo_grad2 = EloGrad(team_count, cd_grad=True)
acc_grad2, ehs_grad2, elos_grad2, pts_grad2 = train_elo_grad(
    train_dataset=dataset, model=elo_grad2, lr=1e-1, clip_grad=True
)

In [11]:
# Compare the ratings of the two models trained with cd_grad=True and
# summarise how many teams differ by more than eps.
err = False
eps = 1e-2
err_sum = 0
err_count = 0
print_count = 0
for i in range(len(elo_auto2.rating)):
    auto = float(elo_auto2.rating[transform.team_mapping[transform.inv_team_mapping[i]]])
    grad = float(elo_grad2.rating[transform.team_mapping[transform.inv_team_mapping[i]]])
    diff = abs(auto - grad)
    # Per-row flag: the global `err` stays True after the first mismatch, so
    # using it for the label would mark rows within tolerance as errors too.
    row_err = diff > eps
    if row_err:
        err = True
        err_sum += diff
        err_count += 1
    if print_count < 5:
        str_i = rf'[ERROR] on index {i}:: {auto} / {grad}' if row_err else f'{i}:: computed: {auto:10.3f} || net: {grad:10.3f}'
        print(str_i)
        print_count += 1

print("...")

if not err:
    print("[SUCCESS]: All elo ratings computed analytically and numerically are the SAME")
else:
    # err is True only if at least one row was counted, so err_count > 0 here.
    print(f"Total number of errors: {err_count} out of {len(elo_auto2.rating)} computed ratings")
    print(f"Cumulative sum of errors: {err_sum}")
    print(f"Average difference: {err_sum / err_count}")

print(f"Numerical:  c = {float(elo_auto2.c):8.3f} :: d = {float(elo_auto2.d):8.3f}")
print(f"Analytical: c = {float(elo_grad2.c):8.3f} :: d = {float(elo_grad2.d):8.3f}")

[ERROR] on index 0:: 999.307507900627 / 999.5808169078236
[ERROR] on index 1:: 1015.5786790777033 / 1014.4905387700188
[ERROR] on index 2:: 987.3159453166492 / 988.3789993174113
[ERROR] on index 3:: 991.7779240243041 / 992.8502762032455
[ERROR] on index 4:: 1008.0524710996804 / 1008.258919096344
...
Total number of errors: 136 out of 146 computed ratings
Cumulative sum of errors: 90.4433938192243
Average difference: 0.6650249545531198
Numerical:  c =   25.589 :: d =  491.166
Analytical: c =   25.589 :: d =  491.166
