In [1]:
import random

import numpy as np
import pandas as pd

from src.dataset import get_dataset
from src.models.elog import ELOgPredictor

In [2]:
# Notebook-wide configuration.
# NOTE(review): ITERATIONS is not referenced by any cell visible in this notebook —
# confirm it is still needed.
ITERATIONS = 1

# Single source of truth for the seed so the two RNGs below cannot drift apart.
SEED = 5438
random.seed(SEED)  # stdlib RNG; later cells draw from it via random.random()
np.random.seed(SEED)  # NumPy legacy global RNG

In [3]:
# Load the match dataset through the project helper.
# NOTE(review): the captured output of this cell shows the line
# `mls_df = pd.read_csv("data/mls_matches.csv")`, presumably part of a warning raised
# inside get_dataset() — confirm and fix it at the source rather than keeping it in the output.
dataset = get_dataset()

  mls_df = pd.read_csv("data/mls_matches.csv")


In [4]:
# Display the loaded frame; the rendered repr is truncated to its first and last rows.
dataset

Unnamed: 0,team_id,opponent_id,team_at_home,opponent_at_home,team_score,opponent_score,fold
0,Scotland,England,1.0,0.0,0,0,international
1,England,Scotland,1.0,0.0,4,2,international
2,Scotland,England,1.0,0.0,2,1,international
3,England,Scotland,1.0,0.0,2,2,international
4,Scotland,England,1.0,0.0,3,0,international
...,...,...,...,...,...,...,...
143000,Sport Lisboa e Benfica,Sporting Clube de Braga,1.0,0.0,3,0,europe
143001,Panathinaikos Athlitikos Omilos,APS Atromitos Athinon,1.0,0.0,2,1,europe
143002,Fulham Football Club,Watford FC,1.0,0.0,4,1,europe
143003,Panthessalonikios Athlitikos Omilos Konstantin...,Athlitiki Enosi Konstantinoupoleos,1.0,0.0,1,1,europe


In [5]:
# Create the predictor with its default settings (no arguments are passed).
elo = ELOgPredictor()

In [6]:
# Model inputs: the two team identifiers plus the home flags for each side.
X = dataset.loc[
    :,
    ["team_id", "opponent_id", "team_at_home", "opponent_at_home"],
]
# Targets: the two score columns.
y = dataset.loc[:, ["team_score", "opponent_score"]]

In [7]:
# Fit on every row of the dataset; no train/test split is made in the visible cells.
elo.fit(X, y)

In [8]:
# Inspect the fitted ratings: a dict mapping each team name to a float rating.
elo.elo.rating

{'Scotland': 1739.04707105471,
 'England': 2044.9274120277767,
 'Wales': 1762.7542921006261,
 'Northern Ireland': 1602.422163203526,
 'United States': 1862.8628096796817,
 'Canada': 1835.472531393626,
 'Uruguay': 2008.012439854341,
 'Argentina': 2180.114498563896,
 'Austria': 1902.932291161174,
 'Hungary': 1808.3566778591107,
 'Czechoslovakia': 1779.3264170808507,
 'Belgium': 2022.9637165155807,
 'France': 2099.9630877649315,
 'Switzerland': 1888.9749687820922,
 'Guernsey': 1774.0458685698165,
 'Alderney': 1058.9755555684515,
 'Jersey': 1816.3992242520656,
 'Netherlands': 2026.4534453944188,
 'Germany': 2025.6610693215455,
 'Sweden': 1799.07924926261,
 'Norway': 1794.1250431278213,
 'Italy': 2011.8774085797154,
 'Chile': 1780.5276596383144,
 'Finland': 1641.3176031210423,
 'Luxembourg': 1428.948394985113,
 'Catalonia': 1669.0969962745894,
 'Russia': 1838.731133884095,
 'Denmark': 1940.903686143524,
 'Philippines': 1293.7458027246785,
 'China PR': 1572.2506219428847,
 'Brazil': 2141.143

In [9]:
# Tabulate the fitted ratings as one row per team.
# The columns are named explicitly so the "team"/"rating" schema survives even when the
# rating dict is empty; a frame built from an empty list of dicts has no columns, and
# sorting it on "rating" afterwards would raise a KeyError.
ratings = pd.DataFrame(
    list(elo.elo.rating.items()),
    columns=["team", "rating"],
)

In [10]:
# Show the 20 rows with the highest rating, highest first.
ratings.sort_values(by="rating", ascending=False).head(20)

Unnamed: 0,team,rating
7,Argentina,2180.114499
41,Spain,2174.582155
30,Brazil,2141.1436
12,France,2099.963088
85,Colombia,2089.151573
1,England,2044.927412
42,Portugal,2042.411611
17,Netherlands,2026.453445
18,Germany,2025.661069
11,Belgium,2022.963717


In [11]:
# Brazil vs Argentina with neither side flagged as at home.
# NOTE(review): the three output values presumably read
# [team win, draw, opponent win] — confirm against ELOgPredictor.predict.
elo.predict(
    pd.DataFrame(
        {
            "team_id": ["Brazil"],
            "opponent_id": ["Argentina"],
            "team_at_home": [0.0],
            "opponent_at_home": [0.0],
        }
    )
)

array([[0.31460674, 0.28372559, 0.40166767]])

In [12]:
# Same fixture with the roles swapped: Argentina as "team", Brazil as "opponent",
# neither side flagged as at home.
elo.predict(
    pd.DataFrame(
        {
            "team_id": ["Argentina"],
            "opponent_id": ["Brazil"],
            "team_at_home": [0.0],
            "opponent_at_home": [0.0],
        }
    )
)

array([[0.40166393, 0.28372598, 0.31461009]])

In [13]:
# Two freshly drawn random floats stand in as team ids, neither side at home.
# NOTE(review): presumably these act as teams never seen during fit — confirm how
# ELOgPredictor treats unknown (and non-string) ids.
elo.predict(
    pd.DataFrame(
        {
            "team_id": [random.random()],
            "opponent_id": [random.random()],
            "team_at_home": [0.0],
            "opponent_at_home": [0.0],
        }
    )
)

array([[0.35695543, 0.28608557, 0.356959  ]])

In [14]:
# Two random-float stand-in ids again, this time with the "team" side flagged as at home.
# NOTE(review): presumably isolates the home effect for otherwise unknown teams — confirm.
elo.predict(
    pd.DataFrame(
        {
            "team_id": [random.random()],
            "opponent_id": [random.random()],
            "team_at_home": [1.0],
            "opponent_at_home": [0.0],
        }
    )
)

array([[0.42654169, 0.28053235, 0.29292596]])

In [15]:
# Two random-float stand-in ids with the "opponent" side flagged as at home
# (the mirror image of a home fixture for the "team" side).
elo.predict(
    pd.DataFrame(
        {
            "team_id": [random.random()],
            "opponent_id": [random.random()],
            "team_at_home": [0.0],
            "opponent_at_home": [1.0],
        }
    )
)

array([[0.29292273, 0.28053177, 0.4265455 ]])

In [16]:
# Brazil against a random-float stand-in opponent id, neither side at home.
# NOTE(review): presumably the opponent is treated as a team unseen during fit — confirm.
elo.predict(
    pd.DataFrame(
        {
            "team_id": ["Brazil"],
            "opponent_id": [random.random()],
            "team_at_home": [0.0],
            "opponent_at_home": [0.0],
        }
    )
)

array([[0.92678624, 0.04944971, 0.02376405]])

In [17]:
# Brazil against a random-float stand-in opponent id, neither side at home.
# NOTE(review): this cell repeats the code of the preceding cell verbatim; the recorded
# outputs are identical even though a new random opponent id is drawn, so one of the
# two cells is likely redundant.
elo.predict(
    pd.DataFrame(
        {
            "team_id": ["Brazil"],
            "opponent_id": [random.random()],
            "team_at_home": [0.0],
            "opponent_at_home": [0.0],
        }
    )
)

array([[0.92678624, 0.04944971, 0.02376405]])