In [1]:
import random

import numpy as np
import pandas as pd

from src.dataset import get_dataset
from src.models.elog import ELOgPredictor

In [2]:
# Notebook-wide configuration.
ITERATIONS = 1
# Single source of truth for the seed so both generators stay in sync.
SEED = 5438

# Seed the stdlib and the legacy global NumPy generators.
random.seed(SEED)
np.random.seed(SEED)

In [3]:
# Load the match data through the project helper imported from src.dataset.
dataset = get_dataset()

  mls_df = pd.read_csv("data/mls_matches.csv")


In [4]:
# Bare last expression: display the loaded frame to inspect its columns.
dataset

Unnamed: 0,team_id,opponent_id,team_at_home,opponent_at_home,team_score,opponent_score,fold
0,Scotland,England,1.0,0.0,0,0,international
1,England,Scotland,1.0,0.0,4,2,international
2,Scotland,England,1.0,0.0,2,1,international
3,England,Scotland,1.0,0.0,2,2,international
4,Scotland,England,1.0,0.0,3,0,international
...,...,...,...,...,...,...,...
143000,Sport Lisboa e Benfica,Sporting Clube de Braga,1.0,0.0,3,0,europe
143001,Panathinaikos Athlitikos Omilos,APS Atromitos Athinon,1.0,0.0,2,1,europe
143002,Fulham Football Club,Watford FC,1.0,0.0,4,1,europe
143003,Panthessalonikios Athlitikos Omilos Konstantin...,Athlitiki Enosi Konstantinoupoleos,1.0,0.0,1,1,europe


In [5]:
# Instantiate the predictor with no constructor arguments (its defaults).
model = ELOgPredictor()

In [6]:
# Split the frame into model inputs (who plays, who is at home) and the
# observed scores used as targets.
feature_columns = ["team_id", "opponent_id", "team_at_home", "opponent_at_home"]
target_columns = ["team_score", "opponent_score"]

X = dataset.loc[:, feature_columns]
y = dataset.loc[:, target_columns]

In [7]:
# Fit the predictor on the selected feature columns and score columns.
model.fit(X, y)

In [18]:
# Query the inner logit model directly with a predicted score difference of
# exactly zero, using the parameters stored on model.logit.
zero_difference = pd.DataFrame({"predicted_score_difference": [0.0]})
model.logit.model.predict(model.logit.params, exog=zero_difference)

array([[0.35695543, 0.28608557, 0.356959  ]])

In [19]:
# Single fixture: Argentina as the team, Brazil as the opponent, with neither
# side flagged as playing at home.
argentina_brazil = pd.DataFrame(
    {
        "team_id": ["Argentina"],
        "opponent_id": ["Brazil"],
        "team_at_home": [0.0],
        "opponent_at_home": [0.0],
    }
)
model.predict(argentina_brazil)

array([[0.35695543, 0.28608557, 0.356959  ]])

In [20]:
# Same fixture with the roles swapped: Brazil as the team, Argentina as the
# opponent, again with neither side flagged as playing at home.
brazil_argentina = pd.DataFrame(
    {
        "team_id": ["Brazil"],
        "opponent_id": ["Argentina"],
        "team_at_home": [0.0],
        "opponent_at_home": [0.0],
    }
)
model.predict(brazil_argentina)

array([[0.35695543, 0.28608557, 0.356959  ]])

In [21]:
# Run the predictor's predict-and-update pass over every match in X / y.
# The displayed result has one row of three values per match.
# NOTE(review): the Argentina-vs-Brazil prediction shown after this cell differs
# from the one shown before it, so this call presumably mutates model state;
# re-running the cell may therefore not be idempotent -- confirm.
model.predict_and_update(X, y)

array([[0.42654169, 0.28053235, 0.29292596],
       [0.42858287, 0.28021552, 0.29120161],
       [0.39534216, 0.28433303, 0.3203248 ],
       ...,
       [0.7766678 , 0.14193725, 0.08139495],
       [0.40271483, 0.28361681, 0.31366837],
       [0.83724712, 0.10623802, 0.05651486]], shape=(143005, 3))

In [23]:
# Re-check Argentina (team) vs Brazil (opponent) with neither side at home,
# this time after the predict_and_update pass.
argentina_brazil_updated = pd.DataFrame(
    {
        "team_id": ["Argentina"],
        "opponent_id": ["Brazil"],
        "team_at_home": [0.0],
        "opponent_at_home": [0.0],
    }
)
model.predict(argentina_brazil_updated)

array([[0.40166393, 0.28372598, 0.31461009]])

In [13]:
# Rank every side in the "international" fold by the points it is expected to
# collect when it faces each other side once with team_at_home=1.0.
# Prediction column 0 is weighted by WIN_POINTS and column 1 by DRAW_POINTS
# (presumably the columns are win / draw / loss -- confirm against the model).
WIN_POINTS = 3
DRAW_POINTS = 1

teams_df = dataset[dataset["fold"].isin(["international"])]
teams = set(teams_df["team_id"]) | set(teams_df["opponent_id"])
ranking = {team: 0.0 for team in teams}

for team in ranking:
    opponents = [opponent for opponent in ranking if opponent != team]
    if not opponents:
        # A lone team has nobody to play; leave its score at zero.
        continue

    # One batched predict call per team instead of one call (and one
    # single-row DataFrame) per ordered pair of teams.
    fixtures = pd.DataFrame(
        {
            "team_id": team,
            "opponent_id": opponents,
            "team_at_home": 1.0,
            "opponent_at_home": 0.0,
        }
    )
    pred = np.asarray(model.predict(fixtures))

    # Fail loudly if predict does not return one row per fixture, rather than
    # silently producing a wrong ranking.
    if pred.shape[0] != len(opponents):
        raise ValueError(
            f"expected {len(opponents)} prediction rows for {team!r}, got {pred.shape[0]}"
        )

    ranking[team] = float((WIN_POINTS * pred[:, 0] + DRAW_POINTS * pred[:, 1]).sum())

In [14]:
# Turn the ranking dict into a table and show the 20 highest scores.
ranking_table = pd.DataFrame(list(ranking.items()), columns=["Team", "Score"])
(
    ranking_table
    .sort_values(by="Score", ascending=False)
    .reset_index(drop=True)
    .head(20)
)

Unnamed: 0,Team,Score
0,Argentina,943.873707
1,Spain,942.472546
2,Brazil,933.425644
3,France,920.838017
4,Colombia,917.252774
5,England,901.29818
6,Portugal,900.326214
7,Netherlands,893.994155
8,Germany,893.672171
9,Belgium,892.570673


In [15]:
# Single fixture: Argentina as the team, France as the opponent, with neither
# side flagged as playing at home.
argentina_france = pd.DataFrame(
    {
        "team_id": ["Argentina"],
        "opponent_id": ["France"],
        "team_at_home": [0.0],
        "opponent_at_home": [0.0],
    }
)
model.predict(argentina_france)

array([[0.45073684, 0.27627149, 0.27299167]])

In [24]:
# Rank every side outside the "international" fold by the points it is
# expected to collect when it faces each other side once with team_at_home=1.0.
# Prediction column 0 is weighted by WIN_POINTS and column 1 by DRAW_POINTS
# (presumably the columns are win / draw / loss -- confirm against the model).
WIN_POINTS = 3
DRAW_POINTS = 1

teams_df = dataset[~dataset["fold"].isin(["international"])]
teams = set(teams_df["team_id"]) | set(teams_df["opponent_id"])
ranking = {team: 0.0 for team in teams}

for team in ranking:
    opponents = [opponent for opponent in ranking if opponent != team]
    if not opponents:
        # A lone team has nobody to play; leave its score at zero.
        continue

    # One batched predict call per team instead of one call (and one
    # single-row DataFrame) per ordered pair of teams.
    fixtures = pd.DataFrame(
        {
            "team_id": team,
            "opponent_id": opponents,
            "team_at_home": 1.0,
            "opponent_at_home": 0.0,
        }
    )
    pred = np.asarray(model.predict(fixtures))

    # Fail loudly if predict does not return one row per fixture, rather than
    # silently producing a wrong ranking.
    if pred.shape[0] != len(opponents):
        raise ValueError(
            f"expected {len(opponents)} prediction rows for {team!r}, got {pred.shape[0]}"
        )

    ranking[team] = float((WIN_POINTS * pred[:, 0] + DRAW_POINTS * pred[:, 1]).sum())

In [25]:
# Turn the ranking dict into a table and show the 50 highest scores.
ranking_table = pd.DataFrame(list(ranking.items()), columns=["Team", "Score"])
(
    ranking_table
    .sort_values(by="Score", ascending=False)
    .reset_index(drop=True)
    .head(50)
)

Unnamed: 0,Team,Score
0,Football Club Internazionale Milano S.p.A.,3207.658748
1,Liverpool Football Club,3202.176534
2,Real Madrid Club de Fútbol,3173.36152
3,Arsenal Football Club,3152.742608
4,Galatasaray Spor Kulübü,3144.887547
5,Bayer 04 Leverkusen Fußball,3138.549078
6,FC Bayern München,3099.327235
7,Eindhovense Voetbalvereniging Philips Sport Ve...,3086.58446
8,Paris Saint-Germain Football Club,3084.966765
9,Sporting Clube de Portugal,3077.282646


In [37]:
# Tabulate the stored Elo rating of every side in `teams` and show the 50
# highest ratings.
rated_teams = [
    (team, rating)
    for team, rating in model.elo.rating.items()
    if team in teams
]
(
    pd.DataFrame(rated_teams, columns=["Team", "Score"])
    .sort_values(by="Score", ascending=False)
    .reset_index(drop=True)
    .head(50)
)

Unnamed: 0,Team,Score
0,Football Club Internazionale Milano S.p.A.,2014.875952
1,Liverpool Football Club,2010.585624
2,Real Madrid Club de Fútbol,1989.058354
3,Arsenal Football Club,1974.598506
4,Galatasaray Spor Kulübü,1969.274941
5,Bayer 04 Leverkusen Fußball,1965.048965
6,FC Bayern München,1940.161011
7,Eindhovense Voetbalvereniging Philips Sport Ve...,1932.498733
8,Paris Saint-Germain Football Club,1931.539597
9,Sporting Clube de Portugal,1927.024032
