In [1]:
import random

import numpy as np
from sklearn.metrics import accuracy_score

from src.dataset import get_dataset
from src.models.elog import ELOgPredictor
from src.models.frequency_random import FrequencyRandomMatchPredictor
from src.models.uniform_random import UniformRandomMatchPredictor

In [11]:
# Number of times the whole evaluation loop is repeated.
ITERATIONS = 1

# One seed shared by both random number generators, so the two calls below
# cannot drift apart when the value is changed.
SEED = 5438
random.seed(SEED)
np.random.seed(SEED)

In [12]:
def determine_target(row):
    """Encode the outcome of one match as a class label.

    Reads ``row["home_score"]`` and ``row["away_score"]`` and returns
    0 when the home score is greater, 1 when both scores are equal,
    and 2 in every other case.
    """
    home_goals = row["home_score"]
    away_goals = row["away_score"]

    if home_goals > away_goals:
        return 0
    if home_goals == away_goals:
        return 1
    return 2

In [13]:
# Load the combined match table. Later cells rely on its "home_score",
# "away_score" and "fold" columns.
dataset = get_dataset()

  mls_df = pd.read_csv("data/mls_matches.csv")


In [14]:
# Show the loaded table as a quick sanity check of its columns and fold labels.
dataset

Unnamed: 0,date,home_team,home_score,away_score,away_team,neutral,fold
0,1971-08-07,Santa Cruz,1,4,Corinthians,False,brazil
1,1971-08-07,Internacional,0,0,Fluminense,False,brazil
2,1971-08-07,Palmeiras,1,0,Portuguesa-SP,False,brazil
3,1971-08-07,Ceará,0,0,Vasco da Gama,False,brazil
4,1971-08-07,Coritiba,0,2,Cruzeiro,False,brazil
...,...,...,...,...,...,...,...
143000,2017-07-06,Grenada,0,2,Barbados,False,international
143001,2016-08-25,Barawa,0,5,Tamil Eelam,False,international
143002,2016-08-26,Barawa,2,3,Chagos Islands,False,international
143003,2016-08-28,Chagos Islands,1,5,Tamil Eelam,True,international


In [15]:
# Predictor classes to evaluate. Each one is instantiated with no arguments
# and used through fit() / predict() in the evaluation loop.
# The imported baselines FrequencyRandomMatchPredictor and
# UniformRandomMatchPredictor are currently left out; add them to the list
# to evaluate them as well.
model_classes = [ELOgPredictor]
# Values of the "fold" column; each fold is held out in turn as the test set.
folds_names = ["brazil", "libertadores", "mls", "europe", "international"]

In [16]:
# Leave-one-fold-out split: for each fold name, the test frame holds the rows
# carrying that label and the train frame holds all remaining rows.
fold_labels = dataset["fold"]
folds_train = [dataset.loc[fold_labels.ne(held_out)] for held_out in folds_names]
folds_test = [dataset.loc[fold_labels.eq(held_out)] for held_out in folds_names]

In [17]:
# Score container laid out as metric -> model class name -> fold name -> list
# of scores. Every (model, fold) pair starts with its own empty list.
accuracy_by_model = {}
for model_class in model_classes:
    accuracy_by_model[model_class.__name__] = {name: [] for name in folds_names}

results = {"accuracy": accuracy_by_model}

In [18]:
# Ground-truth labels depend only on the held-out fold, so derive them once
# per fold instead of repeating the row-wise apply for every
# (iteration, model) combination.
fold_targets = {
    fold_name: fold_rows.apply(determine_target, axis=1)
    for fold_name, fold_rows in zip(folds_names, folds_test)
}

# Leave-one-fold-out evaluation: for each repetition and each model class,
# fit a fresh model on all other folds and score it on the held-out fold.
for _ in range(ITERATIONS):
    for model_class in model_classes:
        for fold_train, fold_test, fold_test_name in zip(
            folds_train, folds_test, folds_names
        ):
            # A new instance per fold so no fitted state carries over.
            model = model_class()
            model.fit(fold_train)
            pred = model.predict(fold_test)
            target = fold_targets[fold_test_name]
            results["accuracy"][model_class.__name__][fold_test_name].append(
                accuracy_score(target, pred)
            )

In [19]:
# Accuracy per model and held-out fold; each list holds one score per iteration.
results

{'accuracy': {'ELOgPredictor': {'brazil': [0.5037941809032991],
   'libertadores': [0.5451327433628319],
   'mls': [0.5174921114007408],
   'europe': [0.5084704192288152],
   'international': [0.5744605144734914]}}}