In [30]:
import random

import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, log_loss, roc_auc_score

from src.dataset import get_dataset
from src.models.dualemb import DualEmbPredictor
from src.models.elog import ELOgPredictor
from src.models.freq import FrequencyMatchPredictor
from src.models.uni import UniformMatchPredictor

In [31]:
# Experiment configuration.
# ITERATIONS: how many times the whole evaluation loop is repeated.
ITERATIONS = 1
# SEED: single source of truth for every RNG seeded below, so the value
# cannot drift between libraries.
SEED = 548
# NOTE(review): only Python's `random` and NumPy's global RNG are seeded here;
# if the predictors draw randomness from another library, seed it here as
# well — confirm against the model implementations.
random.seed(SEED)
np.random.seed(SEED)

In [32]:
def determine_target(row):
    """Encode a match outcome as a class index, from the team's perspective.

    Parameters
    ----------
    row : mapping-like
        Must provide ``"team_score"`` and ``"opponent_score"``.

    Returns
    -------
    int
        0 when ``team_score`` is greater than ``opponent_score`` (win),
        1 when the two are equal (draw), and 2 in every other case (loss).
    """
    scored = row["team_score"]
    conceded = row["opponent_score"]
    if scored > conceded:
        return 0
    return 1 if scored == conceded else 2

In [33]:
# Load the full match table through the project helper. Later cells rely on
# its "fold" column to build the train/test splits and on the team/score
# columns as model inputs and targets.
dataset = get_dataset()

  mls_df = pd.read_csv("data/mls_matches.csv")


In [34]:
# Preview the loaded table (bare last expression -> rich notebook display).
dataset

Unnamed: 0,team_id,opponent_id,team_at_home,opponent_at_home,team_score,opponent_score,fold
0,Scotland,England,1.0,0.0,0,0,international
1,England,Scotland,1.0,0.0,4,2,international
2,Scotland,England,1.0,0.0,2,1,international
3,England,Scotland,1.0,0.0,2,2,international
4,Scotland,England,1.0,0.0,3,0,international
...,...,...,...,...,...,...,...
143000,Sport Lisboa e Benfica,Sporting Clube de Braga,1.0,0.0,3,0,europe
143001,Panathinaikos Athlitikos Omilos,APS Atromitos Athinon,1.0,0.0,2,1,europe
143002,Fulham Football Club,Watford FC,1.0,0.0,4,1,europe
143003,Panthessalonikios Athlitikos Omilos Konstantin...,Athlitiki Enosi Konstantinoupoleos,1.0,0.0,1,1,europe


In [35]:
# Predictor classes to evaluate; each one is instantiated afresh for every
# (iteration, fold) pair in the evaluation loop. The commented-out entries are
# the other imported predictors, currently disabled for this run.
model_classes = [
    # FrequencyMatchPredictor,
    # UniformMatchPredictor,
    # ELOgPredictor,
    DualEmbPredictor,
]
# Values of the dataset's "fold" column that are held out as test sets, one at
# a time. The commented-out line below is a wider selection of folds.
# folds_names = ["brazil", "libertadores", "mls", "europe", "international"]
folds_names = ["brazil", "international"]

In [36]:
# Leave-one-fold-out splits, index-aligned with folds_names: entry i of
# folds_test holds the rows whose "fold" equals folds_names[i], and entry i of
# folds_train holds every other row.
folds_test = [dataset.loc[dataset["fold"] == held_out] for held_out in folds_names]
folds_train = [dataset.loc[dataset["fold"] != held_out] for held_out in folds_names]

In [37]:
# Empty long-format results table: one row per (metric, model, fold,
# iteration) measurement, filled in by the evaluation loop.
results = pd.DataFrame(
    columns=["metric", "model", "fold", "iteration", "value"],
)

In [38]:
# Leave-one-fold-out evaluation: for every iteration and model class, fit on
# all rows outside the held-out fold, predict the held-out fold, and append one
# row per metric to `results`.

# Loop-invariant column selections, hoisted out of the nested loops.
feature_columns = ["team_id", "opponent_id", "team_at_home", "opponent_at_home"]
score_columns = ["team_score", "opponent_score"]
# Class encoding produced by determine_target: 0 = win, 1 = draw, 2 = loss.
class_labels = [0, 1, 2]
class_names = ["win", "draw", "loss"]

for iteration in range(ITERATIONS):
    for model_class in model_classes:
        for fold_train, fold_test, fold_test_name in zip(
            folds_train, folds_test, folds_names
        ):
            X_train = fold_train[feature_columns]
            # Scores are divided by 10 before being handed to the model.
            # NOTE(review): presumably to keep the regression targets small —
            # confirm against the model implementation.
            y_train = fold_train[score_columns] / 10.0
            X_test = fold_test[feature_columns]
            y_test = fold_test[score_columns] / 10.0
            # NOTE(review): these keyword arguments are passed to every entry
            # of model_classes; confirm the currently disabled predictors
            # accept them before re-enabling those entries.
            model = model_class(
                embedding_dim=10, hidden_dim=2, num_epochs=25, update_learning_rate=0.01
            )
            model.fit(X_train, y_train)
            # NOTE(review): predict_and_update also receives the test targets;
            # presumably each match is predicted before the model is updated
            # on its outcome — confirm there is no leakage.
            pred = model.predict_and_update(X_test, y_test)
            # pred is treated as one score column per class; the predicted
            # class is the column with the highest score.
            max_pred = np.argmax(pred, axis=1)
            target = fold_test.apply(determine_target, axis=1).to_numpy()
            # Passing `labels` explicitly keeps the report aligned with the
            # three target_names even when a class never occurs in this fold
            # (otherwise classification_report raises ValueError), and
            # zero_division=0 keeps the default value for never-predicted
            # classes while silencing the repeated UndefinedMetricWarning.
            report = classification_report(
                target,
                max_pred,
                labels=class_labels,
                target_names=class_names,
                zero_division=0,
                output_dict=True,
            )
            metrics = {
                # Computed directly (fraction of exact matches) so it does not
                # depend on whether the report exposes an "accuracy" key.
                "accuracy": float(np.mean(max_pred == target)),
                "log_loss": log_loss(target, pred, labels=class_labels),
                "micro_auc_roc": roc_auc_score(
                    target,
                    pred,
                    average="micro",
                    multi_class="ovr",
                    labels=class_labels,
                ),
                "weighted_precision": report["weighted avg"]["precision"],
                "weighted_recall": report["weighted avg"]["recall"],
                "macro_precision": report["macro avg"]["precision"],
                "macro_recall": report["macro avg"]["recall"],
            }
            for key, value in metrics.items():
                # Append at the next integer position of the long-format table.
                results.loc[len(results)] = {
                    "metric": key,
                    "model": model_class.__name__,
                    "fold": fold_test_name,
                    "iteration": iteration + 1,
                    "value": value,
                }

Epoch 1/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:14<00:00, 254.62it/s, loss=0.0220]


Epoch 2/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:17<00:00, 216.55it/s, loss=0.0220]


Epoch 3/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:16<00:00, 223.73it/s, loss=0.0220]


Epoch 4/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:15<00:00, 245.01it/s, loss=0.0220]


Epoch 5/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:15<00:00, 243.38it/s, loss=0.0220]


Epoch 6/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:16<00:00, 225.79it/s, loss=0.0220]


Epoch 7/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:14<00:00, 253.55it/s, loss=0.0220]


Epoch 8/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:15<00:00, 245.96it/s, loss=0.0220]


Epoch 9/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:15<00:00, 244.02it/s, loss=0.0220]


Epoch 10/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:15<00:00, 235.41it/s, loss=0.0220]


Epoch 11/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:14<00:00, 257.54it/s, loss=0.0220]


Epoch 12/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:13<00:00, 269.61it/s, loss=0.0220]


Epoch 13/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:12<00:00, 297.31it/s, loss=0.0220]


Epoch 14/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:12<00:00, 291.78it/s, loss=0.0220]


Epoch 15/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:14<00:00, 263.14it/s, loss=0.0220]


Epoch 16/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:13<00:00, 267.07it/s, loss=0.0220]


Epoch 17/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:11<00:00, 319.59it/s, loss=0.0220]


Epoch 18/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:11<00:00, 314.68it/s, loss=0.0220]


Epoch 19/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:12<00:00, 307.09it/s, loss=0.0220]


Epoch 20/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:12<00:00, 284.86it/s, loss=0.0220]


Epoch 21/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:13<00:00, 269.19it/s, loss=0.0220]


Epoch 22/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:14<00:00, 261.61it/s, loss=0.0220]


Epoch 23/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:12<00:00, 305.19it/s, loss=0.0220]


Epoch 24/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:10<00:00, 352.93it/s, loss=0.0220]


Epoch 25/25


100%|█████████████████████████████████████████████████████████████████| 3699/3699 [00:10<00:00, 343.98it/s, loss=0.0220]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pair_X["team_id"] = [0, 1]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pair_X["opponent_id"] = [1, 0]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:12<00:00, 237.34it/s, loss=0.0124]


Epoch 2/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:11<00:00, 258.76it/s, loss=0.0114]


Epoch 3/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:12<00:00, 241.62it/s, loss=0.0112]


Epoch 4/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:11<00:00, 250.27it/s, loss=0.0111]


Epoch 5/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:12<00:00, 246.79it/s, loss=0.0111]


Epoch 6/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:12<00:00, 240.30it/s, loss=0.0111]


Epoch 7/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:12<00:00, 234.00it/s, loss=0.0111]


Epoch 8/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:13<00:00, 227.11it/s, loss=0.0111]


Epoch 9/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:12<00:00, 239.48it/s, loss=0.0111]


Epoch 10/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:13<00:00, 224.96it/s, loss=0.0111]


Epoch 11/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:12<00:00, 244.70it/s, loss=0.0111]


Epoch 12/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:12<00:00, 241.61it/s, loss=0.0111]


Epoch 13/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:11<00:00, 266.53it/s, loss=0.0111]


Epoch 14/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:11<00:00, 269.79it/s, loss=0.0111]


Epoch 15/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:10<00:00, 294.72it/s, loss=0.0111]


Epoch 16/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:11<00:00, 258.16it/s, loss=0.0111]


Epoch 17/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:11<00:00, 252.90it/s, loss=0.0110]


Epoch 18/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:10<00:00, 273.74it/s, loss=0.0111]


Epoch 19/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:10<00:00, 277.38it/s, loss=0.0110]


Epoch 20/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:10<00:00, 276.05it/s, loss=0.0110]


Epoch 21/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:11<00:00, 256.49it/s, loss=0.0110]


Epoch 22/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:12<00:00, 241.11it/s, loss=0.0110]


Epoch 23/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:17<00:00, 174.39it/s, loss=0.0110]


Epoch 24/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:14<00:00, 199.39it/s, loss=0.0110]


Epoch 25/25


100%|█████████████████████████████████████████████████████████████████| 2976/2976 [00:12<00:00, 239.88it/s, loss=0.0109]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pair_X["team_id"] = [0, 1]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pair_X["opponent_id"] = [1, 0]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [39]:
# Show the raw long-format metrics collected by the evaluation loop.
results

Unnamed: 0,metric,model,fold,iteration,value
0,accuracy,DualEmbPredictor,brazil,1,0.481029
1,log_loss,DualEmbPredictor,brazil,1,1.096001
2,micro_auc_roc,DualEmbPredictor,brazil,1,0.542101
3,weighted_precision,DualEmbPredictor,brazil,1,0.231389
4,weighted_recall,DualEmbPredictor,brazil,1,0.481029
5,macro_precision,DualEmbPredictor,brazil,1,0.160343
6,macro_recall,DualEmbPredictor,brazil,1,0.333333
7,accuracy,DualEmbPredictor,international,1,0.490215
8,log_loss,DualEmbPredictor,international,1,1.056131
9,micro_auc_roc,DualEmbPredictor,international,1,0.637642


In [40]:
# Summary table: one row per model, one column per metric. Values are first
# averaged within each (metric, model, fold) group, i.e. over iterations, and
# those per-fold means are then averaged so every fold weighs equally.
(
    results.groupby(["metric", "model", "fold"])["value"]
    .mean()  # mean over iterations for each fold
    .reset_index()
    .groupby(["metric", "model"])["value"]
    .mean()  # mean of the per-fold means
    .reset_index()
    .pivot(index="model", columns="metric", values="value")
)

metric,accuracy,log_loss,macro_precision,macro_recall,micro_auc_roc,weighted_precision,weighted_recall
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
DualEmbPredictor,0.485622,1.076066,0.161874,0.333333,0.589872,0.23585,0.485622
