# Notebook 05 - Model Optimization with Optuna

In [11]:
import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append('../src')
from paths import TRANSFORMED_DATA_DIR

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, confusion_matrix, mean_absolute_error
from sklearn.preprocessing import LabelEncoder

import optuna

import xgboost as xgb
import lightgbm as lgb

In [12]:
# Cargar datos
pd.set_option('display.max_columns', None)
data_transformed = pd.read_csv(TRANSFORMED_DATA_DIR / 'data_transformed.csv')
data_transformed

Unnamed: 0,week,date,home,score,away,xG,xG_1,venue,referee,home_goals,away_goals,season_start,result,day_Friday,day_Monday,day_Saturday,day_Sunday,day_Thursday,day_Tuesday,day_Wednesday,home_rolling_avg_goals,away_rolling_avg_goals,home_rolling_avg_xG,away_rolling_avg_xG
0,2,2018-08-24,Getafe,2–0,Eibar,1.4,0.6,Coliseum Alfonso Pérez,David Medié,2.0,0.0,2018,Home win,True,False,False,False,False,False,False,0.0,1.0,0.20,1.30
1,2,2018-08-24,Leganés,2–2,Real Sociedad,1.7,1.6,Estadio Municipal de Butarque,José Luis Munuera,2.0,2.0,2018,Draw,True,False,False,False,False,False,False,1.0,2.0,1.00,0.70
2,2,2018-08-25,Alavés,0–0,Betis,0.6,0.9,Estadio de Mendizorroza,Pablo González,0.0,0.0,2018,Draw,False,False,True,False,False,False,False,0.0,0.0,0.30,0.90
3,2,2018-08-25,Atlético Madrid,1–0,Rayo Vallecano,0.9,1.5,Estadio Wanda Metropolitano,José González,1.0,0.0,2018,Home win,False,False,True,False,False,False,False,1.0,1.0,0.90,2.10
4,2,2018-08-25,Valladolid,0–1,Barcelona,0.4,1.0,Estadio Municipal José Zorrilla,Ricardo de Burgos,0.0,1.0,2018,Away win,False,False,True,False,False,False,False,0.0,3.0,0.00,3.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2258,38,2024-05-25,Real Sociedad,0–2,Atlético Madrid,0.6,2.2,Reale Arena,José Sánchez,0.0,2.0,2023,Away win,False,False,True,False,False,False,False,1.0,1.8,0.72,1.16
2259,38,2024-05-26,Las Palmas,1–1,Alavés,1.0,2.5,Estadio de Gran Canaria,Francisco Hernández,1.0,1.0,2023,Draw,False,False,False,True,False,False,False,0.4,1.4,1.08,1.46
2260,38,2024-05-26,Celta Vigo,2–2,Valencia,1.5,2.0,Estadio Abanca Balaídos,Miguel Ángel Ortiz Arias,2.0,2.0,2023,Draw,False,False,False,True,False,False,False,1.4,0.6,1.38,1.66
2261,38,2024-05-26,Getafe,1–2,Mallorca,0.9,1.4,Coliseum Alfonso Pérez,Víctor García,1.0,2.0,2023,Away win,False,False,False,True,False,False,False,0.6,1.0,1.52,1.08


In [13]:
# Split into train and test sets based on season_start
train_data = data_transformed[data_transformed['season_start'] <= 2022]
test_data = data_transformed[data_transformed['season_start'] == 2023]

features = [column for column in data_transformed.columns if column not in ['date', 'xG', 'xG_1', 'home', 'away', 'referee', 'venue', 'score', 'result', 'home_goals', 'away_goals', 'season_start']]

X_train = train_data[features]
y_train = train_data['result']
X_test = test_data[features]
y_test = test_data['result']

print(f'X_train shape: {X_train.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'X_test shape: {X_test.shape}')
print(f'y_test shape: {y_test.shape}')

X_train shape: (1884, 12)
y_train shape: (1884,)
X_test shape: (379, 12)
y_test shape: (379,)


In [14]:
# Encode target variable
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [15]:
#Function for defining the model pipeline
def get_pipeline(model_type="xgboost", **hyperparams):
    if model_type == "xgboost":
        model = xgb.XGBClassifier(**hyperparams, use_label_encoder=False, eval_metric='mlogloss')
    elif model_type == "lightgbm":
        model = lgb.LGBMClassifier(**hyperparams)
    else:
        raise ValueError("Unsupported model type. Choose 'xgboost' or 'lightgbm'.")
    
    return model

In [17]:
# Objective function for Optuna
def objective(trial, model_type="xgboost"):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0)
    }
    
    if model_type == "xgboost":
        model = xgb.XGBClassifier(**params, use_label_encoder=False, eval_metric='mlogloss')
    elif model_type == "lightgbm":
        model = lgb.LGBMClassifier(**params)
    
    kf = KFold(n_splits=3)
    scores = []
    
    for train_idx, val_idx in kf.split(X_train):
        X_train_fold, X_val_fold = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_train_fold, y_val_fold = y_train_encoded[train_idx], y_train_encoded[val_idx]
        
        model.fit(X_train_fold, y_train_fold)
        preds = model.predict(X_val_fold)
        score = accuracy_score(y_val_fold, preds)
        scores.append(score)
    
    return np.mean(scores)

In [19]:
# Parameters optimization for XGBoost
study_xgb = optuna.create_study(direction="maximize")
study_xgb.optimize(lambda trial: objective(trial, model_type="xgboost"), n_trials=5)
best_params_xgb = study_xgb.best_trial.params
print(f'Best parameters for XGBoost: {best_params_xgb}')

# Parameters optimization for LightGBM
study_lgbm = optuna.create_study(direction="maximize")
study_lgbm.optimize(lambda trial: objective(trial, model_type="lightgbm"), n_trials=5)
best_params_lgbm = study_lgbm.best_trial.params
print(f'Best parameters for LightGBM: {best_params_lgbm}')

[I 2024-06-29 11:30:00,635] A new study created in memory with name: no-name-a92c77c2-dd9b-4f11-8834-bb9fed65513a


[I 2024-06-29 11:30:02,747] Trial 0 finished with value: 0.39596602972399153 and parameters: {'n_estimators': 181, 'max_depth': 9, 'learning_rate': 0.2661525350996771, 'subsample': 0.6640496226197836, 'colsample_bytree': 0.6556219139753311}. Best is trial 0 with value: 0.39596602972399153.
[I 2024-06-29 11:30:03,243] Trial 1 finished with value: 0.3996815286624204 and parameters: {'n_estimators': 69, 'max_depth': 5, 'learning_rate': 0.22693341101871126, 'subsample': 0.9353959712747884, 'colsample_bytree': 0.9033128690412235}. Best is trial 1 with value: 0.3996815286624204.
[I 2024-06-29 11:30:03,974] Trial 2 finished with value: 0.40870488322717624 and parameters: {'n_estimators': 107, 'max_depth': 5, 'learning_rate': 0.2112375546025344, 'subsample': 0.9177620177749755, 'colsample_bytree': 0.7364599132398543}. Best is trial 2 with value: 0.40870488322717624.
[I 2024-06-29 11:30:05,206] Trial 3 finished with value: 0.40817409766454354 and parameters: {'n_estimators': 59, 'max_depth': 10

Best parameters for XGBoost: {'n_estimators': 157, 'max_depth': 4, 'learning_rate': 0.2899266745003666, 'subsample': 0.9863723389666251, 'colsample_bytree': 0.5540727802972474}
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000091 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 1256, number of used features: 12
[LightGBM] [Info] Start training from score -1.246809
[LightGBM] [Info] Start training from score -1.300877
[LightGBM] [Info] Start training from score -0.820329
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000025 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 321
[LightGBM] [Info] Number of data points in the train set: 1256, number of used features: 12
[LightGBM] [Info] Start

[I 2024-06-29 11:30:06,411] Trial 0 finished with value: 0.40339702760084933 and parameters: {'n_estimators': 96, 'max_depth': 5, 'learning_rate': 0.14632782798832272, 'subsample': 0.8414412157140725, 'colsample_bytree': 0.7843347631460297}. Best is trial 0 with value: 0.40339702760084933.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000058 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 311
[LightGBM] [Info] Number of data points in the train set: 1256, number of used features: 12
[LightGBM] [Info] Start training from score -1.300877
[LightGBM] [Info] Start training from score -1.233054
[LightGBM] [Info] Start training from score -0.829412
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000055 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 1256, number of used features: 12
[LightGBM] [Info] Start training from score -1.246809
[LightGBM] [Info] Start training from score -1.300877
[LightGBM] [Info] Start training from score -0.820329
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000057 seconds

[I 2024-06-29 11:30:06,800] Trial 1 finished with value: 0.3885350318471337 and parameters: {'n_estimators': 115, 'max_depth': 6, 'learning_rate': 0.26764715412676715, 'subsample': 0.7422447615741794, 'colsample_bytree': 0.7795117321269938}. Best is trial 0 with value: 0.40339702760084933.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000071 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 311
[LightGBM] [Info] Number of data points in the train set: 1256, number of used features: 12
[LightGBM] [Info] Start training from score -1.300877
[LightGBM] [Info] Start training from score -1.233054
[LightGBM] [Info] Start training from score -0.829412
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000056 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 1256, number of used features: 12
[LightGBM] [Info] Start training from score -1.246809
[LightGBM] [Info] Start training from score -1.300877
[LightGBM] [Info] Start training from score -0.820329
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000033 seconds

[I 2024-06-29 11:30:07,292] Trial 2 finished with value: 0.4097664543524416 and parameters: {'n_estimators': 85, 'max_depth': 6, 'learning_rate': 0.10223936506144915, 'subsample': 0.8460762189122384, 'colsample_bytree': 0.7992123419327739}. Best is trial 2 with value: 0.4097664543524416.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000034 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 1256, number of used features: 12
[LightGBM] [Info] Start training from score -1.246809
[LightGBM] [Info] Start training from score -1.300877
[LightGBM] [Info] Start training from score -0.820329
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000126 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 321
[LightGBM] [Info] Number of data points in the train set: 1256, number of used features: 12
[LightGBM] [Info] Start training from score -1.309687
[LightGBM] [Info] Start training from score -1.330552
[LightGBM] [Info] Start training from score -0.764075
[LightGBM] [Info] Auto-choosing col-wi

[I 2024-06-29 11:30:08,031] Trial 3 finished with value: 0.40445859872611467 and parameters: {'n_estimators': 69, 'max_depth': 7, 'learning_rate': 0.14492350223912526, 'subsample': 0.6841929400735639, 'colsample_bytree': 0.9896153242600843}. Best is trial 2 with value: 0.4097664543524416.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000063 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 1256, number of used features: 12
[LightGBM] [Info] Start training from score -1.246809
[LightGBM] [Info] Start training from score -1.300877
[LightGBM] [Info] Start training from score -0.820329
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043197 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 321
[LightGBM] [Info] Number of data points in the train set: 1256, number of used features: 12
[LightGBM] [Info] Start training from score -1.309687
[LightGBM] [Info] Start training from score -1.330552
[LightGBM] [Info] Start training from score -0.764075
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000150 seconds

[I 2024-06-29 11:30:08,858] Trial 4 finished with value: 0.410828025477707 and parameters: {'n_estimators': 69, 'max_depth': 9, 'learning_rate': 0.13121034417629138, 'subsample': 0.9422370944659473, 'colsample_bytree': 0.6049191750709078}. Best is trial 4 with value: 0.410828025477707.


Best parameters for LightGBM: {'n_estimators': 69, 'max_depth': 9, 'learning_rate': 0.13121034417629138, 'subsample': 0.9422370944659473, 'colsample_bytree': 0.6049191750709078}


In [25]:
pipeline = get_pipeline(**best_params_xgb)
pipeline.fit(X_train, y_train_encoded)

In [26]:
predictions = pipeline.predict(X_test)
test_mae = mean_absolute_error(y_test_encoded, predictions)
print(f'{test_mae=:.4f}')

test_mae=0.8417


In [27]:
# Define a function to train and evaluate a model
def train_and_evaluate(model_type, best_params, X_train, y_train, X_test, y_test):
    pipeline = get_pipeline(model_type=model_type, **best_params)
    pipeline.fit(X_train, y_train)
    predictions = pipeline.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    conf_matrix = confusion_matrix(y_test, predictions)
    
    print(f'{model_type} Accuracy: {accuracy:.4f}')
    print(f'{model_type} Confusion Matrix:')
    print(conf_matrix)
    return accuracy, conf_matrix

In [28]:
# Train and evaluate XGBoost model
xgb_accuracy, xgb_conf_matrix = train_and_evaluate(
    model_type="xgboost", 
    best_params=best_params_xgb, 
    X_train=X_train, 
    y_train=y_train_encoded, 
    X_test=X_test, 
    y_test=y_test_encoded
)

# Train and evaluate LightGBM model
lgbm_accuracy, lgbm_conf_matrix = train_and_evaluate(
    model_type="lightgbm", 
    best_params=best_params_lgbm, 
    X_train=X_train, 
    y_train=y_train_encoded, 
    X_test=X_test, 
    y_test=y_test_encoded
)

xgboost Accuracy: 0.3931
xgboost Confusion Matrix:
[[ 29  19  58]
 [ 27  17  62]
 [ 31  33 103]]
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000740 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 353
[LightGBM] [Info] Number of data points in the train set: 1884, number of used features: 12
[LightGBM] [Info] Start training from score -1.285402
[LightGBM] [Info] Start training from score -1.287324
[LightGBM] [Info] Start training from score -0.804185
lightgbm Accuracy: 0.4301
lightgbm Confusion Matrix:
[[ 37  21  48]
 [ 33  18  55]
 [ 25  34 108]]


### Improve the models with feature engineering and hyperparameter tuning

In [29]:
# Include new dummy variables that were left out
data_transformed_new_features = pd.get_dummies(data_transformed, columns=['home', 'away', 'referee', 'venue'])
data_transformed_new_features 


Unnamed: 0,week,date,score,xG,xG_1,home_goals,away_goals,season_start,result,day_Friday,day_Monday,day_Saturday,day_Sunday,day_Thursday,day_Tuesday,day_Wednesday,home_rolling_avg_goals,away_rolling_avg_goals,home_rolling_avg_xG,away_rolling_avg_xG,home_Alavés,home_Almería,home_Athletic Club,home_Atlético Madrid,home_Barcelona,home_Betis,home_Celta Vigo,home_Cádiz,home_Eibar,home_Elche,home_Espanyol,home_Getafe,home_Girona,home_Granada,home_Huesca,home_Las Palmas,home_Leganés,home_Levante,home_Mallorca,home_Osasuna,home_Rayo Vallecano,home_Real Madrid,home_Real Sociedad,home_Sevilla,home_Valencia,home_Valladolid,home_Villarreal,away_Alavés,away_Almería,away_Athletic Club,away_Atlético Madrid,away_Barcelona,away_Betis,away_Celta Vigo,away_Cádiz,away_Eibar,away_Elche,away_Espanyol,away_Getafe,away_Girona,away_Granada,away_Huesca,away_Las Palmas,away_Leganés,away_Levante,away_Mallorca,away_Osasuna,away_Rayo Vallecano,away_Real Madrid,away_Real Sociedad,away_Sevilla,away_Valencia,away_Valladolid,away_Villarreal,referee_Adrián Cordero,referee_Alberto Undiano,referee_Alejandro Hernández,referee_Alejandro Muñíz,referee_Antonio Matéu,referee_Antonio Matéu Lahoz,referee_Carlos del Cerro,referee_César Soto,referee_Daniel Ask,referee_David Medié,referee_Eduardo Prieto,referee_Francisco Hernández,referee_Guillermo Cuadra,referee_Hsu Jason,referee_Ignacio Iglesias,referee_Isidro Díaz de Mera,referee_Javier Alberola,referee_Javier Villanueva,referee_Jesús Gil,referee_Jorge Figueroa,referee_José González,referee_José Luis Munuera,referee_José Sánchez,referee_Juan Martínez,referee_Juan Pulido,referee_Mario Melero,referee_Mateo Busquets,referee_Miguel Ángel Ortiz Arias,referee_Pablo González,referee_Ricardo de Burgos,referee_Santiago Jaime,referee_Valentín Pizarro,referee_Víctor García,venue_Camp Nou,venue_Coliseum Alfonso Pérez,venue_Estadi Mallorca Son Moix,venue_Estadi Municipal de Montilivi,venue_Estadi Olímpic Lluís Companys,venue_Estadio Abanca Balaídos,venue_Estadio Alfredo Di Stéfano,venue_Estadio Benito Villamarín,venue_Estadio Ciudad de Valencia,venue_Estadio Cívitas Metropolitano,venue_Estadio El Alcoraz,venue_Estadio El Sadar,venue_Estadio Manuel Martínez Valero,venue_Estadio Municipal José Zorrilla,venue_Estadio Municipal de Anoeta,venue_Estadio Municipal de Butarque,venue_Estadio Municipal de Ipurúa,venue_Estadio Nuevo Los Cármenes,venue_Estadio Nuevo Mirandilla,venue_Estadio Ramón Sánchez Pizjuán,venue_Estadio Ramón de Carranza,venue_Estadio San Mamés,venue_Estadio Santiago Bernabéu,venue_Estadio Wanda Metropolitano,venue_Estadio de Balaídos,venue_Estadio de Gran Canaria,venue_Estadio de Mendizorroza,venue_Estadio de Mestalla,venue_Estadio de la Cerámica,venue_Estadio del Rayo Vallecano,venue_Iberostar Estadi,venue_Power Horse Stadium,venue_RCDE Stadium,venue_Reale Arena,venue_San Mamés
0,2,2018-08-24,2–0,1.4,0.6,2.0,0.0,2018,Home win,True,False,False,False,False,False,False,0.0,1.0,0.20,1.30,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,2,2018-08-24,2–2,1.7,1.6,2.0,2.0,2018,Draw,True,False,False,False,False,False,False,1.0,2.0,1.00,0.70,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,2,2018-08-25,0–0,0.6,0.9,0.0,0.0,2018,Draw,False,False,True,False,False,False,False,0.0,0.0,0.30,0.90,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False
3,2,2018-08-25,1–0,0.9,1.5,1.0,0.0,2018,Home win,False,False,True,False,False,False,False,1.0,1.0,0.90,2.10,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False
4,2,2018-08-25,0–1,0.4,1.0,0.0,1.0,2018,Away win,False,False,True,False,False,False,False,0.0,3.0,0.00,3.20,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2258,38,2024-05-25,0–2,0.6,2.2,0.0,2.0,2023,Away win,False,False,True,False,False,False,False,1.0,1.8,0.72,1.16,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
2259,38,2024-05-26,1–1,1.0,2.5,1.0,1.0,2023,Draw,False,False,False,True,False,False,False,0.4,1.4,1.08,1.46,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False
2260,38,2024-05-26,2–2,1.5,2.0,2.0,2.0,2023,Draw,False,False,False,True,False,False,False,1.4,0.6,1.38,1.66,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2261,38,2024-05-26,1–2,0.9,1.4,1.0,2.0,2023,Away win,False,False,False,True,False,False,False,0.6,1.0,1.52,1.08,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [30]:
# Train-test split
train_data = data_transformed_new_features[data_transformed_new_features['season_start'] <= 2022]
test_data = data_transformed_new_features[data_transformed_new_features['season_start'] == 2023]

features = [column for column in data_transformed_new_features.columns if column not in ['date', 'xG', 'xG_1', 'score', 'result', 'home_goals', 'away_goals', 'season_start']]

X_train = train_data[features]
y_train = train_data['result']
X_test = test_data[features]
y_test = test_data['result']

In [31]:
# Encode the labels to use it with Optuna
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [32]:
def objective_tuned(trial, model_type="xgboost"):
    if model_type == "xgboost":
        param = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 200),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'objective': 'multi:softprob',
            'eval_metric': 'mlogloss'
        }
        model = xgb.XGBClassifier(**param)
    elif model_type == "lightgbm":
        param = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 200),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'objective': 'multiclass',
            'metric': 'multi_logloss'
        }
        model = lgb.LGBMClassifier(**param)
    
    model.fit(X_train, y_train_encoded)
    preds = model.predict(X_test)
    accuracy = accuracy_score(y_test_encoded, preds)
    return accuracy


In [34]:
# Optimize for XGBoost
study_xgb = optuna.create_study(direction="maximize")
study_xgb.optimize(lambda trial: objective_tuned(trial, model_type="xgboost"), n_trials=5)
best_params_xgb = study_xgb.best_trial.params
print(f'Best parameters for XGBoost: {best_params_xgb}')

# Optimize for LightGBM
study_lgbm = optuna.create_study(direction="maximize")
study_lgbm.optimize(lambda trial: objective_tuned(trial, model_type="lightgbm"), n_trials=5)
best_params_lgbm = study_lgbm.best_trial.params
print(f'Best parameters for LightGBM: {best_params_lgbm}')

[I 2024-06-29 12:25:20,218] A new study created in memory with name: no-name-afc9a1c2-c31e-4be2-8538-9eebddc14a9a
[I 2024-06-29 12:25:22,002] Trial 0 finished with value: 0.48021108179419525 and parameters: {'n_estimators': 142, 'max_depth': 9, 'learning_rate': 0.04734511013424091, 'subsample': 0.6754860235786777, 'colsample_bytree': 0.6557181882484795}. Best is trial 0 with value: 0.48021108179419525.
[I 2024-06-29 12:25:23,458] Trial 1 finished with value: 0.5118733509234829 and parameters: {'n_estimators': 174, 'max_depth': 3, 'learning_rate': 0.15776298449860301, 'subsample': 0.9714867451029368, 'colsample_bytree': 0.9702665581227048}. Best is trial 1 with value: 0.5118733509234829.
[I 2024-06-29 12:25:25,075] Trial 2 finished with value: 0.43535620052770446 and parameters: {'n_estimators': 172, 'max_depth': 7, 'learning_rate': 0.22963402160012114, 'subsample': 0.8825361394553841, 'colsample_bytree': 0.6820483869948988}. Best is trial 1 with value: 0.5118733509234829.
[I 2024-06-29

Best parameters for XGBoost: {'n_estimators': 92, 'max_depth': 6, 'learning_rate': 0.08003203749026992, 'subsample': 0.6960065137647309, 'colsample_bytree': 0.9734061619337853}
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000059 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 559
[LightGBM] [Info] Number of data points in the train set: 1884, number of used features: 115
[LightGBM] [Info] Start training from score -1.285402
[LightGBM] [Info] Start training from score -1.287324
[LightGBM] [Info] Start training from score -0.804185


[I 2024-06-29 12:25:30,811] Trial 0 finished with value: 0.43271767810026385 and parameters: {'n_estimators': 109, 'max_depth': 7, 'learning_rate': 0.2552110716163889, 'subsample': 0.8977011563917447, 'colsample_bytree': 0.7339380832889046}. Best is trial 0 with value: 0.43271767810026385.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000634 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 559
[LightGBM] [Info] Number of data points in the train set: 1884, number of used features: 115
[LightGBM] [Info] Start training from score -1.285402
[LightGBM] [Info] Start training from score -1.287324
[LightGBM] [Info] Start training from score -0.804185


[I 2024-06-29 12:25:32,279] Trial 1 finished with value: 0.40369393139841686 and parameters: {'n_estimators': 161, 'max_depth': 10, 'learning_rate': 0.2677906770661241, 'subsample': 0.9098283070371295, 'colsample_bytree': 0.7482985856246893}. Best is trial 0 with value: 0.43271767810026385.
[I 2024-06-29 12:25:32,435] Trial 2 finished with value: 0.49076517150395776 and parameters: {'n_estimators': 97, 'max_depth': 4, 'learning_rate': 0.10154632136700115, 'subsample': 0.7217472703302776, 'colsample_bytree': 0.7742011082741167}. Best is trial 2 with value: 0.49076517150395776.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001239 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 559
[LightGBM] [Info] Number of data points in the train set: 1884, number of used features: 115
[LightGBM] [Info] Start training from score -1.285402
[LightGBM] [Info] Start training from score -1.287324
[LightGBM] [Info] Start training from score -0.804185
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000130 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 559
[LightGBM] [Info] Number of data points in the train set: 1884, number of used features: 115
[LightGBM] [Info] Start training from score -1.285402
[LightGBM] [Info] Start training from score -1.287324
[LightGBM] [Info] Start training from score -0.804185


[I 2024-06-29 12:25:32,956] Trial 3 finished with value: 0.46174142480211083 and parameters: {'n_estimators': 80, 'max_depth': 5, 'learning_rate': 0.26233420202153, 'subsample': 0.6291219021769849, 'colsample_bytree': 0.538247916538936}. Best is trial 2 with value: 0.49076517150395776.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000629 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 559
[LightGBM] [Info] Number of data points in the train set: 1884, number of used features: 115
[LightGBM] [Info] Start training from score -1.285402
[LightGBM] [Info] Start training from score -1.287324
[LightGBM] [Info] Start training from score -0.804185


[I 2024-06-29 12:25:33,942] Trial 4 finished with value: 0.45646437994722955 and parameters: {'n_estimators': 120, 'max_depth': 9, 'learning_rate': 0.1467947288470368, 'subsample': 0.8776113697438157, 'colsample_bytree': 0.7765830277211068}. Best is trial 2 with value: 0.49076517150395776.


Best parameters for LightGBM: {'n_estimators': 97, 'max_depth': 4, 'learning_rate': 0.10154632136700115, 'subsample': 0.7217472703302776, 'colsample_bytree': 0.7742011082741167}


In [50]:
# Train and evaluate XGBoost model with best parameters
pipeline_xgb = xgb.XGBClassifier(**best_params_xgb)
pipeline_xgb.fit(X_train, y_train_encoded)
predictions_xgb = pipeline_xgb.predict(X_test)
accuracy_xgb = accuracy_score(y_test_encoded, predictions_xgb)
conf_matrix_xgb = confusion_matrix(y_test_encoded, predictions_xgb)
print(f'XGBoost Accuracy: {accuracy_xgb:.4f}')
print('XGBoost Confusion Matrix:')
print(conf_matrix_xgb)

XGBoost Accuracy: 0.5251
XGBoost Confusion Matrix:
[[ 41  16  49]
 [ 22  18  66]
 [ 14  13 140]]


In [53]:
import plotly.figure_factory as ff

def plot_confusion_matrix(conf_matrix, model_name):
    # Normalize the confusion matrix
    conf_matrix_normalized = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis]
    
    # Define the plotly heatmap
    z = conf_matrix_normalized
    x = ['Predicted: Home win', 'Predicted: Draw', 'Predicted: Away win']
    y = ['Actual: Home win', 'Actual: Draw', 'Actual: Away win']

    fig = ff.create_annotated_heatmap(z, x=x, y=y, colorscale='Blues', showscale=True)

    fig.update_layout(
        title=f'Confusion Matrix for {model_name}',
        xaxis_title="Predicted",
        yaxis_title="Actual"
    )

    fig.show();

In [54]:
plot_confusion_matrix(conf_matrix_xgb, "XGBoost")

#### Conclusions drawn from the result of the model

- Correctly predicts most away wins but struggles more with draws and home wins.

- Bias: towards predicting away wins.

- Confusion: the model confuses draws with away wins and also shows some confusion between home wins and draws.

- Further improvements: more and better features with tuning of hyperparameters to improve its accuracy.

#### Test with a new hypothetical match

In [41]:
data_transformed['referee'].unique()

array(['David Medié', 'José Luis Munuera', 'Pablo González',
       'José González', 'Ricardo de Burgos', 'Juan Martínez',
       'Alberto Undiano', 'Carlos del Cerro', 'Alejandro Hernández',
       'Hsu Jason', 'Adrián Cordero', 'Eduardo Prieto', 'José Sánchez',
       'Antonio Matéu Lahoz', 'Santiago Jaime', 'Ignacio Iglesias',
       'Jesús Gil', 'Daniel Ask', 'Mario Melero', 'Guillermo Cuadra',
       'César Soto', 'Valentín Pizarro', 'Jorge Figueroa',
       'Isidro Díaz de Mera', 'Miguel Ángel Ortiz Arias',
       'Alejandro Muñíz', 'Javier Villanueva', 'Juan Pulido',
       'Antonio Matéu', 'Javier Alberola', 'Francisco Hernández',
       'Víctor García', 'Mateo Busquets'], dtype=object)

In [60]:
home_team = 'Getafe'
away_team = 'Real Madrid'
venue = 'Coliseum Alfonso Pérez'
referee = 'Jesús Gil'

In [61]:
# Create a row of data for the hypothetical match
data = {
    'Wk': [25],
    'home_rolling_avg_goals': [1.9],
    'away_rolling_avg_goals': [1.2],
    'home_rolling_avg_xG': [2.1],
    'away_rolling_avg_xG': [1.3],
    'Day_Saturday': [1],
    'Home_Chelsea': [1],
    'Away_Manchester Utd': [1],
    'Referee_Anthony Taylor': [1],
    'Venue_Stamford Bridge': [1]
}

match = pd.DataFrame(columns=X_train.columns, data=data)

match.fillna(0, inplace=True)

In [62]:
new_prediction = pipeline_xgb.predict(match)
new_prediction

array([2])

In [63]:
# Print them
print(f"Encoded classes: {label_encoder.classes_}")

# Two
print(f"Corresponding class por prediction: {label_encoder.inverse_transform([2])}")

Encoded classes: ['Away win' 'Draw' 'Home win']
Corresponding class por prediction: ['Home win']


#### Real results vs. predicted results

In [81]:
data_transformed[data_transformed['home'] == 'Real Madrid'] 

Unnamed: 0,week,date,home,score,away,xG,xG_1,venue,referee,home_goals,away_goals,season_start,result,day_Friday,day_Monday,day_Saturday,day_Sunday,day_Thursday,day_Tuesday,day_Wednesday,home_rolling_avg_goals,away_rolling_avg_goals,home_rolling_avg_xG,away_rolling_avg_xG
14,3,2018-09-01,Real Madrid,4–1,Leganés,2.5,1.1,Estadio Santiago Bernabéu,Santiago Jaime,4.0,1.0,2018,Home win,False,False,True,False,False,False,False,3.00,1.50,2.45,1.350
34,5,2018-09-22,Real Madrid,1–0,Espanyol,1.0,0.4,Estadio Santiago Bernabéu,Antonio Matéu Lahoz,1.0,0.0,2018,Home win,False,False,True,False,False,False,False,2.75,1.25,2.30,1.175
50,7,2018-09-29,Real Madrid,0–0,Atlético Madrid,1.2,0.8,Estadio Santiago Bernabéu,Juan Martínez,0.0,0.0,2018,Draw,False,False,True,False,False,False,False,2.00,1.40,2.14,1.120
70,9,2018-10-20,Real Madrid,1–2,Levante,2.9,2.0,Estadio Santiago Bernabéu,Guillermo Cuadra,1.0,2.0,2018,Away win,False,False,True,False,False,False,False,0.40,1.20,1.26,1.300
91,11,2018-11-03,Real Madrid,2–0,Valladolid,2.2,0.9,Estadio Santiago Bernabéu,Jesús Gil,2.0,0.0,2018,Home win,False,False,True,False,False,False,False,0.40,1.20,1.70,0.820
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2180,30,2024-03-31,Real Madrid,2–0,Athletic Club,1.1,0.3,Estadio Santiago Bernabéu,Javier Alberola,2.0,0.0,2023,Home win,False,False,False,True,False,False,False,2.40,1.60,2.12,1.040
2201,32,2024-04-21,Real Madrid,3–2,Barcelona,2.3,1.4,Estadio Santiago Bernabéu,César Soto,3.0,2.0,2023,Home win,False,False,False,True,False,False,False,2.60,1.20,2.22,1.360
2215,34,2024-05-04,Real Madrid,3–0,Cádiz,2.3,0.9,Estadio Santiago Bernabéu,Javier Villanueva,3.0,0.0,2023,Home win,False,False,True,False,False,False,False,2.20,0.60,1.64,1.080
2234,36,2024-05-14,Real Madrid,5–0,Alavés,2.5,1.3,Estadio Santiago Bernabéu,Mateo Busquets,5.0,0.0,2023,Home win,False,False,False,False,False,True,False,2.40,1.60,1.60,1.100


In [94]:
test_data[(test_data['home_Real Madrid'] == True) & (test_data['venue_Estadio Santiago Bernabéu'] == True)]

Unnamed: 0,week,date,score,xG,xG_1,home_goals,away_goals,season_start,result,day_Friday,day_Monday,day_Saturday,day_Sunday,day_Thursday,day_Tuesday,day_Wednesday,home_rolling_avg_goals,away_rolling_avg_goals,home_rolling_avg_xG,away_rolling_avg_xG,home_Alavés,home_Almería,home_Athletic Club,home_Atlético Madrid,home_Barcelona,home_Betis,home_Celta Vigo,home_Cádiz,home_Eibar,home_Elche,home_Espanyol,home_Getafe,home_Girona,home_Granada,home_Huesca,home_Las Palmas,home_Leganés,home_Levante,home_Mallorca,home_Osasuna,home_Rayo Vallecano,home_Real Madrid,home_Real Sociedad,home_Sevilla,home_Valencia,home_Valladolid,home_Villarreal,away_Alavés,away_Almería,away_Athletic Club,away_Atlético Madrid,away_Barcelona,away_Betis,away_Celta Vigo,away_Cádiz,away_Eibar,away_Elche,away_Espanyol,away_Getafe,away_Girona,away_Granada,away_Huesca,away_Las Palmas,away_Leganés,away_Levante,away_Mallorca,away_Osasuna,away_Rayo Vallecano,away_Real Madrid,away_Real Sociedad,away_Sevilla,away_Valencia,away_Valladolid,away_Villarreal,referee_Adrián Cordero,referee_Alberto Undiano,referee_Alejandro Hernández,referee_Alejandro Muñíz,referee_Antonio Matéu,referee_Antonio Matéu Lahoz,referee_Carlos del Cerro,referee_César Soto,referee_Daniel Ask,referee_David Medié,referee_Eduardo Prieto,referee_Francisco Hernández,referee_Guillermo Cuadra,referee_Hsu Jason,referee_Ignacio Iglesias,referee_Isidro Díaz de Mera,referee_Javier Alberola,referee_Javier Villanueva,referee_Jesús Gil,referee_Jorge Figueroa,referee_José González,referee_José Luis Munuera,referee_José Sánchez,referee_Juan Martínez,referee_Juan Pulido,referee_Mario Melero,referee_Mateo Busquets,referee_Miguel Ángel Ortiz Arias,referee_Pablo González,referee_Ricardo de Burgos,referee_Santiago Jaime,referee_Valentín Pizarro,referee_Víctor García,venue_Camp Nou,venue_Coliseum Alfonso Pérez,venue_Estadi Mallorca Son Moix,venue_Estadi Municipal de Montilivi,venue_Estadi Olímpic Lluís Companys,venue_Estadio Abanca Balaídos,venue_Estadio Alfredo Di Stéfano,venue_Estadio Benito Villamarín,venue_Estadio Ciudad de Valencia,venue_Estadio Cívitas Metropolitano,venue_Estadio El Alcoraz,venue_Estadio El Sadar,venue_Estadio Manuel Martínez Valero,venue_Estadio Municipal José Zorrilla,venue_Estadio Municipal de Anoeta,venue_Estadio Municipal de Butarque,venue_Estadio Municipal de Ipurúa,venue_Estadio Nuevo Los Cármenes,venue_Estadio Nuevo Mirandilla,venue_Estadio Ramón Sánchez Pizjuán,venue_Estadio Ramón de Carranza,venue_Estadio San Mamés,venue_Estadio Santiago Bernabéu,venue_Estadio Wanda Metropolitano,venue_Estadio de Balaídos,venue_Estadio de Gran Canaria,venue_Estadio de Mendizorroza,venue_Estadio de Mestalla,venue_Estadio de la Cerámica,venue_Estadio del Rayo Vallecano,venue_Iberostar Estadi,venue_Power Horse Stadium,venue_RCDE Stadium,venue_Reale Arena,venue_San Mamés
1916,4,2023-09-02,2–1,2.8,0.4,2.0,1.0,2023,Home win,False,False,True,False,False,False,False,1.8,0.6,1.38,1.3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False
1930,5,2023-09-17,2–1,2.0,1.6,2.0,1.0,2023,Home win,False,False,False,True,False,False,False,1.8,1.8,1.78,1.36,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False
1945,7,2023-09-27,2–0,1.7,0.7,2.0,0.0,2023,Home win,False,False,False,False,False,False,True,1.8,0.2,1.84,0.82,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False
1965,9,2023-10-07,4–0,3.2,0.5,4.0,0.0,2023,Home win,False,False,True,False,False,False,False,2.0,1.0,1.96,1.08,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False
1998,12,2023-11-05,0–0,2.2,0.1,0.0,0.0,2023,Draw,False,False,False,True,False,False,False,2.4,1.4,1.98,1.2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False
2003,13,2023-11-11,5–1,1.7,1.7,5.0,1.0,2023,Home win,False,False,True,False,False,False,False,2.0,1.2,2.08,1.16,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False
2025,15,2023-12-02,2–0,1.8,0.1,2.0,0.0,2023,Home win,False,False,True,False,False,False,False,2.2,0.8,1.64,1.2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False
2050,17,2023-12-17,4–1,2.3,0.8,4.0,1.0,2023,Home win,False,False,False,True,False,False,False,2.2,1.4,1.66,1.74,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False
2067,19,2024-01-03,1–0,1.7,0.6,1.0,0.0,2023,Home win,False,False,False,False,False,False,True,2.2,1.0,1.46,1.08,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False
2086,21,2024-01-21,3–2,2.2,0.9,3.0,2.0,2023,Home win,False,False,False,True,False,False,False,1.8,0.6,1.48,1.02,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False
