The purpose of this notebook is to generate various ratings/rankings features for the teams in our dataset. We will generate elo ratings, pi ratings, rpi ratings, as well as league rankings, and use a neural network to create embeddings for the home and away teams which will serve as a ranking of the home strength and away strength of the home and away teams respectively. 

In order to speed up the calculations we will be creating these features for the subset of top European leagues. This will give us more than enough games to generate accurate ratings and give us a large training set to work with. We will be conducting an EDA on the generated features and will be comparing them using a single variable logistic regression model.

In [27]:
import pandas as pd
import numpy as np
import os
import glob
from ast import literal_eval
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from scipy.optimize import minimize
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, brier_score_loss, log_loss
from torch.optim.lr_scheduler import ExponentialLR
import seaborn as sns
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score
import optuna
import pickle
import optuna
from sklearn.model_selection import StratifiedKFold, cross_val_score, cross_validate
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from catboost import CatBoostClassifier


In [12]:
df = pd.read_csv("df_opt.csv")
df = df[df['season'] != 2013]

features = ['home_win', 'away_win', 'pi_diff','gd_diff', 
            'elo_gls_diff', 'home_ip_scaled', 'away_ip_scaled',
       'home_ip', 'away_ip', 'ip_diff', 'season']

df = df[features].astype(float)

train = df[df['season'] != 2022.]
test = df[df['season'] == 2022.]

train = train.dropna()
test = test.dropna()

In [28]:
class Optimiser:
    
    def __init__(self, df, model_name, verbosity=False):
        self.df = df
        self.model_name = model_name
        self.verbosity = verbosity
        
        if self.model_name == "XGB":
            self.model = XGBClassifier()
        elif self.model_name == "LGBM":
            self.model = LGBMClassifier()
        elif self.model_name == "GBC":
            self.model = GradientBoostingClassifier()
        elif self.model_name == "CAT":
            self.model = CatBoostClassifier(verbose = verbosity)
            
    def process_df(self, df):
        
        df = df.astype(float)
        df = df[df['season'] != 2013]
        df = df.dropna()
        train = df[df['season'] != 2022.]
        test = df[df['season'] == 2022.]
        
        X_train = train.drop(['home_win', 'away_win', 'season'], axis=1)
        X_test = test.drop(['home_win', 'away_win', 'season'], axis=1)
        y_train = train['home_win']
        y_test = test['home_win']

        return X_train, X_test, y_train, y_test
    
    def objective(self, trial):


        if self.model_name == "XGB":
            params = {
                
            'booster': 'gbtree',
            'max_depth': trial.suggest_int('max_depth', 1, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000, step=100),
            'scale_pos_weight': (0.45/0.55),
            'subsample': trial.suggest_float('subsample', 0.2, 1.0, step=0.1),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.2, 1.0, step=0.1),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-5, 10, log=True),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-5, 10, log=True),
            'gamma': trial.suggest_float('gamma', 1e-5, 10, log=True),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 10)
                
        }
            model = XGBClassifier(**params)

        elif self.model_name == "LGBM":
            
            params = {
                'boosting_type': 'gbdt',
                'max_depth': trial.suggest_int('max_depth', 1, 10),
                'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
                'n_estimators': trial.suggest_int('n_estimators', 100, 1000, step=100),
                'subsample': trial.suggest_float('subsample', 0.2, 1.0, step=0.1),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.2, 1.0, step=0.1),
                'reg_lambda': trial.suggest_float('reg_lambda', 1e-5, 10, log=True),
                'reg_alpha': trial.suggest_float('reg_alpha', 1e-5, 10, log=True),
                'min_child_weight': trial.suggest_float('min_child_weight', 1e-5, 10, log=True)
            }

            model = LGBMClassifier(**params)
            
        elif self.model_name == "GBC":
            
            params = {
                'loss': 'deviance',
                'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
                'n_estimators': trial.suggest_int('n_estimators', 100, 1000, step=100),
                'subsample': trial.suggest_float('subsample', 0.2, 1.0, step=0.1),
                'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
                'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
                'max_depth': trial.suggest_int('max_depth', 1, 10),
                'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2']),
                'random_state': 42
            }

            model = GradientBoostingClassifier(**params)
            
        elif self.model_name == "CAT":

            params = {
                'iterations': trial.suggest_int('iterations', 100, 1000, step=100),
                'depth': trial.suggest_int('depth', 1, 10),
                'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
                'random_seed': 42,
                'loss_function': 'Logloss',
                'eval_metric': 'Accuracy',
                'bootstrap_type': 'Bayesian',
                'subsample': trial.suggest_float('subsample', 0.2, 1.0, step=0.1),
                'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.2, 1.0, step=0.1),
                'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-5, 10, log=True),
                'min_child_samples': trial.suggest_int('min_child_samples', 1, 20),
                'verbose': self.verbosity
            }

            model = CatBoostClassifier(**params)
        
        X_train, X_test, y_train, y_test = self.process_df(self.df)
        model.fit(X_train, y_train)
        y_prob = model.predict_proba(X_test)
        auc = roc_auc_score(y_test, y_prob[:, 1])

        return auc

    def optimise(self):
        
        self.X_train, self.X_test, self.y_train, self.y_test = self.process_df(self.df)

        study = optuna.create_study(direction='maximize')
        study.optimize(self.objective, n_trials=100, show_progress_bar=self.verbosity)
        best_params = study.best_params
        
        if self.model_name == "XGB":
            best_model = XGBClassifier(**best_params)
            
        elif  self.model_name == "LGBM":
            best_model = LGBMClassifier(**best_params)
            
        kfold = StratifiedKFold(n_splits=5, shuffle=True)
        results = cross_val_score(self.model, self.X_train, self.y_train, cv=kfold, verbose=self.verbosity)
        
        print("Results of K-fold CV for", self.model, ":", results)
        print("Mean:", results.mean())
        print("Std:", results.std())

        best_model.fit(self.X_train, self.y_train, 
                       early_stopping_rounds=1000, 
                       eval_set=[(self.X_test, self.y_test)],
                       verbose=self.verbosity)

        y_pred_test = best_model.predict(self.X_test)
        y_prob_test = best_model.predict_proba(self.X_test)[:, 1]

        test_accuracy = accuracy_score(self.y_test, y_pred_test)
        test_f1 = f1_score(self.y_test, y_pred_test)

        test_roc_auc_score = roc_auc_score(self.y_test, y_prob_test)
        test_precision_score = precision_score(self.y_test, y_pred_test)

        print("Best Hyperparameters:", best_params)
        print("Test Accuracy:", test_accuracy)
        print("Test F1 Score:", test_f1)
        print("Test AUC:", test_roc_auc_score)
        print("Test Precision Score:", test_precision_score)

        return best_model


In [24]:
# lightgbm with odds data
optimiser = Optimiser(df, "LGBM", verbosity=False)
model = optimiser.optimise()
model_filename = "lgbm_home_win_with_odds.pkl"
with open(model_filename, "wb") as file:
    pickle.dump(model, file)

[I 2023-07-05 17:41:20,628] A new study created in memory with name: no-name-c3500b8b-184c-48f9-b29c-39f7749503e2
[I 2023-07-05 17:41:20,711] Trial 0 finished with value: 0.7377686523024926 and parameters: {'max_depth': 1, 'learning_rate': 0.05050775271354401, 'n_estimators': 200, 'subsample': 0.5, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 3.866367189790053e-05, 'reg_alpha': 0.96847707935293, 'min_child_weight': 0.02994246844842017}. Best is trial 0 with value: 0.7377686523024926.
[I 2023-07-05 17:41:20,855] Trial 1 finished with value: 0.7379349387410225 and parameters: {'max_depth': 1, 'learning_rate': 0.03509850493836054, 'n_estimators': 400, 'subsample': 0.4, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 0.00035244351487782335, 'reg_alpha': 0.037231809764158935, 'min_child_weight': 5.468859580600358e-05}. Best is trial 1 with value: 0.7379349387410225.
[I 2023-07-05 17:41:21,120] Trial 2 finished with value: 0.7540082805238699 and parameters: {'max_depth': 8, 'l

[I 2023-07-05 17:41:36,670] Trial 22 finished with value: 0.7493745669623997 and parameters: {'max_depth': 7, 'learning_rate': 0.018441525014402223, 'n_estimators': 100, 'subsample': 0.9000000000000001, 'colsample_bytree': 0.4, 'reg_lambda': 7.794644118140835, 'reg_alpha': 0.2900871827292046, 'min_child_weight': 1.4599401788169115}. Best is trial 17 with value: 0.7712513730460497.
[I 2023-07-05 17:41:37,509] Trial 23 finished with value: 0.7706213772708069 and parameters: {'max_depth': 10, 'learning_rate': 0.027003771608544776, 'n_estimators': 600, 'subsample': 1.0, 'colsample_bytree': 0.5, 'reg_lambda': 0.04403471800866459, 'reg_alpha': 1.7374797245443323, 'min_child_weight': 0.1774082686437504}. Best is trial 17 with value: 0.7712513730460497.
[I 2023-07-05 17:41:38,397] Trial 24 finished with value: 0.769453316434305 and parameters: {'max_depth': 10, 'learning_rate': 0.04381156880021076, 'n_estimators': 600, 'subsample': 0.8, 'colsample_bytree': 0.7, 'reg_lambda': 1.414890007055601,

[I 2023-07-05 17:41:57,277] Trial 44 finished with value: 0.7719337558090409 and parameters: {'max_depth': 4, 'learning_rate': 0.03498711140630237, 'n_estimators': 900, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.7, 'reg_lambda': 9.207977574869663, 'reg_alpha': 1.103292253381097, 'min_child_weight': 1.1933812634840795}. Best is trial 41 with value: 0.7720807773553021.
[I 2023-07-05 17:41:58,056] Trial 45 finished with value: 0.7707714406421632 and parameters: {'max_depth': 4, 'learning_rate': 0.05800106801961582, 'n_estimators': 1000, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.8, 'reg_lambda': 8.788406947618515, 'reg_alpha': 0.8834086200970356, 'min_child_weight': 1.106565369985108}. Best is trial 41 with value: 0.7720807773553021.
[I 2023-07-05 17:41:58,750] Trial 46 finished with value: 0.7718732572877058 and parameters: {'max_depth': 4, 'learning_rate': 0.038039043736443394, 'n_estimators': 900, 'subsample': 0.2, 'colsample_bytree': 0.9000000000000001, 'reg_

[I 2023-07-05 17:42:11,695] Trial 66 finished with value: 0.7572941275876638 and parameters: {'max_depth': 6, 'learning_rate': 0.08732771554300779, 'n_estimators': 900, 'subsample': 0.4, 'colsample_bytree': 0.8, 'reg_lambda': 2.2347037674842185, 'reg_alpha': 1.118817984485216, 'min_child_weight': 1.2476580812988047}. Best is trial 59 with value: 0.7727810730882974.
[I 2023-07-05 17:42:12,371] Trial 67 finished with value: 0.7717316434305027 and parameters: {'max_depth': 4, 'learning_rate': 0.04047840412612542, 'n_estimators': 800, 'subsample': 0.30000000000000004, 'colsample_bytree': 1.0, 'reg_lambda': 4.174249789021216, 'reg_alpha': 0.2679531577859429, 'min_child_weight': 0.38528739490160513}. Best is trial 59 with value: 0.7727810730882974.
[I 2023-07-05 17:42:13,347] Trial 68 finished with value: 0.7654367553865653 and parameters: {'max_depth': 5, 'learning_rate': 0.06616894060264919, 'n_estimators': 1000, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.9000000000000001, 're

[I 2023-07-05 17:42:26,419] Trial 88 finished with value: 0.7686516265314745 and parameters: {'max_depth': 5, 'learning_rate': 0.045126993397019796, 'n_estimators': 900, 'subsample': 0.4, 'colsample_bytree': 0.8, 'reg_lambda': 4.293249364643706, 'reg_alpha': 1.5765846205718312, 'min_child_weight': 4.334118076792393}. Best is trial 71 with value: 0.7733455006337135.
[I 2023-07-05 17:42:26,776] Trial 89 finished with value: 0.7384402196873681 and parameters: {'max_depth': 1, 'learning_rate': 0.027669168780014387, 'n_estimators': 900, 'subsample': 0.2, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 7.084302481183561, 'reg_alpha': 0.1345449538570042, 'min_child_weight': 2.5510624333084553}. Best is trial 71 with value: 0.7733455006337135.
[I 2023-07-05 17:42:27,920] Trial 90 finished with value: 0.7625017321504014 and parameters: {'max_depth': 6, 'learning_rate': 0.05471065086890164, 'n_estimators': 1000, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.8, 'reg_lambda': 2.501

Results of K-fold CV for LGBMClassifier() : [0.70732379 0.7001652  0.70787445 0.70594714 0.70201047]
Mean: 0.7046642076247487
Std: 0.003043299599803405




Best Hyperparameters: {'max_depth': 4, 'learning_rate': 0.03831628585040062, 'n_estimators': 900, 'subsample': 0.2, 'colsample_bytree': 0.8, 'reg_lambda': 1.6589792192011186, 'reg_alpha': 0.5750262101450965, 'min_child_weight': 5.16458108325335}
Test Accuracy: 0.7110655737704918
Test F1 Score: 0.6647646219686162
Test AUC: 0.7736158850866075
Test Precision Score: 0.7147239263803681


In [25]:
# xgb without odds data
optimiser = Optimiser(df, "XGB", verbosity=False)
model = optimiser.optimise()

model_filename = "xgb_home_win_with_odds.pkl"
with open(model_filename, "wb") as file:
    pickle.dump(model, file)


[I 2023-07-05 17:43:49,888] A new study created in memory with name: no-name-4162b388-d646-479e-b596-2d4e90ec9550
[I 2023-07-05 17:43:53,465] Trial 0 finished with value: 0.7556758766370933 and parameters: {'max_depth': 3, 'learning_rate': 0.013115152641434652, 'n_estimators': 1000, 'subsample': 0.6000000000000001, 'colsample_bytree': 1.0, 'reg_lambda': 0.17803358180236992, 'reg_alpha': 0.00013134638906976326, 'gamma': 0.0016536198180401554, 'min_child_weight': 5}. Best is trial 0 with value: 0.7556758766370933.
[I 2023-07-05 17:43:54,814] Trial 1 finished with value: 0.7474771440642163 and parameters: {'max_depth': 10, 'learning_rate': 0.02611725081303864, 'n_estimators': 200, 'subsample': 1.0, 'colsample_bytree': 0.2, 'reg_lambda': 0.017553666146364794, 'reg_alpha': 0.13551824116877184, 'gamma': 0.00013583791485704687, 'min_child_weight': 10}. Best is trial 0 with value: 0.7556758766370933.
[I 2023-07-05 17:43:57,173] Trial 2 finished with value: 0.7533167722855935 and parameters: {'

[I 2023-07-05 17:45:13,494] Trial 21 finished with value: 0.763410223912125 and parameters: {'max_depth': 5, 'learning_rate': 0.02076391283854109, 'n_estimators': 1000, 'subsample': 0.8, 'colsample_bytree': 0.4, 'reg_lambda': 0.5431015404376119, 'reg_alpha': 0.013196856199393281, 'gamma': 9.274531534298323, 'min_child_weight': 1}. Best is trial 16 with value: 0.7696256865230249.
[I 2023-07-05 17:45:20,607] Trial 22 finished with value: 0.7687678918462189 and parameters: {'max_depth': 7, 'learning_rate': 0.028972586639300506, 'n_estimators': 1000, 'subsample': 0.9000000000000001, 'colsample_bytree': 0.5, 'reg_lambda': 0.026836060891380305, 'reg_alpha': 0.014817261148075436, 'gamma': 3.6594270517713685, 'min_child_weight': 2}. Best is trial 16 with value: 0.7696256865230249.
[I 2023-07-05 17:45:27,159] Trial 23 finished with value: 0.7692268694550064 and parameters: {'max_depth': 7, 'learning_rate': 0.024694389511390292, 'n_estimators': 900, 'subsample': 0.9000000000000001, 'colsample_by

[I 2023-07-05 17:46:40,170] Trial 42 finished with value: 0.770552429235319 and parameters: {'max_depth': 3, 'learning_rate': 0.0985005781834045, 'n_estimators': 900, 'subsample': 1.0, 'colsample_bytree': 0.30000000000000004, 'reg_lambda': 0.16546677461501663, 'reg_alpha': 0.0006831624619559347, 'gamma': 0.7580954487919044, 'min_child_weight': 4}. Best is trial 31 with value: 0.7718354034643009.
[I 2023-07-05 17:46:42,632] Trial 43 finished with value: 0.7573133924799325 and parameters: {'max_depth': 2, 'learning_rate': 0.03560304073641522, 'n_estimators': 800, 'subsample': 0.9000000000000001, 'colsample_bytree': 0.4, 'reg_lambda': 1.1503736513745904, 'reg_alpha': 0.00041493469211161346, 'gamma': 1.598864281132709, 'min_child_weight': 1}. Best is trial 31 with value: 0.7718354034643009.
[I 2023-07-05 17:46:47,246] Trial 44 finished with value: 0.7607763413603718 and parameters: {'max_depth': 5, 'learning_rate': 0.047948310931628976, 'n_estimators': 1000, 'subsample': 0.6000000000000001

[I 2023-07-05 17:47:47,645] Trial 63 finished with value: 0.7690196873679764 and parameters: {'max_depth': 4, 'learning_rate': 0.07935541775705542, 'n_estimators': 1000, 'subsample': 1.0, 'colsample_bytree': 0.30000000000000004, 'reg_lambda': 0.8051877485804004, 'reg_alpha': 0.00525626413065774, 'gamma': 0.46291239890188707, 'min_child_weight': 7}. Best is trial 31 with value: 0.7718354034643009.
[I 2023-07-05 17:47:51,955] Trial 64 finished with value: 0.7691180397127165 and parameters: {'max_depth': 5, 'learning_rate': 0.06114779309909109, 'n_estimators': 900, 'subsample': 1.0, 'colsample_bytree': 0.4, 'reg_lambda': 0.4315395995409495, 'reg_alpha': 0.00048001514498976735, 'gamma': 1.1137364541813608, 'min_child_weight': 6}. Best is trial 31 with value: 0.7718354034643009.
[I 2023-07-05 17:47:55,653] Trial 65 finished with value: 0.769061934938741 and parameters: {'max_depth': 3, 'learning_rate': 0.08571035491527962, 'n_estimators': 1000, 'subsample': 0.9000000000000001, 'colsample_by

[I 2023-07-05 17:48:42,621] Trial 84 finished with value: 0.7597897760878749 and parameters: {'max_depth': 3, 'learning_rate': 0.04670322545655829, 'n_estimators': 500, 'subsample': 1.0, 'colsample_bytree': 0.4, 'reg_lambda': 0.019474053761100146, 'reg_alpha': 0.0005842725247262394, 'gamma': 3.6095367720218383, 'min_child_weight': 9}. Best is trial 72 with value: 0.7731109421208281.
[I 2023-07-05 17:48:45,094] Trial 85 finished with value: 0.7712696239966204 and parameters: {'max_depth': 4, 'learning_rate': 0.07429245538940114, 'n_estimators': 600, 'subsample': 1.0, 'colsample_bytree': 0.5, 'reg_lambda': 0.03544755753933173, 'reg_alpha': 0.0019216149884007034, 'gamma': 1.242631650492688, 'min_child_weight': 9}. Best is trial 72 with value: 0.7731109421208281.
[I 2023-07-05 17:48:47,517] Trial 86 finished with value: 0.7628366708914238 and parameters: {'max_depth': 4, 'learning_rate': 0.05392456448182267, 'n_estimators': 600, 'subsample': 1.0, 'colsample_bytree': 0.5, 'reg_lambda': 0.04

Results of K-fold CV for XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, gamma=None,
              gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, n_estimators=100, n_jobs=None,
              num_parallel_tree=None, predictor=None, random_state=None,
              reg_alpha=None, reg_lambda=None, ...) : [0.69961454 0.6847467  0.69575991 0.68309471 0.70035803]
Mean: 0.6927147774244544
Std: 0.007366610631281702




Best Hyperparameters: {'max_depth': 3, 'learning_rate': 0.07452314631965326, 'n_estimators': 900, 'subsample': 1.0, 'colsample_bytree': 0.30000000000000004, 'reg_lambda': 0.09919734255846553, 'reg_alpha': 0.0002524212417811137, 'gamma': 1.371890617189692, 'min_child_weight': 9}
Test Accuracy: 0.7040983606557377
Test F1 Score: 0.6558627264061009
Test AUC: 0.7709735530207014
Test Precision Score: 0.7070914696813977


In [None]:
# xgb without odds data
optimiser = Optimiser(df, "GBC", verbosity=False)
model = optimiser.optimise()

model_filename = "GBC_home_win_with_odds.pkl"
with open(model_filename, "wb") as file:
    pickle.dump(model, file)
    
    
# xgb without odds data
optimiser = Optimiser(df, "CAT", verbosity=False)
model = optimiser.optimise()

model_filename = "CAT_home_win_with_odds.pkl"
with open(model_filename, "wb") as file:
    pickle.dump(model, file)

[I 2023-07-05 17:51:33,725] A new study created in memory with name: no-name-957ff15d-5578-469a-aab2-9b887144e92a
[I 2023-07-05 17:51:40,179] Trial 0 finished with value: 0.7368098014364175 and parameters: {'learning_rate': 0.059775910369254254, 'n_estimators': 900, 'subsample': 0.2, 'min_samples_split': 4, 'min_samples_leaf': 9, 'max_depth': 5, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.7368098014364175.
[I 2023-07-05 17:51:41,639] Trial 1 finished with value: 0.7259863117870722 and parameters: {'learning_rate': 0.001722312961233823, 'n_estimators': 300, 'subsample': 0.5, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_depth': 1, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.7368098014364175.
[I 2023-07-05 17:51:45,024] Trial 2 finished with value: 0.7364150401351922 and parameters: {'learning_rate': 0.0018247440531201052, 'n_estimators': 300, 'subsample': 0.7, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_depth': 3, 'max_features': 'sqrt'}. Best is tri

[I 2023-07-05 17:54:05,446] Trial 14 finished with value: 0.7504797634136037 and parameters: {'learning_rate': 0.027986637683448716, 'n_estimators': 800, 'subsample': 0.30000000000000004, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_depth': 3, 'max_features': 'log2'}. Best is trial 6 with value: 0.7506406421630757.
[I 2023-07-05 17:54:16,263] Trial 15 finished with value: 0.7504621884241656 and parameters: {'learning_rate': 0.03706742513603658, 'n_estimators': 1000, 'subsample': 0.7, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_depth': 2, 'max_features': 'log2'}. Best is trial 6 with value: 0.7506406421630757.
[I 2023-07-05 17:54:22,751] Trial 16 finished with value: 0.7508211237853823 and parameters: {'learning_rate': 0.0350791906695544, 'n_estimators': 800, 'subsample': 0.30000000000000004, 'min_samples_split': 4, 'min_samples_leaf': 5, 'max_depth': 3, 'max_features': 'log2'}. Best is trial 16 with value: 0.7508211237853823.
[I 2023-07-05 17:54:30,669] Trial 17 finished

[I 2023-07-05 17:56:02,461] Trial 29 finished with value: 0.7547950992817912 and parameters: {'learning_rate': 0.04493502429970954, 'n_estimators': 500, 'subsample': 0.8, 'min_samples_split': 4, 'min_samples_leaf': 8, 'max_depth': 6, 'max_features': 'log2'}. Best is trial 28 with value: 0.7563173637515843.
[I 2023-07-05 17:56:14,916] Trial 30 finished with value: 0.7594984368398817 and parameters: {'learning_rate': 0.0490372961854511, 'n_estimators': 400, 'subsample': 0.8, 'min_samples_split': 4, 'min_samples_leaf': 8, 'max_depth': 6, 'max_features': 'log2'}. Best is trial 30 with value: 0.7594984368398817.
[I 2023-07-05 17:56:27,365] Trial 31 finished with value: 0.7593923109421208 and parameters: {'learning_rate': 0.05070363843953491, 'n_estimators': 400, 'subsample': 0.8, 'min_samples_split': 4, 'min_samples_leaf': 8, 'max_depth': 6, 'max_features': 'log2'}. Best is trial 30 with value: 0.7594984368398817.
[I 2023-07-05 17:56:39,739] Trial 32 finished with value: 0.7578599070553442 

[I 2023-07-05 17:58:24,014] Trial 43 finished with value: 0.7582539923954372 and parameters: {'learning_rate': 0.0338498685265353, 'n_estimators': 400, 'subsample': 0.9000000000000001, 'min_samples_split': 4, 'min_samples_leaf': 9, 'max_depth': 6, 'max_features': 'log2'}. Best is trial 30 with value: 0.7594984368398817.
[I 2023-07-05 17:58:42,606] Trial 44 finished with value: 0.7586764681030841 and parameters: {'learning_rate': 0.040180890690744465, 'n_estimators': 500, 'subsample': 1.0, 'min_samples_split': 4, 'min_samples_leaf': 8, 'max_depth': 6, 'max_features': 'log2'}. Best is trial 30 with value: 0.7594984368398817.
[I 2023-07-05 17:58:51,760] Trial 45 finished with value: 0.7525022391212505 and parameters: {'learning_rate': 0.06408431899707769, 'n_estimators': 300, 'subsample': 1.0, 'min_samples_split': 4, 'min_samples_leaf': 9, 'max_depth': 7, 'max_features': 'sqrt'}. Best is trial 30 with value: 0.7594984368398817.
[I 2023-07-05 17:59:02,044] Trial 46 finished with value: 0.7

[I 2023-07-05 18:02:27,230] Trial 58 finished with value: 0.7582526404731729 and parameters: {'learning_rate': 0.05915998204675285, 'n_estimators': 600, 'subsample': 1.0, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_depth': 4, 'max_features': 'log2'}. Best is trial 56 with value: 0.7628606675116181.
[I 2023-07-05 18:02:42,387] Trial 59 finished with value: 0.7628390367553866 and parameters: {'learning_rate': 0.07684630565242746, 'n_estimators': 600, 'subsample': 1.0, 'min_samples_split': 7, 'min_samples_leaf': 6, 'max_depth': 4, 'max_features': 'log2'}. Best is trial 56 with value: 0.7628606675116181.
[I 2023-07-05 18:02:55,006] Trial 60 finished with value: 0.7610693705111957 and parameters: {'learning_rate': 0.07796143240795518, 'n_estimators': 500, 'subsample': 1.0, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_depth': 4, 'max_features': 'log2'}. Best is trial 56 with value: 0.7628606675116181.
[I 2023-07-05 18:03:07,646] Trial 61 finished with value: 0.7609639205745669

[I 2023-07-05 18:05:35,484] Trial 73 finished with value: 0.7623002957329953 and parameters: {'learning_rate': 0.09748986636623776, 'n_estimators': 600, 'subsample': 1.0, 'min_samples_split': 9, 'min_samples_leaf': 1, 'max_depth': 4, 'max_features': 'log2'}. Best is trial 56 with value: 0.7628606675116181.
[I 2023-07-05 18:05:50,604] Trial 74 finished with value: 0.7585960287283482 and parameters: {'learning_rate': 0.09841364378087833, 'n_estimators': 600, 'subsample': 1.0, 'min_samples_split': 8, 'min_samples_leaf': 1, 'max_depth': 4, 'max_features': 'log2'}. Best is trial 56 with value: 0.7628606675116181.
[I 2023-07-05 18:06:08,217] Trial 75 finished with value: 0.7607570764681031 and parameters: {'learning_rate': 0.060819889023788355, 'n_estimators': 700, 'subsample': 1.0, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_depth': 4, 'max_features': 'log2'}. Best is trial 56 with value: 0.7628606675116181.
[I 2023-07-05 18:06:24,569] Trial 76 finished with value: 0.761865652724968

[I 2023-07-05 18:09:16,286] Trial 87 finished with value: 0.7523288550908322 and parameters: {'learning_rate': 0.09999996459852539, 'n_estimators': 800, 'subsample': 1.0, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_depth': 3, 'max_features': 'sqrt'}. Best is trial 56 with value: 0.7628606675116181.


In [18]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

thresholds = [0.25, 0.3, 0.35, 0.40, 0.45, 0.5, 0.55, 0.60, 0.65, .70, .75, 0.8, 0.85, 0.9]

for t in thresholds:
    preds = []
    for i in range(len(y_prob_test)):
        if y_prob_test[i] >= t:
            preds.append(1.)
        else:
            preds.append(0.)
    
    precision = precision_score(y_test, preds)
    recall = recall_score(y_test, preds)
    f1 = f1_score(y_test, preds)
    cm = confusion_matrix(y_test, preds)
    
    print(f"Threshold: {t}")
    print("Confusion Matrix:")
    print(cm)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("-----------------------")
    

Threshold: 0.25
Confusion Matrix:
[[ 466  846]
 [ 110 1014]]
Precision: 0.5451612903225806
Recall: 0.902135231316726
F1 Score: 0.6796246648793566
-----------------------
Threshold: 0.3
Confusion Matrix:
[[615 697]
 [164 960]]
Precision: 0.5793602896801449
Recall: 0.8540925266903915
F1 Score: 0.6903991370010787
-----------------------
Threshold: 0.35
Confusion Matrix:
[[745 567]
 [224 900]]
Precision: 0.6134969325153374
Recall: 0.800711743772242
F1 Score: 0.6947124662292551
-----------------------
Threshold: 0.4
Confusion Matrix:
[[854 458]
 [303 821]]
Precision: 0.6419077404222049
Recall: 0.7304270462633452
F1 Score: 0.6833125260091554
-----------------------
Threshold: 0.45
Confusion Matrix:
[[954 358]
 [373 751]]
Precision: 0.6771866546438232
Recall: 0.6681494661921709
F1 Score: 0.6726377071204657
-----------------------
Threshold: 0.5
Confusion Matrix:
[[1025  287]
 [ 437  687]]
Precision: 0.7053388090349076
Recall: 0.6112099644128114
F1 Score: 0.6549094375595805
-------------------

In [53]:
def get_profits(odds, preds, threshold, validation_set):

    profit = 0
    bets = 0
    odds = odds.reset_index(drop = True)

    for i in range(len(odds)):

        if preds[i] > threshold:
            bets += 1
            o = odds[i]
            bet = 1 * o 
            if validation_set[i] == 1:
                profit += bet - 1
            else:
                profit += -1
    return profit, bets, profit/bets*100

def get_value_strat1(odds, preds, threshold, validation_set):
    
    c1 = 0
    c2 = 0
    p1 = 0
    odds = odds.reset_index(drop = True)
    
    for i in range(len(preds)):
        if preds[i] > 1/odds[i]:
            if abs(preds[i] - 1/odds[i]) > threshold:
                if validation_set[i] == 1:
                    o = odds[i]
                    p1 += (1 * o)
                    c1 += 1
                else:
                    c2 += 1
                    p1 += -1
                    
    return p1, c1, c2, (p1/(c1+c2))*100

def get_value_strat2(odds, preds, threshold, validation_set):
    
    c1 = 0
    c2 = 0
    p1 = 0
    odds = odds.reset_index(drop = True)
    
    for i in range(len(preds)):
        if preds[i] < 1/odds[i]:
            if abs(preds[i] - 1/odds[i]) > threshold:
                if validation_set[i] == 0:
                    p = 1 - (1/odds[i])
                    p1 += (1 * (1/p)) - 1
                    c1 += 1
                else:
                    c2 += 1
                    p1 += -1
                    
    return p1, c1, c2, (p1/(c1+c2))*100

In [54]:
validation_set = y_test.reset_index(drop = True)

for t in [0.25, 0.3, 0.35, 0.40, 0.45, 0.5, 0.55, 0.60, 0.65, .70, .75, 0.8, 0.85, 0.9]:
    results = get_profits(odds, y_prob_test, t, validation_set)
    print(f"At threshold {t} for predictions: \n Profit: {results[0]} \n Number of bets: {results[1]} \n Return %: {results[2]} \n")

print(get_value_strat1(odds, preds, 0.05, validation_set))
print(get_value_strat2(odds, preds, 0.05, validation_set))

At threshold 0.25 for predictions: 
 Profit: 83.20433333333328 
 Number of bets: 1860 
 Return %: 4.473351254480284 

At threshold 0.3 for predictions: 
 Profit: 124.12099999999995 
 Number of bets: 1657 
 Return %: 7.490706095353045 

At threshold 0.35 for predictions: 
 Profit: 146.62933333333334 
 Number of bets: 1467 
 Return %: 9.995182912974323 

At threshold 0.4 for predictions: 
 Profit: 139.781 
 Number of bets: 1279 
 Return %: 10.928928850664581 

At threshold 0.45 for predictions: 
 Profit: 143.16933333333338 
 Number of bets: 1109 
 Return %: 12.909768560264506 

At threshold 0.5 for predictions: 
 Profit: 134.03266666666678 
 Number of bets: 974 
 Return %: 13.761054072553058 

At threshold 0.55 for predictions: 
 Profit: 124.90433333333337 
 Number of bets: 840 
 Return %: 14.869563492063495 

At threshold 0.6 for predictions: 
 Profit: 98.78766666666671 
 Number of bets: 682 
 Return %: 14.484995112414472 

At threshold 0.65 for predictions: 
 Profit: 68.6933333333333 


In [62]:
X_train = train[features]
X_test = test[features]
y_train = train['home_win']
y_test = test['home_win']

model = LGBMClassifier()

kfold = StratifiedKFold(n_splits=5, shuffle=True)
results = cross_val_score(model, X_train, y_train, cv=kfold, verbose=True)

print("Results: ", results)
print("Mean: ", results.mean())
print("Std: ", results.std())

cv_results = cross_validate(model, X_train, y_train, cv=kfold, return_estimator=True)
trained_models = cv_results['estimator']
trained_model = model.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.6s finished


Results:  [0.71822199 0.71016376 0.71380296 0.70280811 0.70826833]
Mean:  0.7106530323079312
Std:  0.005189993417007074


In [63]:
X_train = train[features]
X_test = test[features]
y_train = train['home_win']
y_test = test['home_win']

def objective(trial):

    params = {
        'boosting_type': 'gbdt',
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000, step=100),
        'subsample': trial.suggest_float('subsample', 0.2, 1.0, step=0.1),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.2, 1.0, step=0.1),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-5, 10, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-5, 10, log=True)
    }

    model = LGBMClassifier(**params)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred)
    auc = np.mean(auc)
    obj_vector = np.array([accuracy, f1, precision, auc])
    print(obj_vector)
    return auc

study = optuna.create_study(directions=['maximize'])  
study.optimize(objective, n_trials=100)


best_params = study.best_params
best_model = LGBMClassifier(**best_params)
best_model.fit(X_train, y_train, early_stopping_rounds=1000,
                  eval_set=[(X_test, y_test)])

y_pred_test = best_model.predict(X_test)
y_prob_test = best_model.predict_proba(X_test)[:, 1] 

test_accuracy = accuracy_score(y_test, y_pred_test)
test_f1 = f1_score(y_test, y_pred_test)

test_roc_auc_score = roc_auc_score(y_test, y_prob_test)
test_precision_score = precision_score(y_test, y_pred_test)

print("Best Hyperparameters:", best_params)
print("Test Accuracy:", test_accuracy)
print("Test F1 Score:", test_f1)
print("Test auc:", test_roc_auc_score)
print("Test Precision Score:", test_precision_score)

model_filename = "lgbm_home_win_with_odds.pkl"
with open(model_filename, "wb") as file:
    pickle.dump(best_model, file)

[I 2023-06-30 16:35:41,832] A new study created in memory with name: no-name-b801ac68-d7b6-4f9d-8fd9-f0afa4402172
[I 2023-06-30 16:35:42,958] Trial 0 finished with value: 0.6799594219251801 and parameters: {'max_depth': 6, 'learning_rate': 0.0033957085450997343, 'n_estimators': 900, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.5, 'reg_lambda': 7.483684930461474e-05, 'reg_alpha': 0.0001549762589105797}. Best is trial 0 with value: 0.6799594219251801.


[0.68842365 0.62812347 0.69901854 0.67995942]


[I 2023-06-30 16:35:43,339] Trial 1 finished with value: 0.6889891285478691 and parameters: {'max_depth': 4, 'learning_rate': 0.08191744158464657, 'n_estimators': 600, 'subsample': 0.5, 'colsample_bytree': 0.5, 'reg_lambda': 0.00013597601715941752, 'reg_alpha': 9.930740700875111e-05}. Best is trial 1 with value: 0.6889891285478691.


[0.69499179 0.64903165 0.6918429  0.68898913]


[I 2023-06-30 16:35:44,016] Trial 2 finished with value: 0.6851727280618002 and parameters: {'max_depth': 4, 'learning_rate': 0.0047852502965763585, 'n_estimators': 800, 'subsample': 0.6000000000000001, 'colsample_bytree': 1.0, 'reg_lambda': 0.0017487553352383877, 'reg_alpha': 0.0023592832532525376}. Best is trial 1 with value: 0.6889891285478691.


[0.69252874 0.63903614 0.69716088 0.68517273]


[I 2023-06-30 16:35:44,363] Trial 3 finished with value: 0.677293095217429 and parameters: {'max_depth': 3, 'learning_rate': 0.007422600299561091, 'n_estimators': 600, 'subsample': 0.5, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 0.00015120875736949062, 'reg_alpha': 1.2503327553343988}. Best is trial 1 with value: 0.6889891285478691.


[0.68513957 0.62785056 0.6905016  0.6772931 ]


[I 2023-06-30 16:35:44,756] Trial 4 finished with value: 0.6854927957642567 and parameters: {'max_depth': 7, 'learning_rate': 0.04094635884167254, 'n_estimators': 300, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.2, 'reg_lambda': 0.0013117207229135952, 'reg_alpha': 0.0006033914190677552}. Best is trial 1 with value: 0.6889891285478691.


[0.69211823 0.64251668 0.69199179 0.6854928 ]


[I 2023-06-30 16:35:45,081] Trial 5 finished with value: 0.6645826100164917 and parameters: {'max_depth': 5, 'learning_rate': 0.012377798476398118, 'n_estimators': 300, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.30000000000000004, 'reg_lambda': 0.0018515502828461874, 'reg_alpha': 0.4122022692771079}. Best is trial 1 with value: 0.6889891285478691.


[0.67364532 0.60740741 0.68257492 0.66458261]


[I 2023-06-30 16:35:45,454] Trial 6 finished with value: 0.6545398077423835 and parameters: {'max_depth': 8, 'learning_rate': 0.006440644442267904, 'n_estimators': 300, 'subsample': 0.8, 'colsample_bytree': 0.30000000000000004, 'reg_lambda': 0.0002799296477872034, 'reg_alpha': 7.068318036222236e-05}. Best is trial 1 with value: 0.6889891285478691.


[0.66502463 0.58829465 0.67948718 0.65453981]


[I 2023-06-30 16:35:45,756] Trial 7 finished with value: 0.6899981555420536 and parameters: {'max_depth': 8, 'learning_rate': 0.009378726488492506, 'n_estimators': 200, 'subsample': 0.9000000000000001, 'colsample_bytree': 0.7, 'reg_lambda': 0.003956366738154925, 'reg_alpha': 4.427223377366792e-05}. Best is trial 7 with value: 0.6899981555420536.


[0.69827586 0.6405868  0.7111835  0.68999816]


[I 2023-06-30 16:35:46,144] Trial 8 finished with value: 0.6519874034372016 and parameters: {'max_depth': 4, 'learning_rate': 0.0014457303557080588, 'n_estimators': 500, 'subsample': 0.9000000000000001, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 0.0022032471543014936, 'reg_alpha': 7.942084067850918e-05}. Best is trial 7 with value: 0.6899981555420536.


[0.66584565 0.56609808 0.70611702 0.6519874 ]


[I 2023-06-30 16:35:46,792] Trial 9 finished with value: 0.6917178413332176 and parameters: {'max_depth': 5, 'learning_rate': 0.009190384943585584, 'n_estimators': 600, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 0.0018107948397834812, 'reg_alpha': 4.248323067535345}. Best is trial 9 with value: 0.6917178413332176.


[0.69868637 0.64813039 0.7027027  0.69171784]


[I 2023-06-30 16:35:47,164] Trial 10 finished with value: 0.6776145191389636 and parameters: {'max_depth': 1, 'learning_rate': 0.02294848657201316, 'n_estimators': 1000, 'subsample': 0.7, 'colsample_bytree': 1.0, 'reg_lambda': 0.13910813905473401, 'reg_alpha': 9.446081579068625}. Best is trial 9 with value: 0.6917178413332176.


[0.68431856 0.63328565 0.68242549 0.67761452]


[I 2023-06-30 16:35:47,371] Trial 11 finished with value: 0.6911400920059022 and parameters: {'max_depth': 10, 'learning_rate': 0.01652114988674931, 'n_estimators': 100, 'subsample': 1.0, 'colsample_bytree': 0.8, 'reg_lambda': 0.0839403034426011, 'reg_alpha': 0.02234717587975891}. Best is trial 9 with value: 0.6917178413332176.


[0.6999179  0.639724   0.71712707 0.69114009]
[0.70155993 0.64380206 0.71646674 0.69317442]


[I 2023-06-30 16:35:47,568] Trial 12 finished with value: 0.6931744206232098 and parameters: {'max_depth': 10, 'learning_rate': 0.018668609363599566, 'n_estimators': 100, 'subsample': 1.0, 'colsample_bytree': 0.8, 'reg_lambda': 0.06309790619756439, 'reg_alpha': 0.053108472523400986}. Best is trial 12 with value: 0.6931744206232098.
[I 2023-06-30 16:35:48,276] Trial 13 finished with value: 0.7006146384862425 and parameters: {'max_depth': 10, 'learning_rate': 0.02885435722554123, 'n_estimators': 500, 'subsample': 0.2, 'colsample_bytree': 0.8, 'reg_lambda': 2.9449961864760237, 'reg_alpha': 0.03988166259073895}. Best is trial 13 with value: 0.7006146384862425.


[0.70689655 0.66096866 0.70875764 0.70061464]
[0.70073892 0.64833575 0.7081138  0.69336836]


[I 2023-06-30 16:35:48,480] Trial 14 finished with value: 0.693368359951393 and parameters: {'max_depth': 10, 'learning_rate': 0.03032823316922083, 'n_estimators': 100, 'subsample': 0.2, 'colsample_bytree': 0.8, 'reg_lambda': 7.088735634377983, 'reg_alpha': 0.07149214466439999}. Best is trial 13 with value: 0.7006146384862425.
[I 2023-06-30 16:35:49,082] Trial 15 finished with value: 0.7003596692995399 and parameters: {'max_depth': 9, 'learning_rate': 0.03308930217106253, 'n_estimators': 400, 'subsample': 0.2, 'colsample_bytree': 0.8, 'reg_lambda': 8.845408380534133, 'reg_alpha': 0.12941820089529577}. Best is trial 13 with value: 0.7006146384862425.


[0.70689655 0.65967588 0.71047228 0.70035967]


[I 2023-06-30 16:35:49,643] Trial 16 finished with value: 0.7063975566357087 and parameters: {'max_depth': 8, 'learning_rate': 0.051780452131762605, 'n_estimators': 400, 'subsample': 0.2, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 9.06826688759588, 'reg_alpha': 0.009826364138760217}. Best is trial 16 with value: 0.7063975566357087.


[0.71223317 0.66918358 0.71256281 0.70639756]


[I 2023-06-30 16:35:50,528] Trial 17 finished with value: 0.6912784263518792 and parameters: {'max_depth': 8, 'learning_rate': 0.06886892442180567, 'n_estimators': 700, 'subsample': 0.4, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 1.3870772029900902, 'reg_alpha': 0.005264750673450992}. Best is trial 16 with value: 0.7063975566357087.


[0.69663383 0.65418811 0.69002962 0.69127843]


[I 2023-06-30 16:35:51,067] Trial 18 finished with value: 0.7005536086277233 and parameters: {'max_depth': 7, 'learning_rate': 0.05510805060732313, 'n_estimators': 400, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 1.1691704662358575, 'reg_alpha': 0.009427996595692356}. Best is trial 16 with value: 0.7063975566357087.


[0.70607553 0.66384977 0.7027833  0.70055361]


[I 2023-06-30 16:35:51,735] Trial 19 finished with value: 0.6915320393195035 and parameters: {'max_depth': 9, 'learning_rate': 0.0899798508338393, 'n_estimators': 500, 'subsample': 0.4, 'colsample_bytree': 0.7, 'reg_lambda': 1.59420602736326, 'reg_alpha': 0.0015148200945954986}. Best is trial 16 with value: 0.7063975566357087.


[0.69704433 0.65384615 0.69146825 0.69153204]


[I 2023-06-30 16:35:52,286] Trial 20 finished with value: 0.6954067572259353 and parameters: {'max_depth': 9, 'learning_rate': 0.05021501266808442, 'n_estimators': 400, 'subsample': 0.2, 'colsample_bytree': 1.0, 'reg_lambda': 0.3284727218679044, 'reg_alpha': 1.2681541396545948e-05}. Best is trial 16 with value: 0.7063975566357087.


[0.70114943 0.65725047 0.698      0.69540676]


[I 2023-06-30 16:35:52,811] Trial 21 finished with value: 0.6907074581199548 and parameters: {'max_depth': 7, 'learning_rate': 0.05593717596482994, 'n_estimators': 400, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 1.6981754884689602, 'reg_alpha': 0.014428506980925812}. Best is trial 16 with value: 0.7063975566357087.


[0.69581281 0.65454545 0.68756121 0.69070746]


[I 2023-06-30 16:35:53,439] Trial 22 finished with value: 0.6971237305789427 and parameters: {'max_depth': 7, 'learning_rate': 0.0984413006190074, 'n_estimators': 500, 'subsample': 0.2, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 5.836175020709964, 'reg_alpha': 0.005374484555205109}. Best is trial 16 with value: 0.7063975566357087.


[0.70238095 0.66105657 0.69655172 0.69712373]


[I 2023-06-30 16:35:54,246] Trial 23 finished with value: 0.7038573583022307 and parameters: {'max_depth': 6, 'learning_rate': 0.04839195208563363, 'n_estimators': 700, 'subsample': 0.4, 'colsample_bytree': 0.7, 'reg_lambda': 0.6334458202534671, 'reg_alpha': 0.01786509872825797}. Best is trial 16 with value: 0.7063975566357087.


[0.70935961 0.66760563 0.70675944 0.70385736]


[I 2023-06-30 16:35:55,062] Trial 24 finished with value: 0.6990916044614183 and parameters: {'max_depth': 6, 'learning_rate': 0.03240647048175348, 'n_estimators': 700, 'subsample': 0.4, 'colsample_bytree': 0.7, 'reg_lambda': 0.02789555969479975, 'reg_alpha': 0.022018038458405498}. Best is trial 16 with value: 0.7063975566357087.


[0.70484401 0.66132831 0.7027027  0.6990916 ]


[I 2023-06-30 16:35:55,965] Trial 25 finished with value: 0.6934375271243816 and parameters: {'max_depth': 9, 'learning_rate': 0.04270073605959379, 'n_estimators': 700, 'subsample': 0.5, 'colsample_bytree': 0.5, 'reg_lambda': 0.40672026280555174, 'reg_alpha': 0.21821550462669084}. Best is trial 16 with value: 0.7063975566357087.


[0.69909688 0.65538317 0.69491525 0.69343753]


[I 2023-06-30 16:35:57,059] Trial 26 finished with value: 0.7030287084454474 and parameters: {'max_depth': 8, 'learning_rate': 0.027374607352472314, 'n_estimators': 800, 'subsample': 0.4, 'colsample_bytree': 0.7, 'reg_lambda': 3.8107192174707376, 'reg_alpha': 0.07917864615758582}. Best is trial 16 with value: 0.7063975566357087.


[0.70935961 0.6634981  0.7122449  0.70302871]


[I 2023-06-30 16:35:57,951] Trial 27 finished with value: 0.6937562386077597 and parameters: {'max_depth': 6, 'learning_rate': 0.06568977505417774, 'n_estimators': 800, 'subsample': 0.4, 'colsample_bytree': 0.7, 'reg_lambda': 0.44267142881856414, 'reg_alpha': 0.3345675560054851}. Best is trial 16 with value: 0.7063975566357087.


[0.69909688 0.65699579 0.69299112 0.69375624]


[I 2023-06-30 16:35:59,147] Trial 28 finished with value: 0.7067135556809304 and parameters: {'max_depth': 8, 'learning_rate': 0.021666346746337602, 'n_estimators': 900, 'subsample': 0.5, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 9.817098592636615, 'reg_alpha': 0.10422672062405058}. Best is trial 28 with value: 0.7067135556809304.


[0.71305419 0.66761769 0.71705822 0.70671356]


[I 2023-06-30 16:36:00,222] Trial 29 finished with value: 0.6936897838729277 and parameters: {'max_depth': 6, 'learning_rate': 0.01940279995397545, 'n_estimators': 900, 'subsample': 0.7, 'colsample_bytree': 0.4, 'reg_lambda': 4.058221787446444, 'reg_alpha': 0.0003783261055181683}. Best is trial 28 with value: 0.7067135556809304.


[0.6999179  0.65339023 0.69949239 0.69368978]


[I 2023-06-30 16:36:00,657] Trial 30 finished with value: 0.6823748481034632 and parameters: {'max_depth': 2, 'learning_rate': 0.013698669967848303, 'n_estimators': 1000, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 7.988535814476174, 'reg_alpha': 0.03389053222594849}. Best is trial 28 with value: 0.7067135556809304.


[0.69047619 0.63255361 0.69935345 0.68237485]


[I 2023-06-30 16:36:01,812] Trial 31 finished with value: 0.6957241124902352 and parameters: {'max_depth': 8, 'learning_rate': 0.021214950089914118, 'n_estimators': 900, 'subsample': 0.5, 'colsample_bytree': 0.5, 'reg_lambda': 2.7201914671311758, 'reg_alpha': 0.09837732824914658}. Best is trial 28 with value: 0.7067135556809304.


[0.70155993 0.65723715 0.69909729 0.69572411]


[I 2023-06-30 16:36:02,789] Trial 32 finished with value: 0.700743479298672 and parameters: {'max_depth': 7, 'learning_rate': 0.04041015268015262, 'n_estimators': 800, 'subsample': 0.4, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 8.269851928924536, 'reg_alpha': 0.011634855394857297}. Best is trial 28 with value: 0.7067135556809304.


[0.70648604 0.66321244 0.7047047  0.70074348]


[I 2023-06-30 16:36:03,839] Trial 33 finished with value: 0.7001074125509938 and parameters: {'max_depth': 8, 'learning_rate': 0.026560568342111783, 'n_estimators': 800, 'subsample': 0.5, 'colsample_bytree': 0.7, 'reg_lambda': 0.9270728008745543, 'reg_alpha': 0.051789099091578694}. Best is trial 28 with value: 0.7067135556809304.


[0.70607553 0.66162571 0.70564516 0.70010741]


[I 2023-06-30 16:36:04,714] Trial 34 finished with value: 0.6923593329572086 and parameters: {'max_depth': 5, 'learning_rate': 0.06814296877160232, 'n_estimators': 900, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.5, 'reg_lambda': 2.9355889850722594, 'reg_alpha': 0.13691655562111235}. Best is trial 28 with value: 0.7067135556809304.


[0.69745484 0.65641026 0.68952008 0.69235933]


[I 2023-06-30 16:36:05,666] Trial 35 finished with value: 0.6952806288516622 and parameters: {'max_depth': 9, 'learning_rate': 0.03765649037580498, 'n_estimators': 700, 'subsample': 0.5, 'colsample_bytree': 0.4, 'reg_lambda': 3.554256058291242, 'reg_alpha': 0.6470828565307712}. Best is trial 28 with value: 0.7067135556809304.


[0.70073892 0.65822785 0.69573835 0.69528063]


[I 2023-06-30 16:36:06,448] Trial 36 finished with value: 0.6943231381824494 and parameters: {'max_depth': 7, 'learning_rate': 0.023281066199325486, 'n_estimators': 600, 'subsample': 0.7, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 9.84078513767068, 'reg_alpha': 0.0027982618852927217}. Best is trial 28 with value: 0.7067135556809304.


[0.70114943 0.65167464 0.70496894 0.69432314]


[I 2023-06-30 16:36:07,201] Trial 37 finished with value: 0.6990916044614183 and parameters: {'max_depth': 4, 'learning_rate': 0.04539871615539839, 'n_estimators': 1000, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.4, 'reg_lambda': 0.6671824105211022, 'reg_alpha': 0.16132178998349925}. Best is trial 28 with value: 0.7067135556809304.


[0.70484401 0.66132831 0.7027027  0.6990916 ]


[I 2023-06-30 16:36:08,320] Trial 38 finished with value: 0.7034098060064231 and parameters: {'max_depth': 8, 'learning_rate': 0.01570375448085569, 'n_estimators': 800, 'subsample': 0.4, 'colsample_bytree': 0.7, 'reg_lambda': 0.23890955349236268, 'reg_alpha': 0.03070952254511473}. Best is trial 28 with value: 0.7067135556809304.


[0.70977011 0.6638136  0.71297242 0.70340981]


[I 2023-06-30 16:36:09,237] Trial 39 finished with value: 0.6984528252755837 and parameters: {'max_depth': 5, 'learning_rate': 0.015804428849966707, 'n_estimators': 900, 'subsample': 0.5, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 0.1676501679529765, 'reg_alpha': 0.021393319878630034}. Best is trial 28 with value: 0.7067135556809304.


[0.70525452 0.65645933 0.71014493 0.69845283]


[I 2023-06-30 16:36:09,650] Trial 40 finished with value: 0.682948528773544 and parameters: {'max_depth': 3, 'learning_rate': 0.013701406405143906, 'n_estimators': 700, 'subsample': 0.30000000000000004, 'colsample_bytree': 1.0, 'reg_lambda': 0.6497067806102498, 'reg_alpha': 0.007132107082485867}. Best is trial 28 with value: 0.7067135556809304.


[0.69047619 0.63574879 0.69556025 0.68294853]


[I 2023-06-30 16:36:10,703] Trial 41 finished with value: 0.6970586320631891 and parameters: {'max_depth': 8, 'learning_rate': 0.024158325962121308, 'n_estimators': 800, 'subsample': 0.4, 'colsample_bytree': 0.7, 'reg_lambda': 2.8109520362735188, 'reg_alpha': 0.035186255290502}. Best is trial 28 with value: 0.7067135556809304.


[0.70279146 0.65913371 0.7        0.69705863]


[I 2023-06-30 16:36:11,807] Trial 42 finished with value: 0.703091094523045 and parameters: {'max_depth': 8, 'learning_rate': 0.01236335203326929, 'n_estimators': 800, 'subsample': 0.4, 'colsample_bytree': 0.7, 'reg_lambda': 2.0795622890548686, 'reg_alpha': 0.08968071774790792}. Best is trial 28 with value: 0.7067135556809304.


[0.70977011 0.66220736 0.71517028 0.70309109]


[I 2023-06-30 16:36:12,635] Trial 43 finished with value: 0.6936870714347713 and parameters: {'max_depth': 7, 'learning_rate': 0.010825138107007507, 'n_estimators': 600, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.8, 'reg_lambda': 0.8909978671583662, 'reg_alpha': 0.26225966765758135}. Best is trial 28 with value: 0.7067135556809304.


[0.70073892 0.650024   0.70594369 0.69368707]


[I 2023-06-30 16:36:13,951] Trial 44 finished with value: 0.6889877723287908 and parameters: {'max_depth': 8, 'learning_rate': 0.006760532573247477, 'n_estimators': 900, 'subsample': 0.4, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 1.7832562453833654, 'reg_alpha': 0.8607677328688312}. Best is trial 28 with value: 0.7067135556809304.


[0.6954023  0.6473384  0.69489796 0.68898777]


[I 2023-06-30 16:36:15,118] Trial 45 finished with value: 0.6974370171860081 and parameters: {'max_depth': 9, 'learning_rate': 0.008320209226774844, 'n_estimators': 800, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.7, 'reg_lambda': 0.26447620726234844, 'reg_alpha': 0.015186445014563713}. Best is trial 28 with value: 0.7067135556809304.


[0.70402299 0.65617549 0.70709147 0.69743702]


[I 2023-06-30 16:36:15,400] Trial 46 finished with value: 0.6540949678847323 and parameters: {'max_depth': 6, 'learning_rate': 0.010742193079549066, 'n_estimators': 200, 'subsample': 0.5, 'colsample_bytree': 0.2, 'reg_lambda': 0.5808971923340446, 'reg_alpha': 0.07672411605458569}. Best is trial 28 with value: 0.7067135556809304.


[0.66461412 0.58758203 0.67911319 0.65409497]


[I 2023-06-30 16:36:16,209] Trial 47 finished with value: 0.6989614074299106 and parameters: {'max_depth': 7, 'learning_rate': 0.016530687148895745, 'n_estimators': 600, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.8, 'reg_lambda': 0.17204638953458373, 'reg_alpha': 0.033192667247457126}. Best is trial 28 with value: 0.7067135556809304.


[0.70566502 0.65742953 0.71001032 0.69896141]


[I 2023-06-30 16:36:17,605] Trial 48 finished with value: 0.6819964629806441 and parameters: {'max_depth': 8, 'learning_rate': 0.004501193352453019, 'n_estimators': 1000, 'subsample': 0.5, 'colsample_bytree': 0.5, 'reg_lambda': 4.6632053750977045, 'reg_alpha': 0.48970690963940183}. Best is trial 28 with value: 0.7067135556809304.


[0.68924466 0.63588264 0.6921466  0.68199646]


[I 2023-06-30 16:36:18,425] Trial 49 finished with value: 0.7064599427133061 and parameters: {'max_depth': 6, 'learning_rate': 0.033897390991186965, 'n_estimators': 700, 'subsample': 0.4, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 1.5409931272979087, 'reg_alpha': 0.12835983057194006}. Best is trial 28 with value: 0.7067135556809304.


[0.71264368 0.66793169 0.71544715 0.70645994]


[I 2023-06-30 16:36:19,281] Trial 50 finished with value: 0.7070322671643086 and parameters: {'max_depth': 6, 'learning_rate': 0.034777988696943526, 'n_estimators': 700, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 1.123937079562094, 'reg_alpha': 0.28933686530206554}. Best is trial 50 with value: 0.7070322671643086.


[0.71305419 0.66919072 0.7148635  0.70703227]


[I 2023-06-30 16:36:20,140] Trial 51 finished with value: 0.7020142565749501 and parameters: {'max_depth': 6, 'learning_rate': 0.03615325083896145, 'n_estimators': 700, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 0.9110782936808016, 'reg_alpha': 1.4878285258988722}. Best is trial 50 with value: 0.7070322671643086.


[0.70771757 0.66478343 0.706      0.70201426]


[I 2023-06-30 16:36:20,582] Trial 52 finished with value: 0.6973705624511761 and parameters: {'max_depth': 4, 'learning_rate': 0.03089188446074245, 'n_estimators': 600, 'subsample': 0.2, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 1.2231788859732668, 'reg_alpha': 0.26998107407723243}. Best is trial 50 with value: 0.7070322671643086.


[0.70484401 0.65248913 0.71428571 0.69737056]


[I 2023-06-30 16:36:21,281] Trial 53 finished with value: 0.694134623730579 and parameters: {'max_depth': 5, 'learning_rate': 0.050273535661902465, 'n_estimators': 700, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 5.400689562136068, 'reg_alpha': 0.15598080880463805}. Best is trial 50 with value: 0.7070322671643086.


[0.70032841 0.65402844 0.69979716 0.69413462]


[I 2023-06-30 16:36:21,821] Trial 54 finished with value: 0.6909570024303445 and parameters: {'max_depth': 5, 'learning_rate': 0.04083742357471448, 'n_estimators': 500, 'subsample': 0.2, 'colsample_bytree': 0.5, 'reg_lambda': 1.5650002312482192, 'reg_alpha': 0.052050547698880234}. Best is trial 50 with value: 0.7070322671643086.


[0.69745484 0.64921466 0.69805527 0.690957  ]


[I 2023-06-30 16:36:22,201] Trial 55 finished with value: 0.696741276798889 and parameters: {'max_depth': 7, 'learning_rate': 0.07776589087140232, 'n_estimators': 300, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.8, 'reg_lambda': 0.331017539398628, 'reg_alpha': 0.022696042336936645}. Best is trial 50 with value: 0.7070322671643086.


[0.70238095 0.65914433 0.69890329 0.69674128]


[I 2023-06-30 16:36:22,930] Trial 56 finished with value: 0.7064599427133061 and parameters: {'max_depth': 6, 'learning_rate': 0.03465155626049339, 'n_estimators': 600, 'subsample': 0.4, 'colsample_bytree': 0.7, 'reg_lambda': 6.028963764790881, 'reg_alpha': 0.0084826451680447}. Best is trial 50 with value: 0.7070322671643086.


[0.71264368 0.66793169 0.71544715 0.70645994]


[I 2023-06-30 16:36:23,646] Trial 57 finished with value: 0.6954691433035327 and parameters: {'max_depth': 6, 'learning_rate': 0.03641755558552703, 'n_estimators': 600, 'subsample': 0.2, 'colsample_bytree': 0.5, 'reg_lambda': 5.824899546503514, 'reg_alpha': 0.008853250276034173}. Best is trial 50 with value: 0.7070322671643086.


[0.70155993 0.65593942 0.70070779 0.69546914]


[I 2023-06-30 16:36:24,216] Trial 58 finished with value: 0.6844132453780054 and parameters: {'max_depth': 6, 'learning_rate': 0.06120002335376055, 'n_estimators': 500, 'subsample': 0.4, 'colsample_bytree': 0.30000000000000004, 'reg_lambda': 9.562772847015012, 'reg_alpha': 0.003923824910123514}. Best is trial 50 with value: 0.7070322671643086.


[0.6908867  0.64194009 0.68947906 0.68441325]


[I 2023-06-30 16:36:24,595] Trial 59 finished with value: 0.6978805008245812 and parameters: {'max_depth': 6, 'learning_rate': 0.02755049687726117, 'n_estimators': 300, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 2.4841666609008604, 'reg_alpha': 0.002048574338826654}. Best is trial 50 with value: 0.7070322671643086.


[0.70484401 0.65515588 0.710718   0.6978805 ]


[I 2023-06-30 16:36:25,307] Trial 60 finished with value: 0.7005522524086452 and parameters: {'max_depth': 5, 'learning_rate': 0.04988105340365471, 'n_estimators': 700, 'subsample': 0.4, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 4.598057014733462, 'reg_alpha': 0.012840804407536032}. Best is trial 50 with value: 0.7070322671643086.


[0.70648604 0.66225791 0.70594159 0.70055225]


[I 2023-06-30 16:36:26,191] Trial 61 finished with value: 0.6991539905390158 and parameters: {'max_depth': 7, 'learning_rate': 0.03193258430802692, 'n_estimators': 700, 'subsample': 0.4, 'colsample_bytree': 0.7, 'reg_lambda': 2.229760879555247, 'reg_alpha': 0.05013071759739925}. Best is trial 50 with value: 0.7070322671643086.


[0.70525452 0.66003788 0.70546559 0.69915399]


[I 2023-06-30 16:36:26,916] Trial 62 finished with value: 0.6985816660880132 and parameters: {'max_depth': 6, 'learning_rate': 0.021063636222428656, 'n_estimators': 600, 'subsample': 0.4, 'colsample_bytree': 0.7, 'reg_lambda': 1.1308900111613542, 'reg_alpha': 0.006960134152515835}. Best is trial 50 with value: 0.7070322671643086.


[0.70484401 0.65875653 0.70600203 0.69858167]


[I 2023-06-30 16:36:27,912] Trial 63 finished with value: 0.7048107803142089 and parameters: {'max_depth': 7, 'learning_rate': 0.047436405512429614, 'n_estimators': 800, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 6.393147329698842, 'reg_alpha': 0.01714992553296061}. Best is trial 50 with value: 0.7070322671643086.


[0.71018062 0.66916589 0.70693069 0.70481078]


[I 2023-06-30 16:36:28,797] Trial 64 finished with value: 0.6927377180800277 and parameters: {'max_depth': 7, 'learning_rate': 0.04553487570249262, 'n_estimators': 700, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 6.243429458240713, 'reg_alpha': 0.004575407662174054}. Best is trial 50 with value: 0.7070322671643086.


[0.69868637 0.65344665 0.69617706 0.69273772]


[I 2023-06-30 16:36:29,788] Trial 65 finished with value: 0.6992204452738477 and parameters: {'max_depth': 7, 'learning_rate': 0.05709291860860947, 'n_estimators': 800, 'subsample': 0.5, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 6.358835130501796, 'reg_alpha': 0.01608462940468774}. Best is trial 50 with value: 0.7070322671643086.


[0.7044335  0.6635514  0.6988189  0.69922045]


[I 2023-06-30 16:36:30,512] Trial 66 finished with value: 0.7053817485461331 and parameters: {'max_depth': 6, 'learning_rate': 0.03668022835461691, 'n_estimators': 600, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 3.816613850827075, 'reg_alpha': 0.009432286234831692}. Best is trial 50 with value: 0.7070322671643086.


[0.71100164 0.66886171 0.70958084 0.70538175]


[I 2023-06-30 16:36:31,033] Trial 67 finished with value: 0.6911482293203713 and parameters: {'max_depth': 5, 'learning_rate': 0.039758018481796505, 'n_estimators': 500, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.8, 'reg_lambda': 4.23673110781861, 'reg_alpha': 0.0012497492210784666}. Best is trial 50 with value: 0.7070322671643086.


[0.69745484 0.65021357 0.69684639 0.69114823]


[I 2023-06-30 16:36:31,557] Trial 68 finished with value: 0.70487045395365 and parameters: {'max_depth': 6, 'learning_rate': 0.03365307175184717, 'n_estimators': 400, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 3.7073071148271337, 'reg_alpha': 0.009947893528288641}. Best is trial 50 with value: 0.7070322671643086.


[0.71141215 0.66475918 0.71634121 0.70487045]


[I 2023-06-30 16:36:31,874] Trial 69 finished with value: 0.6945753949309956 and parameters: {'max_depth': 4, 'learning_rate': 0.025826967096664835, 'n_estimators': 400, 'subsample': 0.5, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 3.7728872052691047, 'reg_alpha': 0.00787368541514867}. Best is trial 50 with value: 0.7070322671643086.


[0.70197044 0.6496139  0.70991561 0.69457539]


[I 2023-06-30 16:36:32,205] Trial 70 finished with value: 0.6927350056418715 and parameters: {'max_depth': 5, 'learning_rate': 0.03539325631824659, 'n_estimators': 300, 'subsample': 0.6000000000000001, 'colsample_bytree': 1.0, 'reg_lambda': 1.7964290282959885, 'reg_alpha': 0.010218273687560092}. Best is trial 50 with value: 0.7070322671643086.


[0.69950739 0.6500956  0.70247934 0.69273501]


[I 2023-06-30 16:36:32,943] Trial 71 finished with value: 0.7097623361687353 and parameters: {'max_depth': 6, 'learning_rate': 0.031881364207890645, 'n_estimators': 600, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 6.550787086628024, 'reg_alpha': 0.004353727025251494}. Best is trial 71 with value: 0.7097623361687353.


[0.71633826 0.67016706 0.72296601 0.70976234]


[I 2023-06-30 16:36:33,550] Trial 72 finished with value: 0.698897665133235 and parameters: {'max_depth': 6, 'learning_rate': 0.029308090141345267, 'n_estimators': 500, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.8, 'reg_lambda': 2.783230575964496, 'reg_alpha': 0.0032807954874742038}. Best is trial 71 with value: 0.7097623361687353.


[0.70566502 0.65710187 0.71044467 0.69889767]


[I 2023-06-30 16:36:34,107] Trial 73 finished with value: 0.7023288994010937 and parameters: {'max_depth': 6, 'learning_rate': 0.0337326784407107, 'n_estimators': 400, 'subsample': 0.7, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 9.959803260782476, 'reg_alpha': 0.006196003073826274}. Best is trial 71 with value: 0.7097623361687353.


[0.7089491  0.66157518 0.71369722 0.7023289 ]


[I 2023-06-30 16:36:34,840] Trial 74 finished with value: 0.7053152938113012 and parameters: {'max_depth': 6, 'learning_rate': 0.02342072942568648, 'n_estimators': 600, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 3.435061064589308, 'reg_alpha': 0.0041551582054232}. Best is trial 71 with value: 0.7097623361687353.


[0.71182266 0.66539561 0.71663244 0.70531529]


[I 2023-06-30 16:36:35,495] Trial 75 finished with value: 0.6927363618609496 and parameters: {'max_depth': 5, 'learning_rate': 0.023719103966071953, 'n_estimators': 600, 'subsample': 0.9000000000000001, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 6.854504508431268, 'reg_alpha': 0.004531048974810355}. Best is trial 71 with value: 0.7097623361687353.


[0.69909688 0.65178147 0.69928644 0.69273636]


[I 2023-06-30 16:36:35,743] Trial 76 finished with value: 0.6776145191389636 and parameters: {'max_depth': 1, 'learning_rate': 0.026750079046907467, 'n_estimators': 600, 'subsample': 0.2, 'colsample_bytree': 0.7, 'reg_lambda': 2.128698026019208, 'reg_alpha': 0.0021555780503499833}. Best is trial 71 with value: 0.7097623361687353.


[0.68431856 0.63328565 0.68242549 0.67761452]


[I 2023-06-30 16:36:36,385] Trial 77 finished with value: 0.6988339228365593 and parameters: {'max_depth': 6, 'learning_rate': 0.020112766747662658, 'n_estimators': 500, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 1.467839521058236, 'reg_alpha': 0.00522288659747846}. Best is trial 71 with value: 0.7097623361687353.


[0.70566502 0.65677358 0.71088083 0.69883392]


[I 2023-06-30 16:36:36,824] Trial 78 finished with value: 0.7006119260480862 and parameters: {'max_depth': 4, 'learning_rate': 0.041859676572609056, 'n_estimators': 600, 'subsample': 0.5, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 3.434854913148276, 'reg_alpha': 0.0012206688781284047}. Best is trial 71 with value: 0.7097623361687353.


[0.70771757 0.65769231 0.71548117 0.70061193]


[I 2023-06-30 16:36:37,725] Trial 79 finished with value: 0.7004234115962156 and parameters: {'max_depth': 10, 'learning_rate': 0.01817963461062786, 'n_estimators': 600, 'subsample': 0.4, 'colsample_bytree': 0.7, 'reg_lambda': 9.934007429660136, 'reg_alpha': 0.024050271091911136}. Best is trial 71 with value: 0.7097623361687353.


[0.70689655 0.66       0.71004098 0.70042341]


[I 2023-06-30 16:36:38,345] Trial 80 finished with value: 0.6946418496658276 and parameters: {'max_depth': 5, 'learning_rate': 0.03145728665670001, 'n_estimators': 600, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.8, 'reg_lambda': 4.2747742914184945, 'reg_alpha': 0.003537980320290991}. Best is trial 71 with value: 0.7097623361687353.


[0.70114943 0.65333333 0.70286885 0.69464185]


[I 2023-06-30 16:36:38,631] Trial 81 finished with value: 0.6929235200937419 and parameters: {'max_depth': 6, 'learning_rate': 0.030166895805357242, 'n_estimators': 200, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 3.04449763042967, 'reg_alpha': 0.010435175453641378}. Best is trial 71 with value: 0.7097623361687353.


[0.70032841 0.6476834  0.70780591 0.69292352]


[I 2023-06-30 16:36:39,284] Trial 82 finished with value: 0.7025825123687179 and parameters: {'max_depth': 6, 'learning_rate': 0.02373530517159475, 'n_estimators': 500, 'subsample': 0.5, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 4.70459421214177, 'reg_alpha': 0.01204215601081491}. Best is trial 71 with value: 0.7097623361687353.


[0.70935961 0.66124402 0.71532091 0.70258251]


[I 2023-06-30 16:36:39,830] Trial 83 finished with value: 0.6973746311084108 and parameters: {'max_depth': 7, 'learning_rate': 0.03617726673815298, 'n_estimators': 400, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 6.629130129507466, 'reg_alpha': 0.10969271982841326}. Best is trial 71 with value: 0.7097623361687353.


[0.70361248 0.65749526 0.70426829 0.69737463]


[I 2023-06-30 16:36:40,354] Trial 84 finished with value: 0.7063975566357087 and parameters: {'max_depth': 6, 'learning_rate': 0.04116033286788235, 'n_estimators': 400, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.7, 'reg_lambda': 2.3109516183163548, 'reg_alpha': 0.0059335106549634085}. Best is trial 71 with value: 0.7097623361687353.


[0.71223317 0.66918358 0.71256281 0.70639756]


[I 2023-06-30 16:36:41,227] Trial 85 finished with value: 0.7003623817376964 and parameters: {'max_depth': 7, 'learning_rate': 0.053987124389407254, 'n_estimators': 700, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 2.549749634227807, 'reg_alpha': 0.0026908021323015004}. Best is trial 71 with value: 0.7097623361687353.


[0.70607553 0.66290019 0.704      0.70036238]


[I 2023-06-30 16:36:41,826] Trial 86 finished with value: 0.7026476108844718 and parameters: {'max_depth': 6, 'learning_rate': 0.042335750165921095, 'n_estimators': 500, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.7, 'reg_lambda': 1.3978727735700547, 'reg_alpha': 0.006493046330977499}. Best is trial 71 with value: 0.7097623361687353.


[0.7089491  0.6631829  0.71151886 0.70264761]


[I 2023-06-30 16:36:42,360] Trial 87 finished with value: 0.6969284350316812 and parameters: {'max_depth': 5, 'learning_rate': 0.03923301237581104, 'n_estimators': 500, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 1.910516524514005, 'reg_alpha': 0.06219424034033487}. Best is trial 71 with value: 0.7097623361687353.


[0.70361248 0.65520535 0.70721649 0.69692844]


[I 2023-06-30 16:36:43,337] Trial 88 finished with value: 0.69616623990973 and parameters: {'max_depth': 9, 'learning_rate': 0.02588646818891023, 'n_estimators': 700, 'subsample': 0.7, 'colsample_bytree': 0.7, 'reg_lambda': 5.251905440171608, 'reg_alpha': 0.20760406771632087}. Best is trial 71 with value: 0.7097623361687353.


[0.70279146 0.65458015 0.70576132 0.69616624]


[I 2023-06-30 16:36:43,938] Trial 89 finished with value: 0.6940708814339033 and parameters: {'max_depth': 7, 'learning_rate': 0.02173092527546343, 'n_estimators': 400, 'subsample': 0.4, 'colsample_bytree': 1.0, 'reg_lambda': 7.35365101358397, 'reg_alpha': 0.02688407994616989}. Best is trial 71 with value: 0.7097623361687353.


[0.70032841 0.65370019 0.70020325 0.69407088]


[I 2023-06-30 16:36:44,759] Trial 90 finished with value: 0.7037312299279577 and parameters: {'max_depth': 6, 'learning_rate': 0.0452920649292249, 'n_estimators': 600, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.7, 'reg_lambda': 0.9199974090230842, 'reg_alpha': 0.0370277274506562}. Best is trial 71 with value: 0.7097623361687353.


[0.7089491  0.6685367  0.7044335  0.70373123]


[I 2023-06-30 16:36:45,278] Trial 91 finished with value: 0.7027086407429911 and parameters: {'max_depth': 6, 'learning_rate': 0.034259198846812604, 'n_estimators': 400, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 3.4939860101298525, 'reg_alpha': 0.009396734961270744}. Best is trial 71 with value: 0.7097623361687353.


[0.70977011 0.66025949 0.71786834 0.70270864]


[I 2023-06-30 16:36:45,799] Trial 92 finished with value: 0.6994062472875618 and parameters: {'max_depth': 6, 'learning_rate': 0.02908904612859085, 'n_estimators': 400, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 3.463079572917764, 'reg_alpha': 0.004499107534044796}. Best is trial 71 with value: 0.7097623361687353.


[0.70607553 0.65807068 0.71030928 0.69940625]


[I 2023-06-30 16:36:46,058] Trial 93 finished with value: 0.6870768596476 and parameters: {'max_depth': 3, 'learning_rate': 0.03377550950382973, 'n_estimators': 400, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 2.025036817662758, 'reg_alpha': 0.013886618643261371}. Best is trial 71 with value: 0.7097623361687353.


[0.69499179 0.63879436 0.70418006 0.68707686]


[I 2023-06-30 16:36:46,677] Trial 94 finished with value: 0.7039821304574255 and parameters: {'max_depth': 6, 'learning_rate': 0.03893140719653088, 'n_estimators': 500, 'subsample': 0.4, 'colsample_bytree': 0.7, 'reg_lambda': 7.840808461770476, 'reg_alpha': 0.0070457555592607374}. Best is trial 71 with value: 0.7097623361687353.


[0.71018062 0.66508539 0.71239837 0.70398213]


[I 2023-06-30 16:36:47,023] Trial 95 finished with value: 0.6854914395451783 and parameters: {'max_depth': 5, 'learning_rate': 0.028368802677727113, 'n_estimators': 300, 'subsample': 0.5, 'colsample_bytree': 0.5, 'reg_lambda': 5.23922507989078, 'reg_alpha': 0.01951343879151807}. Best is trial 71 with value: 0.7097623361687353.


[0.69252874 0.64076739 0.69510926 0.68549144]


[I 2023-06-30 16:36:47,733] Trial 96 finished with value: 0.6939474654977866 and parameters: {'max_depth': 6, 'learning_rate': 0.052700157765101174, 'n_estimators': 600, 'subsample': 0.5, 'colsample_bytree': 0.9000000000000001, 'reg_lambda': 1.109805119363104, 'reg_alpha': 0.009095259146555868}. Best is trial 71 with value: 0.7097623361687353.


[0.69909688 0.65795614 0.69185476 0.69394747]


[I 2023-06-30 16:36:48,526] Trial 97 finished with value: 0.6994754144605503 and parameters: {'max_depth': 8, 'learning_rate': 0.06179137262174368, 'n_estimators': 600, 'subsample': 0.2, 'colsample_bytree': 0.6000000000000001, 'reg_lambda': 2.389187749787576, 'reg_alpha': 0.0034268916384208314}. Best is trial 71 with value: 0.7097623361687353.


[0.7044335  0.66480447 0.69726562 0.69947541]


[I 2023-06-30 16:36:49,622] Trial 98 finished with value: 0.6969962459855914 and parameters: {'max_depth': 7, 'learning_rate': 0.04514443761299736, 'n_estimators': 900, 'subsample': 0.4, 'colsample_bytree': 0.8, 'reg_lambda': 7.919032493539401, 'reg_alpha': 0.005224794246585099}. Best is trial 71 with value: 0.7097623361687353.


[0.70238095 0.66042155 0.69732938 0.69699625]


[I 2023-06-30 16:36:50,333] Trial 99 finished with value: 0.6965486936897839 and parameters: {'max_depth': 5, 'learning_rate': 0.03127513623855104, 'n_estimators': 700, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.7, 'reg_lambda': 4.1588797644185584, 'reg_alpha': 0.013310898396894963}. Best is trial 71 with value: 0.7097623361687353.


[0.70279146 0.65654649 0.70325203 0.69654869]
[1]	valid_0's binary_logloss: 0.685002
[2]	valid_0's binary_logloss: 0.680579
[3]	valid_0's binary_logloss: 0.676365
[4]	valid_0's binary_logloss: 0.671677
[5]	valid_0's binary_logloss: 0.667223
[6]	valid_0's binary_logloss: 0.663739
[7]	valid_0's binary_logloss: 0.660104
[8]	valid_0's binary_logloss: 0.656386
[9]	valid_0's binary_logloss: 0.653585
[10]	valid_0's binary_logloss: 0.650407
[11]	valid_0's binary_logloss: 0.647244
[12]	valid_0's binary_logloss: 0.644834
[13]	valid_0's binary_logloss: 0.642224
[14]	valid_0's binary_logloss: 0.639548
[15]	valid_0's binary_logloss: 0.63699
[16]	valid_0's binary_logloss: 0.63506
[17]	valid_0's binary_logloss: 0.632696
[18]	valid_0's binary_logloss: 0.63055
[19]	valid_0's binary_logloss: 0.628944
[20]	valid_0's binary_logloss: 0.626891
[21]	valid_0's binary_logloss: 0.624873
[22]	valid_0's binary_logloss: 0.623557
[23]	valid_0's binary_logloss: 0.621813
[24]	valid_0's binary_logloss: 0.620124
[25]	v



[122]	valid_0's binary_logloss: 0.582086
[123]	valid_0's binary_logloss: 0.582069
[124]	valid_0's binary_logloss: 0.582048
[125]	valid_0's binary_logloss: 0.582089
[126]	valid_0's binary_logloss: 0.582056
[127]	valid_0's binary_logloss: 0.582099
[128]	valid_0's binary_logloss: 0.582095
[129]	valid_0's binary_logloss: 0.58209
[130]	valid_0's binary_logloss: 0.582048
[131]	valid_0's binary_logloss: 0.581884
[132]	valid_0's binary_logloss: 0.581853
[133]	valid_0's binary_logloss: 0.581813
[134]	valid_0's binary_logloss: 0.581809
[135]	valid_0's binary_logloss: 0.58162
[136]	valid_0's binary_logloss: 0.581579
[137]	valid_0's binary_logloss: 0.581597
[138]	valid_0's binary_logloss: 0.581305
[139]	valid_0's binary_logloss: 0.581463
[140]	valid_0's binary_logloss: 0.581438
[141]	valid_0's binary_logloss: 0.581293
[142]	valid_0's binary_logloss: 0.581236
[143]	valid_0's binary_logloss: 0.58128
[144]	valid_0's binary_logloss: 0.581267
[145]	valid_0's binary_logloss: 0.580992
[146]	valid_0's bin

[504]	valid_0's binary_logloss: 0.569398
[505]	valid_0's binary_logloss: 0.569414
[506]	valid_0's binary_logloss: 0.569378
[507]	valid_0's binary_logloss: 0.569234
[508]	valid_0's binary_logloss: 0.569145
[509]	valid_0's binary_logloss: 0.569133
[510]	valid_0's binary_logloss: 0.5691
[511]	valid_0's binary_logloss: 0.569032
[512]	valid_0's binary_logloss: 0.569002
[513]	valid_0's binary_logloss: 0.568963
[514]	valid_0's binary_logloss: 0.56891
[515]	valid_0's binary_logloss: 0.5689
[516]	valid_0's binary_logloss: 0.568888
[517]	valid_0's binary_logloss: 0.568911
[518]	valid_0's binary_logloss: 0.568863
[519]	valid_0's binary_logloss: 0.568844
[520]	valid_0's binary_logloss: 0.568792
[521]	valid_0's binary_logloss: 0.568807
[522]	valid_0's binary_logloss: 0.568813
[523]	valid_0's binary_logloss: 0.568832
[524]	valid_0's binary_logloss: 0.568793
[525]	valid_0's binary_logloss: 0.568768
[526]	valid_0's binary_logloss: 0.568755
[527]	valid_0's binary_logloss: 0.568735
[528]	valid_0's binar

In [58]:
thresholds = [0.25, 0.3, 0.35, 0.40, 0.45, 0.5, 0.55, 0.60, 0.65, .70, .75, 0.8, 0.85, 0.9]

for t in thresholds:
    preds = []
    for i in range(len(y_prob_test)):
        if y_prob_test[i] >= t:
            preds.append(1.)
        else:
            preds.append(0.)
    
    precision = precision_score(y_test, preds)
    recall = recall_score(y_test, preds)
    f1 = f1_score(y_test, preds)
    cm = confusion_matrix(y_test, preds)
    
    print(f"Threshold: {t}")
    print("Confusion Matrix:")
    print(cm)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("-----------------------")

Threshold: 0.25
Confusion Matrix:
[[ 488  824]
 [ 101 1023]]
Precision: 0.5538711423930699
Recall: 0.9101423487544484
F1 Score: 0.6886570178391115
-----------------------
Threshold: 0.3
Confusion Matrix:
[[621 691]
 [157 967]]
Precision: 0.583232810615199
Recall: 0.8603202846975089
F1 Score: 0.6951833213515457
-----------------------
Threshold: 0.35
Confusion Matrix:
[[751 561]
 [227 897]]
Precision: 0.6152263374485597
Recall: 0.7980427046263345
F1 Score: 0.6948102246320682
-----------------------
Threshold: 0.4
Confusion Matrix:
[[859 453]
 [297 827]]
Precision: 0.64609375
Recall: 0.7357651245551602
F1 Score: 0.6880199667221298
-----------------------
Threshold: 0.45
Confusion Matrix:
[[949 363]
 [368 756]]
Precision: 0.675603217158177
Recall: 0.6725978647686833
F1 Score: 0.6740971912617031
-----------------------
Threshold: 0.5
Confusion Matrix:
[[1029  283]
 [ 435  689]]
Precision: 0.7088477366255144
Recall: 0.6129893238434164
F1 Score: 0.6574427480916032
-----------------------
Thr

In [59]:
validation_set = y_test.reset_index(drop = True)

for t in [0.25, 0.3, 0.35, 0.40, 0.45, 0.5, 0.55, 0.60, 0.65, .70, .75, 0.8, 0.85, 0.9]:
    results = get_profits(odds, y_prob_test, t, validation_set)
    print(f"At threshold {t} for predictions: \n Profit: {results[0]} \n Number of bets: {results[1]} \n Return %: {results[2]} \n")

print(get_value_strat1(odds, preds, 0.05, validation_set))
print(get_value_strat2(odds, preds, 0.05, validation_set))

At threshold 0.25 for predictions: 
 Profit: 139.7093333333334 
 Number of bets: 1847 
 Return %: 7.564121999639058 

At threshold 0.3 for predictions: 
 Profit: 159.76433333333335 
 Number of bets: 1658 
 Return %: 9.635967028548453 

At threshold 0.35 for predictions: 
 Profit: 161.61099999999996 
 Number of bets: 1458 
 Return %: 11.084430727023317 

At threshold 0.4 for predictions: 
 Profit: 175.05599999999998 
 Number of bets: 1280 
 Return %: 13.676249999999998 

At threshold 0.45 for predictions: 
 Profit: 154.776 
 Number of bets: 1119 
 Return %: 13.831635388739945 

At threshold 0.5 for predictions: 
 Profit: 146.9543333333334 
 Number of bets: 972 
 Return %: 15.118758573388211 

At threshold 0.55 for predictions: 
 Profit: 146.01433333333347 
 Number of bets: 836 
 Return %: 17.46582934609252 

At threshold 0.6 for predictions: 
 Profit: 128.82600000000002 
 Number of bets: 691 
 Return %: 18.643415340086833 

At threshold 0.65 for predictions: 
 Profit: 99.88833333333338 