# Imports and configs

In [89]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import mutual_info_classif, f_classif
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier
import optuna
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)

In [3]:
data = pd.read_csv(r'modelling_data\concatenated_data\cleaned_data\all_games_1819_2324_cleaned.csv')

In [4]:
data.shape

(37339, 162)

In [5]:
data.head()

Unnamed: 0,attendance,home_team_name,away_team_name,game_week,home_team_goal_count,away_team_goal_count,total_goal_count,total_goals_at_half_time,home_team_goal_count_half_time,away_team_goal_count_half_time,home_team_corner_count,away_team_corner_count,home_team_yellow_cards,home_team_red_cards,away_team_yellow_cards,away_team_red_cards,home_team_first_half_cards,home_team_second_half_cards,away_team_first_half_cards,away_team_second_half_cards,home_team_shots,away_team_shots,home_team_shots_on_target,away_team_shots_on_target,home_team_shots_off_target,away_team_shots_off_target,home_team_fouls,away_team_fouls,home_team_possession,away_team_possession,home_team_xg,away_team_xg,btts_percentage_pre_match,over_15_percentage_pre_match,over_25_percentage_pre_match,over_35_percentage_pre_match,over_45_percentage_pre_match,over_15_ht_fhg_percentage_pre_match,over_05_ht_fhg_percentage_pre_match,over_15_2hg_percentage_pre_match,over_05_2hg_percentage_pre_match,average_corners_per_match_pre_match,average_cards_per_match_pre_match,odds_ft_home_team_win,odds_ft_draw,odds_ft_away_team_win,odds_ft_over15,odds_ft_over25,odds_ft_over35,odds_ft_over45,odds_btts_yes,odds_btts_no,league,date,time,corners_total,yellow_cards_total,red_cards_total,cards_total,shots_total,shots_on_target_total,shots_off_target_total,fouls_total,result,season,home_team_cumulative_goals_scored_pre_game,away_team_cumulative_goals_scored_pre_game,home_team_cumulative_goals_conceded_pre_game,away_team_cumulative_goals_conceded_pre_game,home_team_average_goals_scored_per_game_pre_game,away_team_average_goals_scored_per_game_pre_game,home_team_average_goals_conceded_per_game_pre_game,away_team_average_goals_conceded_per_game_pre_game,home_team_average_first_half_goals_scored_pre_game,home_team_average_second_half_goals_scored_pre_game,away_team_average_first_half_goals_scored_pre_game,away_team_average_second_half_goals_scored_pre_game,home_team_average_first_half_goals_conceded_pre_game,home_team_average_second_half_goals_conceded_pre_game,away_team_average_first_half_goals_conceded_pre_game,away_team_average_second_half_goals_conceded_pre_game,home_team_average_first_half_goals_total_pre_game,home_team_average_second_half_goals_total_pre_game,away_team_average_first_half_goals_total_pre_game,away_team_average_second_half_goals_total_pre_game,home_team_average_corners_total_per_game_pre_game,away_team_average_corners_total_per_game_pre_game,home_team_average_corners_per_game_pre_game,away_team_average_corners_per_game_pre_game,home_team_average_yellow_cards_total_per_game_pre_game,away_team_average_yellow_cards_total_per_game_pre_game,home_team_average_yellow_cards_per_game_pre_game,away_team_average_yellow_cards_per_game_pre_game,home_team_cumulative_red_cards_pre_game,away_team_cumulative_red_cards_pre_game,home_team_average_red_cards_total_per_game_pre_game,away_team_average_red_cards_total_per_game_pre_game,home_team_average_red_cards_per_game_pre_game,away_team_average_red_cards_per_game_pre_game,home_team_average_shots_per_game_pre_game,home_team_average_shots_on_target_per_game_pre_game,away_team_average_shots_per_game_pre_game,away_team_average_shots_on_target_per_game_pre_game,home_team_average_fouls_per_game_pre_game,away_team_average_fouls_per_game_pre_game,home_team_average_fouls_total_per_game_pre_game,away_team_average_fouls_total_per_game_pre_game,home_team_average_possession_per_game_pre_game,away_team_average_possession_per_game_pre_game,home_team_average_xg_per_game_pre_game,away_team_average_xg_per_game_pre_game,home_team_games_without_goals_pre_game,away_team_games_without_goals_pre_game,home_team_cumulative_btts_pre_game,away_team_cumulative_btts_pre_game,home_team_cumulative_clean_sheets_pre_game,away_team_cumulative_clean_sheets_pre_game,home_team_cumulative_wins_pre_game,home_team_cumulative_draws_pre_game,home_team_cumulative_losses_pre_game,away_team_cumulative_wins_pre_game,away_team_cumulative_draws_pre_game,away_team_cumulative_losses_pre_game,home_team_average_points_per_game_pre_game,away_team_average_points_per_game_pre_game,home_team_wins_in_last_5_games,home_team_draws_in_last_5_games,home_team_losses_in_last_5_games,away_team_wins_in_last_5_games,away_team_draws_in_last_5_games,away_team_losses_in_last_5_games,home_team_average_goals_scored_in_last_5_games,away_team_average_goals_scored_in_last_5_games,home_team_average_goals_conceded_in_last_5_games,away_team_average_goals_conceded_in_last_5_games,average_total_corners_in_home_team_games_in_last_5_games,average_total_corners_in_away_team_games_in_last_5_games,average_corners_by_home_team_in_last_5_games,average_corners_by_away_team_in_last_5_games,average_yellow_cards_by_home_team_in_last_5_games,average_yellow_cards_by_away_team_in_last_5_games,average_red_cards_by_home_team_in_last_5_games,average_red_cards_by_away_team_in_last_5_games,average_ball_possession_by_home_team_in_last_5_games,average_ball_possession_by_away_team_in_last_5_games,average_xg_by_home_team_in_last_5_games,average_xg_by_away_team_in_last_5_games,average_shots_by_home_team_in_last_5_games,average_shots_by_away_team_in_last_5_games,average_shots_on_target_by_home_team_in_last_5_games,average_shots_on_target_by_away_team_in_last_5_games,average_total_fouls_in_home_team_games_in_last_5_games,average_total_fouls_in_away_team_games_in_last_5_games,average_fouls_by_home_team_in_last_5_games,average_fouls_by_away_team_in_last_5_games,h2h_home_team_wins_pre_game,h2h_away_team_wins_pre_game,h2h_draws_pre_game,average_goals_h2h,average_yellow_cards_h2h,average_red_cards_h2h,average_corners_h2h
0,13155.0,Austria Wien,Wacker Innsbruck,1.0,2,1,3,2,1,1,6,1,1,0,2,0,0,1,1,1,13,7,8,5,5,2,7,15,55,45,2.01,1.24,0,0,0,0,0,0,0,0,0,0.0,0.0,1.54,4.2,5.75,1.21,1.69,2.75,4.75,1.83,2.0,austria,27.07.2018,18:45:00,7,3,0,3,20,13,7,22,1,18/19,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0
1,10785.0,Sturm Graz,Hartberg,1.0,3,2,5,3,2,1,7,6,3,0,1,0,2,1,0,1,13,8,8,4,5,4,9,21,60,40,1.96,1.36,0,0,0,0,0,0,0,0,0,0.0,0.0,1.49,4.3,6.25,1.22,1.71,2.8,4.25,1.95,1.87,austria,28.07.2018,15:00:00,13,4,0,4,21,12,9,30,1,18/19,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0
2,4523.0,Rheindorf Altach,Mattersburg,1.0,2,3,5,2,1,1,3,9,4,0,1,0,2,2,0,1,12,8,4,5,8,3,20,15,54,46,1.5,1.28,0,0,0,0,0,0,0,0,0,0.0,0.0,2.35,3.3,3.0,1.32,2.0,3.55,5.75,1.87,1.95,austria,28.07.2018,15:00:00,12,5,0,5,20,9,11,35,2,18/19,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0
3,11532.0,Salzburg,LASK Linz,1.0,3,1,4,3,3,0,4,4,1,0,4,0,1,0,1,3,12,9,7,5,5,4,8,19,62,38,1.51,1.09,0,0,0,0,0,0,0,0,0,0.0,0.0,1.44,4.3,7.0,1.25,1.8,3.05,4.25,2.1,1.77,austria,29.07.2018,15:00:00,8,5,0,5,21,12,9,27,1,18/19,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0
4,5200.0,Admira,Rapid Wien,1.0,0,3,3,3,0,3,4,5,2,0,2,0,0,2,0,2,6,15,5,9,1,6,9,12,31,69,0.84,1.92,0,0,0,0,0,0,0,0,0,0.0,0.0,6.45,4.65,1.43,1.16,1.53,2.35,4.25,1.77,2.05,austria,29.07.2018,15:00:00,9,4,0,4,21,14,7,21,2,18/19,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0


In [7]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37339 entries, 0 to 37338
Columns: 162 entries, attendance to average_corners_h2h
dtypes: float64(105), int64(51), object(6)
memory usage: 46.1+ MB


In [8]:
data.groupby(['league', 'season']).apply(lambda x: x['game_week'].isna().sum())

league              season
austria             18/19     0
                    19/20     0
                    20/21     0
                    21/22     0
                    22/23     0
                    23/24     0
belgium pro league  18/19     0
                    19/20     0
                    20/21     0
                    21/22     0
                    22/23     0
                    23/24     0
bundesliga          18/19     0
                    19/20     0
                    20/21     0
                    21/22     0
                    22/23     0
                    23/24     0
czech first league  18/19     0
                    19/20     0
                    20/21     0
                    21/22     0
                    22/23     0
                    23/24     0
denmark             18/19     0
                    19/20     0
                    20/21     0
                    21/22     0
                    22/23     0
                    23/24     0
ekstraklasa  

In [9]:
data.dtypes

attendance                                                  float64
home_team_name                                               object
away_team_name                                               object
game_week                                                   float64
home_team_goal_count                                          int64
away_team_goal_count                                          int64
total_goal_count                                              int64
total_goals_at_half_time                                      int64
home_team_goal_count_half_time                                int64
away_team_goal_count_half_time                                int64
home_team_corner_count                                        int64
away_team_corner_count                                        int64
home_team_yellow_cards                                        int64
home_team_red_cards                                           int64
away_team_yellow_cards                          

In [10]:
data['result'] = data['result'].astype('category')

In [11]:
def scale_data_arcsinh_std(df):
    num_features = df.select_dtypes(include='number').columns
    df[num_features] = np.arcsinh(df[num_features])
    df[num_features] = StandardScaler().fit_transform(df[num_features])
    return df

In [12]:
data = scale_data_arcsinh_std(data)

In [13]:
data.tail()

Unnamed: 0,attendance,home_team_name,away_team_name,game_week,home_team_goal_count,away_team_goal_count,total_goal_count,total_goals_at_half_time,home_team_goal_count_half_time,away_team_goal_count_half_time,home_team_corner_count,away_team_corner_count,home_team_yellow_cards,home_team_red_cards,away_team_yellow_cards,away_team_red_cards,home_team_first_half_cards,home_team_second_half_cards,away_team_first_half_cards,away_team_second_half_cards,home_team_shots,away_team_shots,home_team_shots_on_target,away_team_shots_on_target,home_team_shots_off_target,away_team_shots_off_target,home_team_fouls,away_team_fouls,home_team_possession,away_team_possession,home_team_xg,away_team_xg,btts_percentage_pre_match,over_15_percentage_pre_match,over_25_percentage_pre_match,over_35_percentage_pre_match,over_45_percentage_pre_match,over_15_ht_fhg_percentage_pre_match,over_05_ht_fhg_percentage_pre_match,over_15_2hg_percentage_pre_match,over_05_2hg_percentage_pre_match,average_corners_per_match_pre_match,average_cards_per_match_pre_match,odds_ft_home_team_win,odds_ft_draw,odds_ft_away_team_win,odds_ft_over15,odds_ft_over25,odds_ft_over35,odds_ft_over45,odds_btts_yes,odds_btts_no,league,date,time,corners_total,yellow_cards_total,red_cards_total,cards_total,shots_total,shots_on_target_total,shots_off_target_total,fouls_total,result,season,home_team_cumulative_goals_scored_pre_game,away_team_cumulative_goals_scored_pre_game,home_team_cumulative_goals_conceded_pre_game,away_team_cumulative_goals_conceded_pre_game,home_team_average_goals_scored_per_game_pre_game,away_team_average_goals_scored_per_game_pre_game,home_team_average_goals_conceded_per_game_pre_game,away_team_average_goals_conceded_per_game_pre_game,home_team_average_first_half_goals_scored_pre_game,home_team_average_second_half_goals_scored_pre_game,away_team_average_first_half_goals_scored_pre_game,away_team_average_second_half_goals_scored_pre_game,home_team_average_first_half_goals_conceded_pre_game,home_team_average_second_half_goals_conceded_pre_game,away_team_average_first_half_goals_conceded_pre_game,away_team_average_second_half_goals_conceded_pre_game,home_team_average_first_half_goals_total_pre_game,home_team_average_second_half_goals_total_pre_game,away_team_average_first_half_goals_total_pre_game,away_team_average_second_half_goals_total_pre_game,home_team_average_corners_total_per_game_pre_game,away_team_average_corners_total_per_game_pre_game,home_team_average_corners_per_game_pre_game,away_team_average_corners_per_game_pre_game,home_team_average_yellow_cards_total_per_game_pre_game,away_team_average_yellow_cards_total_per_game_pre_game,home_team_average_yellow_cards_per_game_pre_game,away_team_average_yellow_cards_per_game_pre_game,home_team_cumulative_red_cards_pre_game,away_team_cumulative_red_cards_pre_game,home_team_average_red_cards_total_per_game_pre_game,away_team_average_red_cards_total_per_game_pre_game,home_team_average_red_cards_per_game_pre_game,away_team_average_red_cards_per_game_pre_game,home_team_average_shots_per_game_pre_game,home_team_average_shots_on_target_per_game_pre_game,away_team_average_shots_per_game_pre_game,away_team_average_shots_on_target_per_game_pre_game,home_team_average_fouls_per_game_pre_game,away_team_average_fouls_per_game_pre_game,home_team_average_fouls_total_per_game_pre_game,away_team_average_fouls_total_per_game_pre_game,home_team_average_possession_per_game_pre_game,away_team_average_possession_per_game_pre_game,home_team_average_xg_per_game_pre_game,away_team_average_xg_per_game_pre_game,home_team_games_without_goals_pre_game,away_team_games_without_goals_pre_game,home_team_cumulative_btts_pre_game,away_team_cumulative_btts_pre_game,home_team_cumulative_clean_sheets_pre_game,away_team_cumulative_clean_sheets_pre_game,home_team_cumulative_wins_pre_game,home_team_cumulative_draws_pre_game,home_team_cumulative_losses_pre_game,away_team_cumulative_wins_pre_game,away_team_cumulative_draws_pre_game,away_team_cumulative_losses_pre_game,home_team_average_points_per_game_pre_game,away_team_average_points_per_game_pre_game,home_team_wins_in_last_5_games,home_team_draws_in_last_5_games,home_team_losses_in_last_5_games,away_team_wins_in_last_5_games,away_team_draws_in_last_5_games,away_team_losses_in_last_5_games,home_team_average_goals_scored_in_last_5_games,away_team_average_goals_scored_in_last_5_games,home_team_average_goals_conceded_in_last_5_games,away_team_average_goals_conceded_in_last_5_games,average_total_corners_in_home_team_games_in_last_5_games,average_total_corners_in_away_team_games_in_last_5_games,average_corners_by_home_team_in_last_5_games,average_corners_by_away_team_in_last_5_games,average_yellow_cards_by_home_team_in_last_5_games,average_yellow_cards_by_away_team_in_last_5_games,average_red_cards_by_home_team_in_last_5_games,average_red_cards_by_away_team_in_last_5_games,average_ball_possession_by_home_team_in_last_5_games,average_ball_possession_by_away_team_in_last_5_games,average_xg_by_home_team_in_last_5_games,average_xg_by_away_team_in_last_5_games,average_shots_by_home_team_in_last_5_games,average_shots_by_away_team_in_last_5_games,average_shots_on_target_by_home_team_in_last_5_games,average_shots_on_target_by_away_team_in_last_5_games,average_total_fouls_in_home_team_games_in_last_5_games,average_total_fouls_in_away_team_games_in_last_5_games,average_fouls_by_home_team_in_last_5_games,average_fouls_by_away_team_in_last_5_games,h2h_home_team_wins_pre_game,h2h_away_team_wins_pre_game,h2h_draws_pre_game,average_goals_h2h,average_yellow_cards_h2h,average_red_cards_h2h,average_corners_h2h
37334,-0.192869,Polessya,Shakhtar Donetsk,0.87787,0.592065,-1.278793,-0.194274,0.866375,1.560181,-0.786328,0.155951,0.670255,-0.477338,-0.249313,-0.656416,-0.286451,-0.871317,-0.038716,-0.961539,-0.16772,-0.99119,-0.07541,-0.698319,-0.019866,-0.911584,0.036179,-1.369658,-0.255615,-0.113231,0.376978,-1.507232,-0.325961,0.394001,0.238821,0.129867,0.118859,0.598791,-0.002557,0.186051,0.238004,0.317742,0.562678,0.763683,1.427027,0.120548,-1.224542,0.096923,0.034392,0.097781,0.218537,0.347985,-0.155227,ukraine,25.05.2024,12:30:00,0.358729,-0.689047,-0.375163,-0.700765,-0.524615,-0.466516,-0.519767,-0.551543,1,23/24,0.741339,1.168672,0.547673,0.276929,0.026331,1.135308,-0.460823,-1.03373,-0.401265,0.299818,1.506539,0.627009,-0.820394,0.299818,-0.935231,0.627009,-0.401265,0.299818,1.506539,0.627009,-0.060707,0.069644,-0.087545,0.698479,0.719044,0.187212,0.63718,-0.162297,2.301445,1.808152,1.617745,1.624568,1.828476,1.280433,-0.435786,-0.478486,0.548132,0.867503,-0.037359,-0.040834,-0.002948,-0.032464,0.127269,0.239602,-0.909675,1.083226,0.777448,-0.438414,0.75388,0.824439,1.140696,1.482976,1.062093,0.965623,0.576227,1.536587,0.65749,-0.724806,0.56159,1.4146,0.478755,0.91416,-0.401331,1.406892,0.023353,-1.664258,-0.01641,1.318869,0.226655,-1.523345,0.247134,0.07444,0.476086,0.229863,0.562555,0.00566,0.647405,0.670842,0.18254,0.245359,0.333728,0.721363,0.29,0.494925,0.506879,0.706615,-0.157781,0.091506,-0.030673,0.022323,-1.061757,-1.113569,0.190406,-2.039406,0.461646,2.8887,0.444185
37335,-0.192869,Obolon-Brovar,Oleksandria,0.87787,-1.485214,1.40926,0.374982,0.013778,-0.89806,0.825919,-0.156352,-0.770792,-0.477338,-0.249313,-0.656416,-0.286451,0.623418,-1.305658,0.487027,-1.436481,0.341488,-0.704131,0.326005,0.297024,0.415769,-1.881651,-0.576203,0.115881,0.277901,-0.007832,0.47171,-0.56756,0.049227,-0.158113,0.050774,0.118859,0.430887,0.094232,0.028259,0.026908,-0.143423,0.000778,0.563231,0.953865,-0.775902,-1.035796,0.823043,1.07139,1.10842,0.929579,0.909548,-0.502757,ukraine,25.05.2024,12:30:00,-0.678143,-0.689047,-0.375163,-0.700765,-0.099309,0.253757,-0.351165,-0.128878,2,23/24,0.102324,0.45536,0.759512,0.690158,-1.298756,-0.639192,0.055734,-0.106908,-1.009456,-1.122979,-0.780212,-0.39182,-0.082002,-1.122979,0.176158,-0.39182,-1.009456,-1.122979,-0.780212,-0.39182,-0.201822,-0.552044,-0.416637,-0.652222,0.809474,0.459701,0.63718,0.470632,1.992662,0.510472,1.617745,0.672807,1.537109,-0.250447,-0.209963,-0.440399,-0.307626,-0.161817,0.369152,-0.004948,0.326227,0.144302,0.003446,0.070584,-0.721487,-0.817354,1.619413,1.241234,0.301298,0.595626,1.140696,1.323386,0.121157,1.307936,1.055081,0.439246,1.307311,0.897388,-0.605113,-0.26173,-0.353634,1.506126,-0.401331,-0.403765,1.503186,-0.353134,-1.064049,-0.060457,-0.737787,-0.02491,0.279275,-0.579531,0.409255,-0.179062,-0.504355,-0.223136,-0.540041,-0.536578,0.124234,-0.009015,0.095353,-1.101128,0.364427,-0.556921,-0.335952,-0.922641,0.19991,-0.283971,0.166455,-0.469372,-1.061757,-1.113569,0.190406,0.956008,0.950873,-0.573772,0.612351
37336,-0.192869,Hirnyk,Minai,0.87787,1.131389,-1.278793,0.374982,0.866375,1.560181,-0.786328,0.413196,-0.255481,0.334112,-0.249313,-0.656416,-0.286451,-0.871317,0.769514,-0.961539,-0.16772,0.982351,-0.984852,0.557758,-0.421958,1.187505,-1.12814,0.77483,0.625229,0.277901,-0.007832,1.605756,-0.347382,0.231531,0.282956,0.201924,0.084388,0.26997,0.450236,0.307846,0.096296,0.201359,0.044662,0.386767,-1.056529,0.735121,1.433966,0.163294,-0.089325,0.264134,0.326608,0.131132,0.062093,ukraine,25.05.2024,12:30:00,0.014455,-0.150746,-0.375163,-0.192155,0.288844,0.101548,0.475139,0.600972,1,23/24,0.972293,0.45536,0.547673,0.952623,0.621221,-0.639192,-0.460823,0.58605,0.798559,0.299818,-0.902187,-0.291494,0.264641,0.299818,0.176158,-0.291494,0.798559,0.299818,-0.902187,-0.291494,0.035199,0.18008,0.358077,-0.696099,0.149064,0.442468,-0.42902,0.726073,1.581877,0.974515,0.669815,1.060179,0.648577,0.05858,-0.062316,0.195794,-0.261132,-0.096456,0.266549,0.083676,0.231609,0.140408,0.141021,-0.029389,-0.141691,-0.792399,0.437954,1.241234,0.75388,0.890664,1.238794,0.422756,1.267876,0.657838,0.44504,0.106586,1.204779,1.133789,0.871003,-0.706811,0.478755,-1.365879,0.996479,0.994622,0.023353,-0.353134,-0.01641,0.933513,-0.063664,0.532895,0.076453,0.246041,0.409255,-0.950931,-0.780463,0.409819,-0.540041,-0.536578,0.163428,-0.196036,0.20201,0.042286,0.01266,0.09866,0.093081,0.5341,0.103436,0.081214,-0.057528,-0.200838,-0.038338,-0.080772,0.190406,-1.145456,0.881462,1.882108,0.709633
37337,-0.192869,Chornomorets,Dnipro-1,0.87787,-1.485214,0.855209,-0.194274,0.013778,-0.89806,0.825919,-0.156352,0.124345,-0.477338,-0.249313,-0.656416,-0.286451,-0.871317,-0.038716,-0.961539,-0.16772,-0.355227,-0.07541,-0.698319,0.558047,0.021325,-0.62584,-0.102882,0.692182,0.139412,0.152161,-0.709774,0.100418,0.180978,0.155598,0.050774,0.213598,0.598791,0.206633,0.307846,0.180323,0.118039,-0.23917,0.334139,1.611764,0.120548,-1.270137,0.229022,0.233736,0.390503,0.415568,0.536738,-0.306337,ukraine,25.05.2024,12:30:00,-0.187242,-0.689047,-0.375163,-0.700765,-0.255119,-0.066542,-0.351165,0.321028,2,23/24,0.765,0.7606,0.911052,0.459163,0.084913,0.059736,0.454617,-0.652622,0.278557,-0.174217,0.251701,-0.192706,0.035304,-0.174217,-0.550507,-0.192706,0.278557,-0.174217,0.251701,-0.192706,-0.380516,-0.147966,-0.825305,0.115229,0.225834,0.007552,0.085743,0.063453,1.581877,-0.18565,1.055628,-0.325896,0.946882,-0.560206,-0.343314,-0.329881,-0.068888,0.353855,-0.05294,0.18551,0.09765,0.130623,0.12819,0.157897,-0.595729,0.008122,1.139891,0.224576,0.677867,0.890664,0.77458,1.134857,0.802454,-0.475327,1.32038,1.056513,1.204779,0.306008,-0.228651,0.62425,-0.353634,-1.365879,1.408579,-0.403765,0.911282,0.483281,-0.680175,-0.718912,1.158652,-0.347128,-0.034996,-0.157652,-0.231371,-0.473621,-0.504355,0.409819,-0.540041,-0.536578,0.286719,-0.042947,-0.574667,-0.809865,-0.441981,-0.738334,-0.242218,-0.922641,0.143063,-0.213957,-0.084942,-0.112684,-1.061757,1.596154,-0.982839,0.810326,0.729906,0.206781,0.261494
37338,-0.192869,Metalist 1925 Kharkiv,Veres Rivne,0.87787,-0.216986,0.855209,0.374982,0.013778,-0.89806,0.825919,0.989337,-0.255481,0.875036,-0.249313,0.175667,-0.286451,-0.871317,1.308291,-0.961539,0.641671,0.824085,1.09047,0.053178,0.558047,1.187505,1.305474,-4.551125,-4.547302,-8.015749,-7.975622,0.651602,1.127705,0.352796,0.238821,0.201924,0.118859,0.476964,0.206633,0.224453,0.180323,0.245545,-0.085738,0.559864,0.172241,-0.680082,-0.401039,-5.495275,-0.068483,-4.781756,-3.634225,-5.391384,-5.35911,ukraine,25.05.2024,12:30:00,0.508199,0.558721,-0.375163,0.478181,0.850534,0.253757,1.228434,-4.968547,2,23/24,0.584369,0.519176,1.090926,0.913905,-0.34673,-0.502116,0.954108,0.479635,-0.401265,-0.273709,-0.07797,-0.808043,1.492664,-0.273709,0.515781,-0.808043,-0.401265,-0.273709,-0.07797,-0.808043,-0.015855,-0.466156,-0.897072,-0.630594,0.300225,0.641493,0.451556,0.928733,1.992662,1.316553,0.863746,0.672807,1.537109,0.366519,-0.385387,-0.157045,-0.698046,-0.878451,0.033344,-5.578295,0.119641,-6.142953,0.090802,-5.325564,-0.716912,-1.069509,1.032644,1.329503,1.011844,0.824439,-0.079106,0.422756,0.121157,0.965623,1.260398,0.106586,1.204779,1.133789,-0.808382,-0.706811,-0.353634,0.91416,0.437382,-0.403765,0.911282,0.483281,0.269449,0.223115,1.348482,0.998873,0.401986,-0.37893,0.268099,-0.004939,0.887955,-0.473455,0.647405,-0.536578,0.153753,-2.188868,0.403914,-0.087752,0.238334,0.041237,0.62708,-0.020176,0.181291,-2.232973,0.233371,-2.610984,-1.061757,1.341022,0.190406,0.560951,-0.340548,-0.573772,-0.348759


In [15]:
#conversion float64 and int64 to float32 and int32 to optimize computing
data[data.select_dtypes(include='float64').columns] = data[data.select_dtypes(include='float64').columns].astype('float32')
data[data.select_dtypes(include='int64').columns] = data[data.select_dtypes(include='int64').columns].astype('int32')

In [16]:
data.dtypes

attendance                                                   float32
home_team_name                                                object
away_team_name                                                object
game_week                                                    float32
home_team_goal_count                                         float32
away_team_goal_count                                         float32
total_goal_count                                             float32
total_goals_at_half_time                                     float32
home_team_goal_count_half_time                               float32
away_team_goal_count_half_time                               float32
home_team_corner_count                                       float32
away_team_corner_count                                       float32
home_team_yellow_cards                                       float32
home_team_red_cards                                          float32
away_team_yellow_cards            

# Feature selection

In [17]:
#dropping info leaking features
X = data.drop([
    'home_team_name',
    'away_team_name',
    'home_team_goal_count',
    'away_team_goal_count',
    'total_goal_count',
    'total_goals_at_half_time',
    'home_team_goal_count_half_time',
    'away_team_goal_count_half_time',
    'date',
    'time',
    'result',
    'league',
    'season'
], axis=1)

In [18]:
y = data['result'].astype(int)

In [20]:
X

Unnamed: 0,attendance,game_week,home_team_corner_count,away_team_corner_count,home_team_yellow_cards,home_team_red_cards,away_team_yellow_cards,away_team_red_cards,home_team_first_half_cards,home_team_second_half_cards,away_team_first_half_cards,away_team_second_half_cards,home_team_shots,away_team_shots,home_team_shots_on_target,away_team_shots_on_target,home_team_shots_off_target,away_team_shots_off_target,home_team_fouls,away_team_fouls,home_team_possession,away_team_possession,home_team_xg,away_team_xg,btts_percentage_pre_match,over_15_percentage_pre_match,over_25_percentage_pre_match,over_35_percentage_pre_match,over_45_percentage_pre_match,over_15_ht_fhg_percentage_pre_match,over_05_ht_fhg_percentage_pre_match,over_15_2hg_percentage_pre_match,over_05_2hg_percentage_pre_match,average_corners_per_match_pre_match,average_cards_per_match_pre_match,odds_ft_home_team_win,odds_ft_draw,odds_ft_away_team_win,odds_ft_over15,odds_ft_over25,odds_ft_over35,odds_ft_over45,odds_btts_yes,odds_btts_no,corners_total,yellow_cards_total,red_cards_total,cards_total,shots_total,shots_on_target_total,shots_off_target_total,fouls_total,home_team_cumulative_goals_scored_pre_game,away_team_cumulative_goals_scored_pre_game,home_team_cumulative_goals_conceded_pre_game,away_team_cumulative_goals_conceded_pre_game,home_team_average_goals_scored_per_game_pre_game,away_team_average_goals_scored_per_game_pre_game,home_team_average_goals_conceded_per_game_pre_game,away_team_average_goals_conceded_per_game_pre_game,home_team_average_first_half_goals_scored_pre_game,home_team_average_second_half_goals_scored_pre_game,away_team_average_first_half_goals_scored_pre_game,away_team_average_second_half_goals_scored_pre_game,home_team_average_first_half_goals_conceded_pre_game,home_team_average_second_half_goals_conceded_pre_game,away_team_average_first_half_goals_conceded_pre_game,away_team_average_second_half_goals_conceded_pre_game,home_team_average_first_half_goals_total_pre_game,home_team_average_second_half_goals_total_pre_game,away_team_average_first_half_goals_total_pre_game,away_team_average_second_half_goals_total_pre_game,home_team_average_corners_total_per_game_pre_game,away_team_average_corners_total_per_game_pre_game,home_team_average_corners_per_game_pre_game,away_team_average_corners_per_game_pre_game,home_team_average_yellow_cards_total_per_game_pre_game,away_team_average_yellow_cards_total_per_game_pre_game,home_team_average_yellow_cards_per_game_pre_game,away_team_average_yellow_cards_per_game_pre_game,home_team_cumulative_red_cards_pre_game,away_team_cumulative_red_cards_pre_game,home_team_average_red_cards_total_per_game_pre_game,away_team_average_red_cards_total_per_game_pre_game,home_team_average_red_cards_per_game_pre_game,away_team_average_red_cards_per_game_pre_game,home_team_average_shots_per_game_pre_game,home_team_average_shots_on_target_per_game_pre_game,away_team_average_shots_per_game_pre_game,away_team_average_shots_on_target_per_game_pre_game,home_team_average_fouls_per_game_pre_game,away_team_average_fouls_per_game_pre_game,home_team_average_fouls_total_per_game_pre_game,away_team_average_fouls_total_per_game_pre_game,home_team_average_possession_per_game_pre_game,away_team_average_possession_per_game_pre_game,home_team_average_xg_per_game_pre_game,away_team_average_xg_per_game_pre_game,home_team_games_without_goals_pre_game,away_team_games_without_goals_pre_game,home_team_cumulative_btts_pre_game,away_team_cumulative_btts_pre_game,home_team_cumulative_clean_sheets_pre_game,away_team_cumulative_clean_sheets_pre_game,home_team_cumulative_wins_pre_game,home_team_cumulative_draws_pre_game,home_team_cumulative_losses_pre_game,away_team_cumulative_wins_pre_game,away_team_cumulative_draws_pre_game,away_team_cumulative_losses_pre_game,home_team_average_points_per_game_pre_game,away_team_average_points_per_game_pre_game,home_team_wins_in_last_5_games,home_team_draws_in_last_5_games,home_team_losses_in_last_5_games,away_team_wins_in_last_5_games,away_team_draws_in_last_5_games,away_team_losses_in_last_5_games,home_team_average_goals_scored_in_last_5_games,away_team_average_goals_scored_in_last_5_games,home_team_average_goals_conceded_in_last_5_games,away_team_average_goals_conceded_in_last_5_games,average_total_corners_in_home_team_games_in_last_5_games,average_total_corners_in_away_team_games_in_last_5_games,average_corners_by_home_team_in_last_5_games,average_corners_by_away_team_in_last_5_games,average_yellow_cards_by_home_team_in_last_5_games,average_yellow_cards_by_away_team_in_last_5_games,average_red_cards_by_home_team_in_last_5_games,average_red_cards_by_away_team_in_last_5_games,average_ball_possession_by_home_team_in_last_5_games,average_ball_possession_by_away_team_in_last_5_games,average_xg_by_home_team_in_last_5_games,average_xg_by_away_team_in_last_5_games,average_shots_by_home_team_in_last_5_games,average_shots_by_away_team_in_last_5_games,average_shots_on_target_by_home_team_in_last_5_games,average_shots_on_target_by_away_team_in_last_5_games,average_total_fouls_in_home_team_games_in_last_5_games,average_total_fouls_in_away_team_games_in_last_5_games,average_fouls_by_home_team_in_last_5_games,average_fouls_by_away_team_in_last_5_games,h2h_home_team_wins_pre_game,h2h_away_team_wins_pre_game,h2h_draws_pre_game,average_goals_h2h,average_yellow_cards_h2h,average_red_cards_h2h,average_corners_h2h
0,0.361523,-2.952938,0.413196,-1.543821,-0.477338,-0.249313,0.175667,-0.286451,-0.871317,-0.038716,0.487027,-0.167720,0.224473,-0.465673,0.759100,0.297024,-0.227154,-1.128140,-0.396684,0.478665,0.251220,0.025573,0.766704,-0.117468,-3.531821,-4.017612,-3.479611,-2.688612,-1.800675,-2.870764,-3.957229,-3.315770,-4.059987,-4.029459,-3.646584,-0.735041,0.330693,0.814896,-0.037778,-0.194898,-0.106397,-0.038335,0.230945,0.184005,-0.415458,-0.150746,-0.375163,-0.192155,-0.099309,0.520815,-0.710538,0.121808,-3.078989,-3.109555,-3.123334,-3.093696,-2.927433,-2.937106,-3.071202,-3.059016,-2.032415,-2.280902,-2.039617,-2.284466,-2.144633,-2.280902,-2.144141,-2.284466,-2.032415,-2.280902,-2.039617,-2.284466,-5.602360,-5.630436,-4.817409,-4.881072,-4.735677,-4.747870,-3.856919,-3.815675,-1.282051,-1.276856,-1.138613,-1.142931,-0.862181,-0.870332,-5.287571,-4.885692,-5.317983,-4.928953,-4.676742,-4.676604,-4.795416,-4.796576,-5.664023,-5.672246,-4.503803,-4.531243,-1.928950,-1.908464,-2.674660,-2.674064,-1.959246,-1.978420,-2.176148,-2.036064,-2.175836,-2.212670,-2.035258,-2.144572,-2.679017,-2.723218,-1.658445,-1.365879,-1.716055,-1.719031,-1.368519,-1.664258,-2.396769,-2.421775,-2.481167,-2.457053,-4.771525,-4.799641,-4.051084,-4.113280,-3.079621,-3.039717,-0.540041,-0.536578,-5.141956,-5.166696,-4.057856,-4.092117,-4.680862,-4.716938,-4.194054,-4.235613,-3.957477,-3.982570,-3.930029,-3.947361,-1.061757,-1.113569,-0.982839,-2.039406,-2.119254,-0.573772,-2.332818
1,0.294317,-2.952938,0.631711,0.670255,0.875036,-0.249313,-0.656416,-0.286451,1.576967,-0.038716,-0.961539,-0.167720,0.224473,-0.258505,0.759100,-0.019866,-0.227154,-0.255604,-0.102882,0.872805,0.380066,-0.149504,0.695202,0.119659,-3.531821,-4.017612,-3.479611,-2.688612,-1.800675,-2.870764,-3.957229,-3.315770,-4.059987,-4.029459,-3.646584,-0.789394,0.397598,0.948540,-0.003855,-0.152392,-0.060752,-0.210904,0.462157,-0.045211,0.645777,0.246025,-0.375163,0.182732,-0.027143,0.392818,-0.351165,0.423545,-3.078989,-3.109555,-3.123334,-3.093696,-2.927433,-2.937106,-3.071202,-3.059016,-2.032415,-2.280902,-2.039617,-2.284466,-2.144633,-2.280902,-2.144141,-2.284466,-2.032415,-2.280902,-2.039617,-2.284466,-5.602360,-5.630436,-4.817409,-4.881072,-4.735677,-4.747870,-3.856919,-3.815675,-1.282051,-1.276856,-1.138613,-1.142931,-0.862181,-0.870332,-5.287571,-4.885692,-5.317983,-4.928953,-4.676742,-4.676604,-4.795416,-4.796576,-5.664023,-5.672246,-4.503803,-4.531243,-1.928950,-1.908464,-2.674660,-2.674064,-1.959246,-1.978420,-2.176148,-2.036064,-2.175836,-2.212670,-2.035258,-2.144572,-2.679017,-2.723218,-1.658445,-1.365879,-1.716055,-1.719031,-1.368519,-1.664258,-2.396769,-2.421775,-2.481167,-2.457053,-4.771525,-4.799641,-4.051084,-4.113280,-3.079621,-3.039717,-0.540041,-0.536578,-5.141956,-5.166696,-4.057856,-4.092117,-4.680862,-4.716938,-4.194054,-4.235613,-3.957477,-3.982570,-3.930029,-3.947361,-1.061757,-1.113569,-0.982839,-2.039406,-2.119254,-0.573772,-2.332818
2,0.000320,-2.952938,-0.552625,1.222485,1.273740,-0.249313,-0.656416,-0.286451,1.576967,0.769514,-0.961539,-0.167720,0.098144,-0.258505,-0.278042,0.297024,0.415769,-0.625840,0.835195,0.478665,0.224049,0.058244,-0.038501,-0.036891,-3.531821,-4.017612,-3.479611,-2.688612,-1.800675,-2.870764,-3.957229,-3.315770,-4.059987,-4.029459,-3.646584,0.001545,-0.349427,-0.211075,0.326429,0.425030,0.548180,0.259917,0.309296,0.097278,0.508199,0.558721,-0.375163,0.478181,-0.099309,-0.066542,-0.063379,0.573556,-3.078989,-3.109555,-3.123334,-3.093696,-2.927433,-2.937106,-3.071202,-3.059016,-2.032415,-2.280902,-2.039617,-2.284466,-2.144633,-2.280902,-2.144141,-2.284466,-2.032415,-2.280902,-2.039617,-2.284466,-5.602360,-5.630436,-4.817409,-4.881072,-4.735677,-4.747870,-3.856919,-3.815675,-1.282051,-1.276856,-1.138613,-1.142931,-0.862181,-0.870332,-5.287571,-4.885692,-5.317983,-4.928953,-4.676742,-4.676604,-4.795416,-4.796576,-5.664023,-5.672246,-4.503803,-4.531243,-1.928950,-1.908464,-2.674660,-2.674064,-1.959246,-1.978420,-2.176148,-2.036064,-2.175836,-2.212670,-2.035258,-2.144572,-2.679017,-2.723218,-1.658445,-1.365879,-1.716055,-1.719031,-1.368519,-1.664258,-2.396769,-2.421775,-2.481167,-2.457053,-4.771525,-4.799641,-4.051084,-4.113280,-3.079621,-3.039717,-0.540041,-0.536578,-5.141956,-5.166696,-4.057856,-4.092117,-4.680862,-4.716938,-4.194054,-4.235613,-3.957477,-3.982570,-3.930029,-3.947361,-1.061757,-1.113569,-0.982839,-2.039406,-2.119254,-0.573772,-2.332818
3,0.316974,-2.952938,-0.156352,0.124345,-0.477338,-0.249313,1.139187,-0.286451,0.623418,-1.305658,0.487027,1.181221,0.098144,-0.075410,0.557758,0.297024,-0.227154,-0.255604,-0.240721,0.755526,0.428622,-0.225744,-0.020911,-0.434139,-3.531821,-4.017612,-3.479611,-2.688612,-1.800675,-2.870764,-3.957229,-3.315770,-4.059987,-4.029459,-3.646584,-0.845019,0.397598,1.130623,0.096923,0.034392,0.157122,-0.210904,0.735811,-0.230135,-0.187242,0.558721,-0.375163,0.478181,-0.027143,0.392818,-0.351165,0.321028,-3.078989,-3.109555,-3.123334,-3.093696,-2.927433,-2.937106,-3.071202,-3.059016,-2.032415,-2.280902,-2.039617,-2.284466,-2.144633,-2.280902,-2.144141,-2.284466,-2.032415,-2.280902,-2.039617,-2.284466,-5.602360,-5.630436,-4.817409,-4.881072,-4.735677,-4.747870,-3.856919,-3.815675,-1.282051,-1.276856,-1.138613,-1.142931,-0.862181,-0.870332,-5.287571,-4.885692,-5.317983,-4.928953,-4.676742,-4.676604,-4.795416,-4.796576,-5.664023,-5.672246,-4.503803,-4.531243,-1.928950,-1.908464,-2.674660,-2.674064,-1.959246,-1.978420,-2.176148,-2.036064,-2.175836,-2.212670,-2.035258,-2.144572,-2.679017,-2.723218,-1.658445,-1.365879,-1.716055,-1.719031,-1.368519,-1.664258,-2.396769,-2.421775,-2.481167,-2.457053,-4.771525,-4.799641,-4.051084,-4.113280,-3.079621,-3.039717,-0.540041,-0.536578,-5.141956,-5.166696,-4.057856,-4.092117,-4.680862,-4.716938,-4.194054,-4.235613,-3.957477,-3.982570,-3.930029,-3.947361,-1.061757,-1.113569,-0.982839,-2.039406,-2.119254,-0.573772,-2.332818
4,0.047510,-2.952938,-0.156352,0.423686,0.334112,-0.249313,0.175667,-0.286451,-0.871317,0.769514,-0.961539,0.641671,-0.991190,0.721106,0.053178,1.142650,-2.209907,0.276523,-0.102882,0.217585,-0.597655,0.661012,-1.408920,1.068318,-3.531821,-4.017612,-3.479611,-2.688612,-1.800675,-2.870764,-3.957229,-3.315770,-4.059987,-4.029459,-3.646584,1.921808,0.620658,-1.288675,-0.209920,-0.548878,-0.500193,-0.210904,0.110913,0.269014,0.014455,0.246025,-0.375163,0.182732,-0.027143,0.639376,-0.710538,0.076564,-3.078989,-3.109555,-3.123334,-3.093696,-2.927433,-2.937106,-3.071202,-3.059016,-2.032415,-2.280902,-2.039617,-2.284466,-2.144633,-2.280902,-2.144141,-2.284466,-2.032415,-2.280902,-2.039617,-2.284466,-5.602360,-5.630436,-4.817409,-4.881072,-4.735677,-4.747870,-3.856919,-3.815675,-1.282051,-1.276856,-1.138613,-1.142931,-0.862181,-0.870332,-5.287571,-4.885692,-5.317983,-4.928953,-4.676742,-4.676604,-4.795416,-4.796576,-5.664023,-5.672246,-4.503803,-4.531243,-1.928950,-1.908464,-2.674660,-2.674064,-1.959246,-1.978420,-2.176148,-2.036064,-2.175836,-2.212670,-2.035258,-2.144572,-2.679017,-2.723218,-1.658445,-1.365879,-1.716055,-1.719031,-1.368519,-1.664258,-2.396769,-2.421775,-2.481167,-2.457053,-4.771525,-4.799641,-4.051084,-4.113280,-3.079621,-3.039717,-0.540041,-0.536578,-5.141956,-5.166696,-4.057856,-4.092117,-4.680862,-4.716938,-4.194054,-4.235613,-3.957477,-3.982570,-3.930029,-3.947361,-1.061757,-1.113569,-0.982839,-2.039406,-2.119254,-0.573772,-2.332818
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37334,-0.192869,0.877870,0.155951,0.670255,-0.477338,-0.249313,-0.656416,-0.286451,-0.871317,-0.038716,-0.961539,-0.167720,-0.991190,-0.075410,-0.698319,-0.019866,-0.911584,0.036179,-1.369658,-0.255615,-0.113231,0.376978,-1.507232,-0.325961,0.394001,0.238821,0.129867,0.118859,0.598791,-0.002557,0.186051,0.238004,0.317742,0.562678,0.763683,1.427027,0.120548,-1.224542,0.096923,0.034392,0.097781,0.218537,0.347985,-0.155227,0.358729,-0.689047,-0.375163,-0.700765,-0.524615,-0.466516,-0.519767,-0.551543,0.741339,1.168672,0.547673,0.276929,0.026331,1.135308,-0.460823,-1.033730,-0.401265,0.299818,1.506539,0.627009,-0.820394,0.299818,-0.935231,0.627009,-0.401265,0.299818,1.506539,0.627009,-0.060707,0.069644,-0.087545,0.698479,0.719044,0.187212,0.637180,-0.162297,2.301445,1.808152,1.617745,1.624568,1.828476,1.280433,-0.435786,-0.478486,0.548132,0.867503,-0.037359,-0.040834,-0.002948,-0.032464,0.127269,0.239602,-0.909675,1.083226,0.777448,-0.438414,0.753880,0.824439,1.140696,1.482976,1.062093,0.965623,0.576227,1.536587,0.657490,-0.724806,0.561590,1.414600,0.478755,0.914160,-0.401331,1.406892,0.023353,-1.664258,-0.016410,1.318869,0.226655,-1.523345,0.247134,0.074440,0.476086,0.229863,0.562555,0.005660,0.647405,0.670842,0.182540,0.245359,0.333728,0.721363,0.290000,0.494925,0.506879,0.706615,-0.157781,0.091506,-0.030673,0.022323,-1.061757,-1.113569,0.190406,-2.039406,0.461646,2.888700,0.444185
37335,-0.192869,0.877870,-0.156352,-0.770792,-0.477338,-0.249313,-0.656416,-0.286451,0.623418,-1.305658,0.487027,-1.436481,0.341488,-0.704131,0.326005,0.297024,0.415769,-1.881651,-0.576203,0.115881,0.277901,-0.007832,0.471710,-0.567560,0.049227,-0.158113,0.050774,0.118859,0.430887,0.094232,0.028259,0.026908,-0.143423,0.000778,0.563231,0.953865,-0.775902,-1.035796,0.823043,1.071390,1.108420,0.929579,0.909548,-0.502757,-0.678143,-0.689047,-0.375163,-0.700765,-0.099309,0.253757,-0.351165,-0.128878,0.102324,0.455360,0.759512,0.690158,-1.298756,-0.639192,0.055734,-0.106908,-1.009456,-1.122979,-0.780212,-0.391820,-0.082002,-1.122979,0.176158,-0.391820,-1.009456,-1.122979,-0.780212,-0.391820,-0.201822,-0.552044,-0.416637,-0.652222,0.809474,0.459701,0.637180,0.470632,1.992661,0.510472,1.617745,0.672807,1.537109,-0.250447,-0.209963,-0.440399,-0.307626,-0.161817,0.369152,-0.004948,0.326227,0.144302,0.003446,0.070584,-0.721487,-0.817354,1.619413,1.241234,0.301298,0.595626,1.140696,1.323386,0.121157,1.307936,1.055081,0.439246,1.307311,0.897388,-0.605114,-0.261730,-0.353634,1.506126,-0.401331,-0.403765,1.503186,-0.353134,-1.064049,-0.060457,-0.737787,-0.024910,0.279275,-0.579531,0.409255,-0.179062,-0.504355,-0.223136,-0.540041,-0.536578,0.124234,-0.009015,0.095353,-1.101128,0.364427,-0.556921,-0.335952,-0.922641,0.199910,-0.283971,0.166455,-0.469372,-1.061757,-1.113569,0.190406,0.956008,0.950873,-0.573772,0.612351
37336,-0.192869,0.877870,0.413196,-0.255481,0.334112,-0.249313,-0.656416,-0.286451,-0.871317,0.769514,-0.961539,-0.167720,0.982351,-0.984852,0.557758,-0.421958,1.187505,-1.128140,0.774830,0.625229,0.277901,-0.007832,1.605756,-0.347382,0.231531,0.282956,0.201924,0.084388,0.269970,0.450236,0.307846,0.096296,0.201359,0.044662,0.386767,-1.056529,0.735121,1.433966,0.163294,-0.089325,0.264134,0.326608,0.131132,0.062093,0.014455,-0.150746,-0.375163,-0.192155,0.288844,0.101548,0.475139,0.600972,0.972293,0.455360,0.547673,0.952623,0.621221,-0.639192,-0.460823,0.586050,0.798559,0.299818,-0.902187,-0.291494,0.264641,0.299818,0.176158,-0.291494,0.798559,0.299818,-0.902187,-0.291494,0.035199,0.180080,0.358077,-0.696099,0.149064,0.442468,-0.429020,0.726072,1.581877,0.974515,0.669815,1.060179,0.648577,0.058580,-0.062316,0.195794,-0.261132,-0.096456,0.266549,0.083676,0.231609,0.140408,0.141021,-0.029389,-0.141691,-0.792399,0.437954,1.241234,0.753880,0.890664,1.238794,0.422756,1.267876,0.657838,0.445040,0.106586,1.204779,1.133789,0.871003,-0.706811,0.478755,-1.365879,0.996479,0.994622,0.023353,-0.353134,-0.016410,0.933513,-0.063664,0.532894,0.076453,0.246041,0.409255,-0.950931,-0.780463,0.409819,-0.540041,-0.536578,0.163428,-0.196036,0.202010,0.042286,0.012660,0.098660,0.093081,0.534100,0.103436,0.081214,-0.057528,-0.200838,-0.038338,-0.080772,0.190406,-1.145456,0.881462,1.882108,0.709633
37337,-0.192869,0.877870,-0.156352,0.124345,-0.477338,-0.249313,-0.656416,-0.286451,-0.871317,-0.038716,-0.961539,-0.167720,-0.355227,-0.075410,-0.698319,0.558047,0.021325,-0.625840,-0.102882,0.692182,0.139412,0.152161,-0.709774,0.100418,0.180978,0.155598,0.050774,0.213598,0.598791,0.206633,0.307846,0.180323,0.118039,-0.239170,0.334139,1.611764,0.120548,-1.270137,0.229022,0.233736,0.390503,0.415568,0.536738,-0.306337,-0.187242,-0.689047,-0.375163,-0.700765,-0.255119,-0.066542,-0.351165,0.321028,0.765000,0.760600,0.911052,0.459163,0.084913,0.059736,0.454617,-0.652622,0.278557,-0.174217,0.251701,-0.192706,0.035304,-0.174217,-0.550507,-0.192706,0.278557,-0.174217,0.251701,-0.192706,-0.380516,-0.147966,-0.825305,0.115229,0.225834,0.007552,0.085743,0.063453,1.581877,-0.185650,1.055628,-0.325896,0.946882,-0.560206,-0.343314,-0.329881,-0.068888,0.353855,-0.052940,0.185510,0.097650,0.130623,0.128190,0.157897,-0.595729,0.008122,1.139891,0.224576,0.677867,0.890664,0.774580,1.134857,0.802454,-0.475327,1.320379,1.056513,1.204779,0.306008,-0.228651,0.624250,-0.353634,-1.365879,1.408579,-0.403765,0.911282,0.483281,-0.680175,-0.718912,1.158652,-0.347128,-0.034996,-0.157652,-0.231371,-0.473621,-0.504355,0.409819,-0.540041,-0.536578,0.286719,-0.042947,-0.574667,-0.809865,-0.441981,-0.738334,-0.242218,-0.922641,0.143063,-0.213957,-0.084942,-0.112684,-1.061757,1.596154,-0.982839,0.810326,0.729906,0.206781,0.261494


In [24]:
from sklearn.feature_selection import VarianceThreshold

# Przykład użycia z wartością progową wariancji równą 0.1
selector = VarianceThreshold(threshold=1)
X_new = selector.fit_transform(X)

# Wybrane cechy można sprawdzić tak:
selected_features = X.columns[selector.get_support(indices=True)]
print("Wybrane cechy:", selected_features)

Wybrane cechy: Index(['attendance', 'home_team_corner_count', 'away_team_corner_count',
       'away_team_yellow_cards', 'away_team_red_cards',
       'home_team_second_half_cards', 'away_team_second_half_cards',
       'away_team_shots', 'home_team_shots_on_target',
       'away_team_shots_on_target', 'home_team_shots_off_target',
       'away_team_shots_off_target', 'home_team_fouls', 'away_team_fouls',
       'away_team_possession', 'away_team_xg', 'over_25_percentage_pre_match',
       'over_15_ht_fhg_percentage_pre_match',
       'over_05_ht_fhg_percentage_pre_match',
       'over_15_2hg_percentage_pre_match', 'over_05_2hg_percentage_pre_match',
       'average_corners_per_match_pre_match', 'odds_ft_home_team_win',
       'odds_ft_draw', 'odds_ft_away_team_win', 'odds_ft_over35',
       'yellow_cards_total', 'cards_total',
       'away_team_cumulative_goals_conceded_pre_game',
       'home_team_average_goals_scored_per_game_pre_game',
       'away_team_average_first_half_goals_sco

In [19]:
from sklearn.feature_selection import SelectKBest, chi2

# Selekcja zmiennych
selector = SelectKBest(score_func=chi2, k=10)  # Wybierz 10 najlepszych cech
X_new = selector.fit_transform(X, y)

# Wybrane cechy można sprawdzić tak:
selected_features = X.columns[selector.get_support(indices=True)]
print("Wybrane cechy:", selected_features)

ValueError: Input X must be non-negative.

In [154]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [29]:
num_features = X.select_dtypes('number').columns

In [32]:
X[num_features]

Unnamed: 0,attendance,game_week,home_team_corner_count,away_team_corner_count,home_team_yellow_cards,home_team_red_cards,away_team_yellow_cards,away_team_red_cards,home_team_first_half_cards,home_team_second_half_cards,away_team_first_half_cards,away_team_second_half_cards,home_team_shots,away_team_shots,home_team_shots_on_target,away_team_shots_on_target,home_team_shots_off_target,away_team_shots_off_target,home_team_fouls,away_team_fouls,home_team_possession,away_team_possession,home_team_xg,away_team_xg,btts_percentage_pre_match,over_15_percentage_pre_match,over_25_percentage_pre_match,over_35_percentage_pre_match,over_45_percentage_pre_match,over_15_ht_fhg_percentage_pre_match,over_05_ht_fhg_percentage_pre_match,over_15_2hg_percentage_pre_match,over_05_2hg_percentage_pre_match,average_corners_per_match_pre_match,average_cards_per_match_pre_match,odds_ft_home_team_win,odds_ft_draw,odds_ft_away_team_win,odds_ft_over15,odds_ft_over25,odds_ft_over35,odds_ft_over45,odds_btts_yes,odds_btts_no,corners_total,yellow_cards_total,red_cards_total,cards_total,shots_total,shots_on_target_total,shots_off_target_total,fouls_total,home_team_cumulative_goals_scored_pre_game,away_team_cumulative_goals_scored_pre_game,home_team_cumulative_goals_conceded_pre_game,away_team_cumulative_goals_conceded_pre_game,home_team_average_goals_scored_per_game_pre_game,away_team_average_goals_scored_per_game_pre_game,home_team_average_goals_conceded_per_game_pre_game,away_team_average_goals_conceded_per_game_pre_game,home_team_average_first_half_goals_scored_pre_game,home_team_average_second_half_goals_scored_pre_game,away_team_average_first_half_goals_scored_pre_game,away_team_average_second_half_goals_scored_pre_game,home_team_average_first_half_goals_conceded_pre_game,home_team_average_second_half_goals_conceded_pre_game,away_team_average_first_half_goals_conceded_pre_game,away_team_average_second_half_goals_conceded_pre_game,home_team_average_first_half_goals_total_pre_game,home_team_average_second_half_goals_total_pre_game,away_team_average_first_half_goals_total_pre_game,away_team_average_second_half_goals_total_pre_game,home_team_average_corners_total_per_game_pre_game,away_team_average_corners_total_per_game_pre_game,home_team_average_corners_per_game_pre_game,away_team_average_corners_per_game_pre_game,home_team_average_yellow_cards_total_per_game_pre_game,away_team_average_yellow_cards_total_per_game_pre_game,home_team_average_yellow_cards_per_game_pre_game,away_team_average_yellow_cards_per_game_pre_game,home_team_cumulative_red_cards_pre_game,away_team_cumulative_red_cards_pre_game,home_team_average_red_cards_total_per_game_pre_game,away_team_average_red_cards_total_per_game_pre_game,home_team_average_red_cards_per_game_pre_game,away_team_average_red_cards_per_game_pre_game,home_team_average_shots_per_game_pre_game,home_team_average_shots_on_target_per_game_pre_game,away_team_average_shots_per_game_pre_game,away_team_average_shots_on_target_per_game_pre_game,home_team_average_fouls_per_game_pre_game,away_team_average_fouls_per_game_pre_game,home_team_average_fouls_total_per_game_pre_game,away_team_average_fouls_total_per_game_pre_game,home_team_average_possession_per_game_pre_game,away_team_average_possession_per_game_pre_game,home_team_average_xg_per_game_pre_game,away_team_average_xg_per_game_pre_game,home_team_games_without_goals_pre_game,away_team_games_without_goals_pre_game,home_team_cumulative_btts_pre_game,away_team_cumulative_btts_pre_game,home_team_cumulative_clean_sheets_pre_game,away_team_cumulative_clean_sheets_pre_game,home_team_cumulative_wins_pre_game,home_team_cumulative_draws_pre_game,home_team_cumulative_losses_pre_game,away_team_cumulative_wins_pre_game,away_team_cumulative_draws_pre_game,away_team_cumulative_losses_pre_game,home_team_average_points_per_game_pre_game,away_team_average_points_per_game_pre_game,home_team_wins_in_last_5_games,home_team_draws_in_last_5_games,home_team_losses_in_last_5_games,away_team_wins_in_last_5_games,away_team_draws_in_last_5_games,away_team_losses_in_last_5_games,home_team_average_goals_scored_in_last_5_games,away_team_average_goals_scored_in_last_5_games,home_team_average_goals_conceded_in_last_5_games,away_team_average_goals_conceded_in_last_5_games,average_total_corners_in_home_team_games_in_last_5_games,average_total_corners_in_away_team_games_in_last_5_games,average_corners_by_home_team_in_last_5_games,average_corners_by_away_team_in_last_5_games,average_yellow_cards_by_home_team_in_last_5_games,average_yellow_cards_by_away_team_in_last_5_games,average_red_cards_by_home_team_in_last_5_games,average_red_cards_by_away_team_in_last_5_games,average_ball_possession_by_home_team_in_last_5_games,average_ball_possession_by_away_team_in_last_5_games,average_xg_by_home_team_in_last_5_games,average_xg_by_away_team_in_last_5_games,average_shots_by_home_team_in_last_5_games,average_shots_by_away_team_in_last_5_games,average_shots_on_target_by_home_team_in_last_5_games,average_shots_on_target_by_away_team_in_last_5_games,average_total_fouls_in_home_team_games_in_last_5_games,average_total_fouls_in_away_team_games_in_last_5_games,average_fouls_by_home_team_in_last_5_games,average_fouls_by_away_team_in_last_5_games,h2h_home_team_wins_pre_game,h2h_away_team_wins_pre_game,h2h_draws_pre_game,average_goals_h2h,average_yellow_cards_h2h,average_red_cards_h2h,average_corners_h2h
0,0.361523,-2.952938,0.413196,-1.543821,-0.477338,-0.249313,0.175667,-0.286451,-0.871317,-0.038716,0.487027,-0.167720,0.224473,-0.465673,0.759100,0.297024,-0.227154,-1.128140,-0.396684,0.478665,0.251220,0.025573,0.766704,-0.117468,-3.531821,-4.017612,-3.479611,-2.688612,-1.800675,-2.870764,-3.957229,-3.315770,-4.059987,-4.029459,-3.646584,-0.735041,0.330693,0.814896,-0.037778,-0.194898,-0.106397,-0.038335,0.230945,0.184005,-0.415458,-0.150746,-0.375163,-0.192155,-0.099309,0.520815,-0.710538,0.121808,-3.078989,-3.109555,-3.123334,-3.093696,-2.927433,-2.937106,-3.071202,-3.059016,-2.032415,-2.280902,-2.039617,-2.284466,-2.144633,-2.280902,-2.144141,-2.284466,-2.032415,-2.280902,-2.039617,-2.284466,-5.602360,-5.630436,-4.817409,-4.881072,-4.735677,-4.747870,-3.856919,-3.815675,-1.282051,-1.276856,-1.138613,-1.142931,-0.862181,-0.870332,-5.287571,-4.885692,-5.317983,-4.928953,-4.676742,-4.676604,-4.795416,-4.796576,-5.664023,-5.672246,-4.503803,-4.531243,-1.928950,-1.908464,-2.674660,-2.674064,-1.959246,-1.978420,-2.176148,-2.036064,-2.175836,-2.212670,-2.035258,-2.144572,-2.679017,-2.723218,-1.658445,-1.365879,-1.716055,-1.719031,-1.368519,-1.664258,-2.396769,-2.421775,-2.481167,-2.457053,-4.771525,-4.799641,-4.051084,-4.113280,-3.079621,-3.039717,-0.540041,-0.536578,-5.141956,-5.166696,-4.057856,-4.092117,-4.680862,-4.716938,-4.194054,-4.235613,-3.957477,-3.982570,-3.930029,-3.947361,-1.061757,-1.113569,-0.982839,-2.039406,-2.119254,-0.573772,-2.332818
1,0.294317,-2.952938,0.631711,0.670255,0.875036,-0.249313,-0.656416,-0.286451,1.576967,-0.038716,-0.961539,-0.167720,0.224473,-0.258505,0.759100,-0.019866,-0.227154,-0.255604,-0.102882,0.872805,0.380066,-0.149504,0.695202,0.119659,-3.531821,-4.017612,-3.479611,-2.688612,-1.800675,-2.870764,-3.957229,-3.315770,-4.059987,-4.029459,-3.646584,-0.789394,0.397598,0.948540,-0.003855,-0.152392,-0.060752,-0.210904,0.462157,-0.045211,0.645777,0.246025,-0.375163,0.182732,-0.027143,0.392818,-0.351165,0.423545,-3.078989,-3.109555,-3.123334,-3.093696,-2.927433,-2.937106,-3.071202,-3.059016,-2.032415,-2.280902,-2.039617,-2.284466,-2.144633,-2.280902,-2.144141,-2.284466,-2.032415,-2.280902,-2.039617,-2.284466,-5.602360,-5.630436,-4.817409,-4.881072,-4.735677,-4.747870,-3.856919,-3.815675,-1.282051,-1.276856,-1.138613,-1.142931,-0.862181,-0.870332,-5.287571,-4.885692,-5.317983,-4.928953,-4.676742,-4.676604,-4.795416,-4.796576,-5.664023,-5.672246,-4.503803,-4.531243,-1.928950,-1.908464,-2.674660,-2.674064,-1.959246,-1.978420,-2.176148,-2.036064,-2.175836,-2.212670,-2.035258,-2.144572,-2.679017,-2.723218,-1.658445,-1.365879,-1.716055,-1.719031,-1.368519,-1.664258,-2.396769,-2.421775,-2.481167,-2.457053,-4.771525,-4.799641,-4.051084,-4.113280,-3.079621,-3.039717,-0.540041,-0.536578,-5.141956,-5.166696,-4.057856,-4.092117,-4.680862,-4.716938,-4.194054,-4.235613,-3.957477,-3.982570,-3.930029,-3.947361,-1.061757,-1.113569,-0.982839,-2.039406,-2.119254,-0.573772,-2.332818
2,0.000320,-2.952938,-0.552625,1.222485,1.273740,-0.249313,-0.656416,-0.286451,1.576967,0.769514,-0.961539,-0.167720,0.098144,-0.258505,-0.278042,0.297024,0.415769,-0.625840,0.835195,0.478665,0.224049,0.058244,-0.038501,-0.036891,-3.531821,-4.017612,-3.479611,-2.688612,-1.800675,-2.870764,-3.957229,-3.315770,-4.059987,-4.029459,-3.646584,0.001545,-0.349427,-0.211075,0.326429,0.425030,0.548180,0.259917,0.309296,0.097278,0.508199,0.558721,-0.375163,0.478181,-0.099309,-0.066542,-0.063379,0.573556,-3.078989,-3.109555,-3.123334,-3.093696,-2.927433,-2.937106,-3.071202,-3.059016,-2.032415,-2.280902,-2.039617,-2.284466,-2.144633,-2.280902,-2.144141,-2.284466,-2.032415,-2.280902,-2.039617,-2.284466,-5.602360,-5.630436,-4.817409,-4.881072,-4.735677,-4.747870,-3.856919,-3.815675,-1.282051,-1.276856,-1.138613,-1.142931,-0.862181,-0.870332,-5.287571,-4.885692,-5.317983,-4.928953,-4.676742,-4.676604,-4.795416,-4.796576,-5.664023,-5.672246,-4.503803,-4.531243,-1.928950,-1.908464,-2.674660,-2.674064,-1.959246,-1.978420,-2.176148,-2.036064,-2.175836,-2.212670,-2.035258,-2.144572,-2.679017,-2.723218,-1.658445,-1.365879,-1.716055,-1.719031,-1.368519,-1.664258,-2.396769,-2.421775,-2.481167,-2.457053,-4.771525,-4.799641,-4.051084,-4.113280,-3.079621,-3.039717,-0.540041,-0.536578,-5.141956,-5.166696,-4.057856,-4.092117,-4.680862,-4.716938,-4.194054,-4.235613,-3.957477,-3.982570,-3.930029,-3.947361,-1.061757,-1.113569,-0.982839,-2.039406,-2.119254,-0.573772,-2.332818
3,0.316974,-2.952938,-0.156352,0.124345,-0.477338,-0.249313,1.139187,-0.286451,0.623418,-1.305658,0.487027,1.181221,0.098144,-0.075410,0.557758,0.297024,-0.227154,-0.255604,-0.240721,0.755526,0.428622,-0.225744,-0.020911,-0.434139,-3.531821,-4.017612,-3.479611,-2.688612,-1.800675,-2.870764,-3.957229,-3.315770,-4.059987,-4.029459,-3.646584,-0.845019,0.397598,1.130623,0.096923,0.034392,0.157122,-0.210904,0.735811,-0.230135,-0.187242,0.558721,-0.375163,0.478181,-0.027143,0.392818,-0.351165,0.321028,-3.078989,-3.109555,-3.123334,-3.093696,-2.927433,-2.937106,-3.071202,-3.059016,-2.032415,-2.280902,-2.039617,-2.284466,-2.144633,-2.280902,-2.144141,-2.284466,-2.032415,-2.280902,-2.039617,-2.284466,-5.602360,-5.630436,-4.817409,-4.881072,-4.735677,-4.747870,-3.856919,-3.815675,-1.282051,-1.276856,-1.138613,-1.142931,-0.862181,-0.870332,-5.287571,-4.885692,-5.317983,-4.928953,-4.676742,-4.676604,-4.795416,-4.796576,-5.664023,-5.672246,-4.503803,-4.531243,-1.928950,-1.908464,-2.674660,-2.674064,-1.959246,-1.978420,-2.176148,-2.036064,-2.175836,-2.212670,-2.035258,-2.144572,-2.679017,-2.723218,-1.658445,-1.365879,-1.716055,-1.719031,-1.368519,-1.664258,-2.396769,-2.421775,-2.481167,-2.457053,-4.771525,-4.799641,-4.051084,-4.113280,-3.079621,-3.039717,-0.540041,-0.536578,-5.141956,-5.166696,-4.057856,-4.092117,-4.680862,-4.716938,-4.194054,-4.235613,-3.957477,-3.982570,-3.930029,-3.947361,-1.061757,-1.113569,-0.982839,-2.039406,-2.119254,-0.573772,-2.332818
4,0.047510,-2.952938,-0.156352,0.423686,0.334112,-0.249313,0.175667,-0.286451,-0.871317,0.769514,-0.961539,0.641671,-0.991190,0.721106,0.053178,1.142650,-2.209907,0.276523,-0.102882,0.217585,-0.597655,0.661012,-1.408920,1.068318,-3.531821,-4.017612,-3.479611,-2.688612,-1.800675,-2.870764,-3.957229,-3.315770,-4.059987,-4.029459,-3.646584,1.921808,0.620658,-1.288675,-0.209920,-0.548878,-0.500193,-0.210904,0.110913,0.269014,0.014455,0.246025,-0.375163,0.182732,-0.027143,0.639376,-0.710538,0.076564,-3.078989,-3.109555,-3.123334,-3.093696,-2.927433,-2.937106,-3.071202,-3.059016,-2.032415,-2.280902,-2.039617,-2.284466,-2.144633,-2.280902,-2.144141,-2.284466,-2.032415,-2.280902,-2.039617,-2.284466,-5.602360,-5.630436,-4.817409,-4.881072,-4.735677,-4.747870,-3.856919,-3.815675,-1.282051,-1.276856,-1.138613,-1.142931,-0.862181,-0.870332,-5.287571,-4.885692,-5.317983,-4.928953,-4.676742,-4.676604,-4.795416,-4.796576,-5.664023,-5.672246,-4.503803,-4.531243,-1.928950,-1.908464,-2.674660,-2.674064,-1.959246,-1.978420,-2.176148,-2.036064,-2.175836,-2.212670,-2.035258,-2.144572,-2.679017,-2.723218,-1.658445,-1.365879,-1.716055,-1.719031,-1.368519,-1.664258,-2.396769,-2.421775,-2.481167,-2.457053,-4.771525,-4.799641,-4.051084,-4.113280,-3.079621,-3.039717,-0.540041,-0.536578,-5.141956,-5.166696,-4.057856,-4.092117,-4.680862,-4.716938,-4.194054,-4.235613,-3.957477,-3.982570,-3.930029,-3.947361,-1.061757,-1.113569,-0.982839,-2.039406,-2.119254,-0.573772,-2.332818
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37334,-0.192869,0.877870,0.155951,0.670255,-0.477338,-0.249313,-0.656416,-0.286451,-0.871317,-0.038716,-0.961539,-0.167720,-0.991190,-0.075410,-0.698319,-0.019866,-0.911584,0.036179,-1.369658,-0.255615,-0.113231,0.376978,-1.507232,-0.325961,0.394001,0.238821,0.129867,0.118859,0.598791,-0.002557,0.186051,0.238004,0.317742,0.562678,0.763683,1.427027,0.120548,-1.224542,0.096923,0.034392,0.097781,0.218537,0.347985,-0.155227,0.358729,-0.689047,-0.375163,-0.700765,-0.524615,-0.466516,-0.519767,-0.551543,0.741339,1.168672,0.547673,0.276929,0.026331,1.135308,-0.460823,-1.033730,-0.401265,0.299818,1.506539,0.627009,-0.820394,0.299818,-0.935231,0.627009,-0.401265,0.299818,1.506539,0.627009,-0.060707,0.069644,-0.087545,0.698479,0.719044,0.187212,0.637180,-0.162297,2.301445,1.808152,1.617745,1.624568,1.828476,1.280433,-0.435786,-0.478486,0.548132,0.867503,-0.037359,-0.040834,-0.002948,-0.032464,0.127269,0.239602,-0.909675,1.083226,0.777448,-0.438414,0.753880,0.824439,1.140696,1.482976,1.062093,0.965623,0.576227,1.536587,0.657490,-0.724806,0.561590,1.414600,0.478755,0.914160,-0.401331,1.406892,0.023353,-1.664258,-0.016410,1.318869,0.226655,-1.523345,0.247134,0.074440,0.476086,0.229863,0.562555,0.005660,0.647405,0.670842,0.182540,0.245359,0.333728,0.721363,0.290000,0.494925,0.506879,0.706615,-0.157781,0.091506,-0.030673,0.022323,-1.061757,-1.113569,0.190406,-2.039406,0.461646,2.888700,0.444185
37335,-0.192869,0.877870,-0.156352,-0.770792,-0.477338,-0.249313,-0.656416,-0.286451,0.623418,-1.305658,0.487027,-1.436481,0.341488,-0.704131,0.326005,0.297024,0.415769,-1.881651,-0.576203,0.115881,0.277901,-0.007832,0.471710,-0.567560,0.049227,-0.158113,0.050774,0.118859,0.430887,0.094232,0.028259,0.026908,-0.143423,0.000778,0.563231,0.953865,-0.775902,-1.035796,0.823043,1.071390,1.108420,0.929579,0.909548,-0.502757,-0.678143,-0.689047,-0.375163,-0.700765,-0.099309,0.253757,-0.351165,-0.128878,0.102324,0.455360,0.759512,0.690158,-1.298756,-0.639192,0.055734,-0.106908,-1.009456,-1.122979,-0.780212,-0.391820,-0.082002,-1.122979,0.176158,-0.391820,-1.009456,-1.122979,-0.780212,-0.391820,-0.201822,-0.552044,-0.416637,-0.652222,0.809474,0.459701,0.637180,0.470632,1.992661,0.510472,1.617745,0.672807,1.537109,-0.250447,-0.209963,-0.440399,-0.307626,-0.161817,0.369152,-0.004948,0.326227,0.144302,0.003446,0.070584,-0.721487,-0.817354,1.619413,1.241234,0.301298,0.595626,1.140696,1.323386,0.121157,1.307936,1.055081,0.439246,1.307311,0.897388,-0.605114,-0.261730,-0.353634,1.506126,-0.401331,-0.403765,1.503186,-0.353134,-1.064049,-0.060457,-0.737787,-0.024910,0.279275,-0.579531,0.409255,-0.179062,-0.504355,-0.223136,-0.540041,-0.536578,0.124234,-0.009015,0.095353,-1.101128,0.364427,-0.556921,-0.335952,-0.922641,0.199910,-0.283971,0.166455,-0.469372,-1.061757,-1.113569,0.190406,0.956008,0.950873,-0.573772,0.612351
37336,-0.192869,0.877870,0.413196,-0.255481,0.334112,-0.249313,-0.656416,-0.286451,-0.871317,0.769514,-0.961539,-0.167720,0.982351,-0.984852,0.557758,-0.421958,1.187505,-1.128140,0.774830,0.625229,0.277901,-0.007832,1.605756,-0.347382,0.231531,0.282956,0.201924,0.084388,0.269970,0.450236,0.307846,0.096296,0.201359,0.044662,0.386767,-1.056529,0.735121,1.433966,0.163294,-0.089325,0.264134,0.326608,0.131132,0.062093,0.014455,-0.150746,-0.375163,-0.192155,0.288844,0.101548,0.475139,0.600972,0.972293,0.455360,0.547673,0.952623,0.621221,-0.639192,-0.460823,0.586050,0.798559,0.299818,-0.902187,-0.291494,0.264641,0.299818,0.176158,-0.291494,0.798559,0.299818,-0.902187,-0.291494,0.035199,0.180080,0.358077,-0.696099,0.149064,0.442468,-0.429020,0.726072,1.581877,0.974515,0.669815,1.060179,0.648577,0.058580,-0.062316,0.195794,-0.261132,-0.096456,0.266549,0.083676,0.231609,0.140408,0.141021,-0.029389,-0.141691,-0.792399,0.437954,1.241234,0.753880,0.890664,1.238794,0.422756,1.267876,0.657838,0.445040,0.106586,1.204779,1.133789,0.871003,-0.706811,0.478755,-1.365879,0.996479,0.994622,0.023353,-0.353134,-0.016410,0.933513,-0.063664,0.532894,0.076453,0.246041,0.409255,-0.950931,-0.780463,0.409819,-0.540041,-0.536578,0.163428,-0.196036,0.202010,0.042286,0.012660,0.098660,0.093081,0.534100,0.103436,0.081214,-0.057528,-0.200838,-0.038338,-0.080772,0.190406,-1.145456,0.881462,1.882108,0.709633
37337,-0.192869,0.877870,-0.156352,0.124345,-0.477338,-0.249313,-0.656416,-0.286451,-0.871317,-0.038716,-0.961539,-0.167720,-0.355227,-0.075410,-0.698319,0.558047,0.021325,-0.625840,-0.102882,0.692182,0.139412,0.152161,-0.709774,0.100418,0.180978,0.155598,0.050774,0.213598,0.598791,0.206633,0.307846,0.180323,0.118039,-0.239170,0.334139,1.611764,0.120548,-1.270137,0.229022,0.233736,0.390503,0.415568,0.536738,-0.306337,-0.187242,-0.689047,-0.375163,-0.700765,-0.255119,-0.066542,-0.351165,0.321028,0.765000,0.760600,0.911052,0.459163,0.084913,0.059736,0.454617,-0.652622,0.278557,-0.174217,0.251701,-0.192706,0.035304,-0.174217,-0.550507,-0.192706,0.278557,-0.174217,0.251701,-0.192706,-0.380516,-0.147966,-0.825305,0.115229,0.225834,0.007552,0.085743,0.063453,1.581877,-0.185650,1.055628,-0.325896,0.946882,-0.560206,-0.343314,-0.329881,-0.068888,0.353855,-0.052940,0.185510,0.097650,0.130623,0.128190,0.157897,-0.595729,0.008122,1.139891,0.224576,0.677867,0.890664,0.774580,1.134857,0.802454,-0.475327,1.320379,1.056513,1.204779,0.306008,-0.228651,0.624250,-0.353634,-1.365879,1.408579,-0.403765,0.911282,0.483281,-0.680175,-0.718912,1.158652,-0.347128,-0.034996,-0.157652,-0.231371,-0.473621,-0.504355,0.409819,-0.540041,-0.536578,0.286719,-0.042947,-0.574667,-0.809865,-0.441981,-0.738334,-0.242218,-0.922641,0.143063,-0.213957,-0.084942,-0.112684,-1.061757,1.596154,-0.982839,0.810326,0.729906,0.206781,0.261494


In [35]:
y.value_counts(normalize=True)

result
1    0.436059
2    0.305338
0    0.258604
Name: proportion, dtype: float64

In [36]:
features_scores = mutual_info_classif(X[num_features], y, random_state=42)

In [51]:
pd.DataFrame(dict(zip(num_features, features_scores)), index=['mutual_info_score']).T.sort_values(by='mutual_info_score', ascending=False)

Unnamed: 0,mutual_info_score
odds_ft_home_team_win,0.09371
odds_ft_away_team_win,0.092769
home_team_shots_on_target,0.090075
away_team_shots_on_target,0.082284
away_team_xg,0.04529
home_team_xg,0.036807
away_team_shots,0.034007
home_team_shots,0.025287
away_team_average_points_per_game_pre_game,0.024887
average_ball_possession_by_away_team_in_last_5_games,0.024528


# Result prediction

## Random forest

In [55]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

### Base

In [56]:
rf_model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=42)

In [57]:
rf_model.fit(X_train, y_train)

In [58]:
y_pred = rf_model.predict(X_test)

In [59]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.88      0.03      0.07      1951
           1       0.53      0.90      0.67      3268
           2       0.57      0.47      0.52      2249

    accuracy                           0.54      7468
   macro avg       0.66      0.47      0.42      7468
weighted avg       0.63      0.54      0.46      7468



### Hyperparameters tuning with optuna

In [168]:
def objective(trial):
    # Number of trees in random forest
    n_estimators = trial.suggest_int(name="n_estimators", low=100, high=500, step=100)

    # Number of features to consider at every split
    max_features = trial.suggest_categorical(name="max_features", choices=['sqrt', 'log2', None]) 

    # Maximum number of levels in tree
    max_depth = trial.suggest_int(name="max_depth", low=10, high=110, step=20)

    # Minimum number of samples required to split a node
    min_samples_split = trial.suggest_int(name="min_samples_split", low=2, high=10, step=2)

    # Minimum number of samples required at each leaf node
    min_samples_leaf = trial.suggest_int(name="min_samples_leaf", low=1, high=4, step=1)
    
    params = {
        "n_estimators": n_estimators,
        "max_features": max_features,
        "max_depth": max_depth,
        "min_samples_split": min_samples_split,
        "min_samples_leaf": min_samples_leaf
    }
    model = RandomForestClassifier(random_state=42, **params)
    
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    
    # cv_score = cross_val_score(model, X_train, y_train, n_jobs=4, cv=5)
    # mean_cv_accuracy = cv_score.mean()
    return acc

In [169]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

[I 2024-07-13 16:38:31,007] A new study created in memory with name: no-name-8088fcba-2a49-4459-9989-09039185b482
[W 2024-07-13 16:40:11,421] Trial 0 failed with parameters: {'n_estimators': 400, 'max_features': None, 'max_depth': 110, 'min_samples_split': 6, 'min_samples_leaf': 3} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\envs\magisterka_env\Lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\Łukasz\AppData\Local\Temp\ipykernel_31148\3511437812.py", line 26, in objective
    model.fit(X_train, y_train)
  File "c:\ProgramData\anaconda3\envs\magisterka_env\Lib\site-packages\sklearn\base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\ProgramData\anaconda3\envs\magisterka_env\Lib\site-packages\sklearn\ensemble\_forest.py

KeyboardInterrupt: 

## XGBoost

In [60]:
xgb_model = xgb.XGBClassifier(objective='multi:softmax', num_class=3)

In [61]:
xgb_model.fit(X_train, y_train)

In [62]:
y_pred = xgb_model.predict(X_test)

In [63]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.45      0.26      0.33      1951
           1       0.68      0.81      0.74      3268
           2       0.64      0.68      0.66      2249

    accuracy                           0.63      7468
   macro avg       0.59      0.58      0.58      7468
weighted avg       0.60      0.63      0.61      7468



In [150]:
# Ustawienia dla XGBoost
params = {
    'max_depth': 3,  # głębokość drzew
    'eta': 0.1,      # współczynnik uczenia
    'objective': 'multi:softprob',  # funkcja straty dla klasyfikacji wieloklasowej
    'num_class': 3   # liczba klas wynikowych
}

# Przekształcenie danych do formatu DMatrix, który jest bardziej efektywny dla XGBoost
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Trenowanie modelu
bst = xgb.train(params, dtrain, num_boost_round=100)

# Predykcja
y_pred_prob = bst.predict(dtest)
y_pred = np.argmax(y_pred_prob, axis=1)  # Wybieramy klasę z najwyższym prawdopodobieństwem

# Ocena modelu
accuracy = accuracy_score(y_test, y_pred)
print("Dokładność klasyfikacji XGBoost: {:.2f}%".format(accuracy * 100))

Dokładność klasyfikacji XGBoost: 62.91%


In [64]:
import xgboost as xgb
# from sklearn.metrics import mean_squared_error
import optuna

def objective(trial):
    params = {
        "objective": "multi:softprob",
        # "objective": "multi:softmax",
        # "num_class": "3",
        "n_estimators": 1000,
        "verbosity": 0,
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        # "learning_rate": trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
        "max_depth": trial.suggest_int("max_depth", 1, 10),
        "subsample": trial.suggest_float("subsample", 0.05, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1.0),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 20),
    }

    model = xgb.XGBClassifier(**params)
    model.fit(X_train, y_train, verbose=False)
    
    predictions = model.predict(X_test)
    acc = accuracy_score(y_test, predictions)
    return acc

In [65]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

[I 2024-07-14 15:36:48,491] A new study created in memory with name: no-name-ace3165d-7266-4987-8550-de89ec0e86c8
[I 2024-07-14 15:36:59,232] Trial 0 finished with value: 0.5938671665773969 and parameters: {'learning_rate': 0.0031018087274741766, 'max_depth': 2, 'subsample': 0.6917962983186509, 'colsample_bytree': 0.5121727666643779, 'min_child_weight': 1}. Best is trial 0 with value: 0.5938671665773969.
[I 2024-07-14 15:37:42,233] Trial 1 finished with value: 0.5871719335832887 and parameters: {'learning_rate': 0.0016695431307818262, 'max_depth': 8, 'subsample': 0.9054826412985704, 'colsample_bytree': 0.12880535337806984, 'min_child_weight': 7}. Best is trial 0 with value: 0.5938671665773969.
[I 2024-07-14 15:37:53,175] Trial 2 finished with value: 0.6360471344402785 and parameters: {'learning_rate': 0.07039596811280739, 'max_depth': 2, 'subsample': 0.5756458476465504, 'colsample_bytree': 0.3469167984940352, 'min_child_weight': 3}. Best is trial 2 with value: 0.6360471344402785.
[I 20

In [73]:
y_test

27295    2
15565    2
18623    1
26929    1
35578    1
        ..
32348    0
9068     0
8230     1
15878    0
6410     2
Name: result, Length: 7468, dtype: int32

In [67]:
xgb_optuna_best_params = study.best_params

In [75]:
xgb_optuna_best_params['objective'] = 'multi:softprob'

In [79]:
xgb_optuna_best_params['num_class'] = 3

In [80]:
xgb_optuna_best_params

{'learning_rate': 0.018854188545073352,
 'max_depth': 6,
 'subsample': 0.7473497986534416,
 'colsample_bytree': 0.42759609597862747,
 'min_child_weight': 12,
 'objective': 'multi:softprob',
 'num_class': 3}

In [81]:
#converting data to dmatrix format is more effective for xgboost
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [82]:
#training xgboost on optimal params from optuna
bst = xgb.train(xgb_optuna_best_params, dtrain, num_boost_round=100)

In [83]:
y_pred_prob

array([0.975096  , 0.87281215, 0.949675  , ..., 0.92944056, 0.8956829 ,
       1.4719692 ], dtype=float32)

In [84]:
#prediction
y_pred_prob = bst.predict(dtest)
y_pred = np.argmax(y_pred_prob, axis=1)

In [86]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.55      0.10      0.17      1951
           1       0.63      0.87      0.73      3268
           2       0.61      0.70      0.65      2249

    accuracy                           0.62      7468
   macro avg       0.60      0.56      0.52      7468
weighted avg       0.60      0.62      0.56      7468



## LightGBM

In [155]:
import lightgbm as lgb

# Ustawienia
params = {
    'boosting_type': 'gbdt',
    'objective': 'multiclass',
    'num_class': 3,
    'metric': 'multi_logloss',
    'learning_rate': 0.1,
    'max_depth': 3,
    'num_leaves': 31,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5
}

# Trenowanie
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# Model
model = lgb.train(params, train_data, valid_sets=[test_data], num_boost_round=100)

# Predykcja
y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)

# Ocena
accuracy = accuracy_score(y_test, y_pred)
print("Dokładność klasyfikacji LightGBM: {:.2f}%".format(accuracy * 100))

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.019235 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19958
[LightGBM] [Info] Number of data points in the train set: 29871, number of used features: 149
[LightGBM] [Info] Start training from score -1.355019
[LightGBM] [Info] Start training from score -0.830862
[LightGBM] [Info] Start training from score -1.182916
Dokładność klasyfikacji LightGBM: 63.20%


### Hyperparameter tuning

In [95]:
def objective(trial):
    param = {
        'objective': 'multiclass',
        'metric': 'multi_logloss',
        'num_class': 3,
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.005, 0.2),
        'max_depth': trial.suggest_int('max_depth', 1, 10)
    }

    train_data = lgb.Dataset(X_train, label=y_train)
    test_data = lgb.Dataset(X_test, label=y_test)

    model = lgb.train(param, train_data, valid_sets=[test_data])

    y_pred = model.predict(X_test)
    predictions = np.argmax(y_pred, axis=1)
    acc = accuracy_score(y_test, predictions)

    return acc


In [96]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

[I 2024-07-14 16:17:00,259] A new study created in memory with name: no-name-45cab9c6-2255-439f-978f-d561460a9520
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.005, 0.2),
[I 2024-07-14 16:17:03,804] Trial 0 finished with value: 0.6044456347080879 and parameters: {'lambda_l1': 2.2646421497412645, 'lambda_l2': 0.0741521411554788, 'num_leaves': 32, 'feature_fraction': 0.8585428064282418, 'bagging_fraction': 0.7711771270711225, 'bagging_freq': 6, 'min_child_samples': 79, 'learning_rate': 0.009647213904150007, 'max_depth': 5}. Best is trial 0 with value: 0.6044456347080879.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 

In [99]:
lgbm_optuna_best_params = study.best_params

In [98]:
study.best_value

0.6373861810391002

## Catboost

In [102]:
# Inicjalizacja modelu
model = CatBoostClassifier(
    iterations=1000,
    learning_rate=0.1,
    depth=3,
    loss_function='MultiClass',  # dla klasyfikacji wieloklasowej
    verbose=True
)

# Trenowanie modelu
model.fit(X_train, y_train, eval_set=(X_test, y_test), early_stopping_rounds=10)

# Predykcja
y_pred = model.predict(X_test)

# Ocena modelu
accuracy = accuracy_score(y_test, y_pred)
print("Dokładność klasyfikacji CatBoost: {:.2f}%".format(accuracy * 100))

0:	learn: 1.0686533	test: 1.0688561	best: 1.0688561 (0)	total: 40.3ms	remaining: 40.3s
1:	learn: 1.0445600	test: 1.0447754	best: 1.0447754 (1)	total: 76.9ms	remaining: 38.4s
2:	learn: 1.0261194	test: 1.0262857	best: 1.0262857 (2)	total: 114ms	remaining: 38s
3:	learn: 1.0127644	test: 1.0129492	best: 1.0129492 (3)	total: 156ms	remaining: 38.9s
4:	learn: 0.9975257	test: 0.9981369	best: 0.9981369 (4)	total: 210ms	remaining: 41.8s
5:	learn: 0.9852412	test: 0.9860121	best: 0.9860121 (5)	total: 263ms	remaining: 43.5s
6:	learn: 0.9742444	test: 0.9751168	best: 0.9751168 (6)	total: 306ms	remaining: 43.3s
7:	learn: 0.9652384	test: 0.9662150	best: 0.9662150 (7)	total: 337ms	remaining: 41.8s
8:	learn: 0.9568264	test: 0.9576974	best: 0.9576974 (8)	total: 373ms	remaining: 41s
9:	learn: 0.9490877	test: 0.9498515	best: 0.9498515 (9)	total: 404ms	remaining: 40s
10:	learn: 0.9431068	test: 0.9442454	best: 0.9442454 (10)	total: 431ms	remaining: 38.8s
11:	learn: 0.9366286	test: 0.9379061	best: 0.9379061 (11

### Hyperparameter tuning

In [100]:
def objective(trial):
    param = {
        'loss_function': 'MultiClass',
        'iterations': trial.suggest_int('iterations', 50, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'depth': trial.suggest_int('depth', 4, 10),
        'l2_leaf_reg': trial.suggest_int('l2_leaf_reg', 1, 10),
        'border_count': trial.suggest_int('border_count', 5, 255),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'random_strength': trial.suggest_int('random_strength', 0, 100),
        'random_state': 42
    }

    model = CatBoostClassifier(**param)
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False, early_stopping_rounds=100)

    y_pred = model.predict(X_test)
    predictions = np.argmax(y_pred, axis=1)
    
    acc = accuracy_score(y_test, predictions)
    
    return acc

In [101]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

[I 2024-07-14 16:24:04,305] A new study created in memory with name: no-name-d4b4ff32-4fa9-40c1-bc5b-e62c25399d2e
[I 2024-07-14 16:26:05,393] Trial 0 finished with value: 0.26124799143010174 and parameters: {'iterations': 327, 'learning_rate': 0.10869176717223722, 'depth': 9, 'l2_leaf_reg': 1, 'border_count': 239, 'bagging_temperature': 0.13453406117243294, 'random_strength': 10}. Best is trial 0 with value: 0.26124799143010174.
[W 2024-07-14 16:26:43,242] Trial 1 failed with parameters: {'iterations': 560, 'learning_rate': 0.02101335002607218, 'depth': 10, 'l2_leaf_reg': 10, 'border_count': 19, 'bagging_temperature': 0.980634100238481, 'random_strength': 39} because of the following error: KeyboardInterrupt('').
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\envs\magisterka_env\Lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\Łukasz\AppData\Local\Temp\ipykerne

KeyboardInterrupt: 

## Neural Network

In [123]:
np.array(y).reshape(-1, 1)

array([[1],
       [1],
       [2],
       ...,
       [1],
       [2],
       [2]], dtype=int64)

In [124]:
encoder = OneHotEncoder(sparse=False)
y_onehot = encoder.fit_transform(np.array(y).reshape(-1, 1))



In [128]:
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.3, random_state=42)

In [129]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(10, activation='relu', input_shape=(X.shape[1],)),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax')
])

In [130]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [131]:
model.fit(X_train, y_train, epochs=100, verbose=1)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 

<keras.src.callbacks.History at 0x14c8c673f90>

In [132]:
_, accuracy = model.evaluate(X_test, y_test)



In [139]:
X_train

Unnamed: 0,attendance,game_week,home_team_corner_count,away_team_corner_count,home_team_yellow_cards,home_team_red_cards,away_team_yellow_cards,away_team_red_cards,home_team_first_half_cards,home_team_second_half_cards,away_team_first_half_cards,away_team_second_half_cards,home_team_shots,away_team_shots,home_team_shots_on_target,away_team_shots_on_target,home_team_shots_off_target,away_team_shots_off_target,home_team_fouls,away_team_fouls,home_team_possession,away_team_possession,home_team_xg,away_team_xg,btts_percentage_pre_match,over_15_percentage_pre_match,over_25_percentage_pre_match,over_35_percentage_pre_match,over_45_percentage_pre_match,over_15_ht_fhg_percentage_pre_match,over_05_ht_fhg_percentage_pre_match,over_15_2hg_percentage_pre_match,over_05_2hg_percentage_pre_match,average_corners_per_match_pre_match,average_cards_per_match_pre_match,odds_ft_home_team_win,odds_ft_draw,odds_ft_away_team_win,odds_ft_over15,odds_ft_over25,odds_ft_over35,odds_ft_over45,odds_btts_yes,odds_btts_no,corners_total,yellow_cards_total,red_cards_total,cards_total,shots_total,shots_on_target_total,shots_off_target_total,fouls_total,home_team_cumulative_goals_scored_pre_game,away_team_cumulative_goals_scored_pre_game,home_team_cumulative_goals_conceded_pre_game,away_team_cumulative_goals_conceded_pre_game,home_team_average_goals_scored_per_game_pre_game,away_team_average_goals_scored_per_game_pre_game,home_team_average_goals_conceded_per_game_pre_game,away_team_average_goals_conceded_per_game_pre_game,home_team_average_first_half_goals_scored_pre_game,home_team_average_second_half_goals_scored_pre_game,away_team_average_first_half_goals_scored_pre_game,away_team_average_second_half_goals_scored_pre_game,home_team_average_first_half_goals_conceded_pre_game,home_team_average_second_half_goals_conceded_pre_game,away_team_average_first_half_goals_conceded_pre_game,away_team_average_second_half_goals_conceded_pre_game,home_team_average_first_half_goals_total_pre_game,home_team_average_second_half_goals_total_pre_game,away_team_average_first_half_goals_total_pre_game,away_team_average_second_half_goals_total_pre_game,home_team_average_corners_total_per_game_pre_game,away_team_average_corners_total_per_game_pre_game,home_team_average_corners_per_game_pre_game,away_team_average_corners_per_game_pre_game,home_team_average_yellow_cards_total_per_game_pre_game,away_team_average_yellow_cards_total_per_game_pre_game,home_team_average_yellow_cards_per_game_pre_game,away_team_average_yellow_cards_per_game_pre_game,home_team_cumulative_red_cards_pre_game,away_team_cumulative_red_cards_pre_game,home_team_average_red_cards_total_per_game_pre_game,away_team_average_red_cards_total_per_game_pre_game,home_team_average_red_cards_per_game_pre_game,away_team_average_red_cards_per_game_pre_game,home_team_average_shots_per_game_pre_game,home_team_average_shots_on_target_per_game_pre_game,away_team_average_shots_per_game_pre_game,away_team_average_shots_on_target_per_game_pre_game,home_team_average_fouls_per_game_pre_game,away_team_average_fouls_per_game_pre_game,home_team_average_fouls_total_per_game_pre_game,away_team_average_fouls_total_per_game_pre_game,home_team_average_possession_per_game_pre_game,away_team_average_possession_per_game_pre_game,home_team_average_xg_per_game_pre_game,away_team_average_xg_per_game_pre_game,home_team_games_without_goals_pre_game,away_team_games_without_goals_pre_game,home_team_cumulative_btts_pre_game,away_team_cumulative_btts_pre_game,home_team_cumulative_clean_sheets_pre_game,away_team_cumulative_clean_sheets_pre_game,home_team_cumulative_wins_pre_game,home_team_cumulative_draws_pre_game,home_team_cumulative_losses_pre_game,away_team_cumulative_wins_pre_game,away_team_cumulative_draws_pre_game,away_team_cumulative_losses_pre_game,home_team_average_points_per_game_pre_game,away_team_average_points_per_game_pre_game,home_team_wins_in_last_5_games,home_team_draws_in_last_5_games,home_team_losses_in_last_5_games,away_team_wins_in_last_5_games,away_team_draws_in_last_5_games,away_team_losses_in_last_5_games,home_team_average_goals_scored_in_last_5_games,away_team_average_goals_scored_in_last_5_games,home_team_average_goals_conceded_in_last_5_games,away_team_average_goals_conceded_in_last_5_games,average_total_corners_in_home_team_games_in_last_5_games,average_total_corners_in_away_team_games_in_last_5_games,average_corners_by_home_team_in_last_5_games,average_corners_by_away_team_in_last_5_games,average_yellow_cards_by_home_team_in_last_5_games,average_yellow_cards_by_away_team_in_last_5_games,average_red_cards_by_home_team_in_last_5_games,average_red_cards_by_away_team_in_last_5_games,average_ball_possession_by_home_team_in_last_5_games,average_ball_possession_by_away_team_in_last_5_games,average_xg_by_home_team_in_last_5_games,average_xg_by_away_team_in_last_5_games,average_shots_by_home_team_in_last_5_games,average_shots_by_away_team_in_last_5_games,average_shots_on_target_by_home_team_in_last_5_games,average_shots_on_target_by_away_team_in_last_5_games,average_total_fouls_in_home_team_games_in_last_5_games,average_total_fouls_in_away_team_games_in_last_5_games,average_fouls_by_home_team_in_last_5_games,average_fouls_by_away_team_in_last_5_games,h2h_home_team_wins_pre_game,h2h_away_team_wins_pre_game,h2h_draws_pre_game,average_goals_h2h,average_yellow_cards_h2h,average_red_cards_h2h,average_corners_h2h
3995,0.653716,-0.316070,0.821555,0.124345,-1.749327,-0.249313,0.730344,-0.286451,-0.871317,-1.305658,-0.961539,1.181221,-0.039104,0.821918,-0.278042,-0.421958,0.232395,1.305474,0.132395,0.115881,0.521205,-0.391056,-0.447136,0.503527,0.693440,0.374598,0.299257,0.770764,0.669225,0.505028,0.443898,0.026908,0.356403,0.123903,-0.769169,-0.452148,0.084053,0.295947,0.196238,-0.259359,0.198761,0.326608,-0.033083,0.269014,0.508199,-0.150746,-0.375163,-0.192155,0.344694,-0.466516,0.796271,0.121808,-0.059198,-0.882868,0.138541,-0.006274,0.390441,-1.365601,0.958741,0.554472,0.718230,-0.047209,-0.939017,-1.279721,1.021257,-0.047209,0.754500,-1.279721,0.718230,-0.047209,-0.939017,-1.279721,0.174757,0.073900,0.004092,-0.219940,0.204966,-0.089704,0.690692,-0.534191,0.505874,-0.185650,0.611258,-0.549698,0.887374,0.027720,0.502184,0.512240,0.396074,-0.370223,0.402148,0.299037,0.319111,0.092751,0.300631,-0.093483,0.407137,0.078035,-0.451312,0.224576,0.180990,-0.111832,-1.047970,-0.479387,-0.741960,0.228562,-0.099539,-2.212670,0.463684,0.129632,-0.412020,-1.479866,-0.353634,1.506126,-0.401331,-1.719031,0.023353,1.451818,-0.016410,-1.099716,0.226655,1.204809,0.112012,0.213094,-0.328856,0.154742,0.887955,-0.223136,0.647405,-0.536578,0.251533,-0.003454,-0.317564,0.091610,0.157479,0.427884,-0.152796,-0.187791,0.446447,0.006303,0.592615,0.190789,-1.061757,0.578088,-0.982839,0.316115,-1.033311,-0.573772,0.294538
11237,0.141235,0.201437,0.631711,0.124345,0.334112,-0.249313,1.139187,-0.286451,0.623418,-0.038716,2.027137,-0.167720,0.224473,0.237112,-0.698319,0.558047,0.723007,0.036179,0.020590,0.872805,0.380066,-0.149504,0.217126,0.379706,0.007325,-0.105563,0.029733,0.213598,0.634933,0.094232,0.159452,0.002370,0.053095,-0.007816,0.733305,0.001545,-0.479211,-0.160445,0.261648,0.330354,0.436465,0.698273,0.230945,0.184005,0.358729,0.816289,-0.375163,0.721543,0.170405,-0.066542,0.382307,0.455452,0.393566,0.487837,0.309633,0.047619,0.485349,0.725980,0.271484,-0.383192,0.569857,0.259894,1.403132,-0.101499,-0.045157,0.259894,-0.464209,-0.101499,0.569857,0.259894,1.403132,-0.101499,0.095443,0.068756,0.333571,-0.111789,0.040610,0.478942,-0.029692,1.232571,0.970074,-0.185650,0.325959,-0.035198,0.779316,-0.308482,-0.026915,0.227469,0.095974,0.233135,0.383724,0.554270,0.424901,0.429724,0.189496,0.133596,0.332781,0.361306,0.621519,-0.438414,0.180990,0.180909,-0.079106,0.767166,-0.095144,0.463949,0.445040,0.794392,0.228398,-0.724806,-0.300104,1.147768,-0.353634,0.914160,0.437382,1.406892,0.023353,-1.664258,0.767226,0.481103,0.490782,-0.703115,0.180938,-0.157652,0.339986,-0.179062,0.187342,0.589725,-0.540041,-0.536578,0.291043,0.166579,0.376756,0.777342,0.129570,0.357835,-0.067315,0.650822,0.524403,0.416714,0.524726,0.455996,-1.061757,-1.113569,0.190406,0.024976,0.461646,-0.573772,0.686251
26024,0.167918,0.612000,1.275724,-0.770792,-0.477338,-0.249313,-1.960748,-0.286451,0.623418,-1.305658,-0.961539,-1.436481,1.598514,-0.984852,1.606458,-0.967478,1.455943,-0.625840,0.328564,0.004561,0.730124,-0.908612,2.376809,-1.961489,0.380496,0.364890,0.558656,0.862770,1.149458,0.691518,0.472431,0.434326,0.277240,0.225651,0.137041,-1.221192,2.443568,2.467309,-0.640893,-1.367575,-1.476163,-1.170156,-0.096152,0.434114,0.645777,-1.496563,-0.375163,-1.463741,0.770496,0.853114,0.796271,0.165044,1.380103,0.421659,0.453271,1.227534,2.378147,-0.255428,-0.190957,1.996357,2.925089,1.930044,0.376568,-0.849988,-0.035893,1.930044,1.919193,-0.849988,2.925089,1.930044,0.376568,-0.849988,0.307791,0.150979,0.666757,-0.226750,-0.431871,0.383083,-0.799990,0.686279,-0.190480,0.510472,-0.368838,0.138884,-0.479456,-0.089102,0.946165,0.894897,0.204956,-0.193767,0.182370,0.217696,0.085937,0.259987,0.425610,0.109649,1.300836,-0.129777,-1.928950,0.628909,0.953152,0.890664,0.617071,-1.063225,1.489211,-0.475327,-1.302211,-0.764779,0.228398,1.324333,1.738701,-1.632513,1.442629,-1.365879,-0.401331,-0.403765,-1.368519,1.451818,1.187126,-1.099716,0.226655,1.204809,-0.034996,0.488647,-0.050037,-0.090038,-0.504355,1.330392,0.647405,-0.536578,0.349951,0.013070,0.830260,-0.437193,0.767757,-0.018375,0.567925,-0.277359,0.113480,0.121813,0.254940,0.299189,-0.038338,-1.113569,-0.982839,1.741656,0.121258,-0.573772,-0.191000
3552,-3.081844,0.877819,0.413196,0.124345,0.875036,-0.249313,0.730344,-0.286451,1.576967,-0.038716,2.027137,-1.436481,-0.039104,0.821918,0.557758,1.295151,-0.528815,0.276523,0.328564,-0.800058,0.018470,0.268814,0.099989,1.342124,0.338586,0.393687,0.501169,0.727548,0.818456,0.619301,0.394110,0.462817,0.201359,0.679594,-0.064674,2.010504,1.048967,-1.288675,-0.530723,-1.182920,-1.265351,-0.986587,-0.608971,1.036442,0.195127,0.816289,-0.375163,0.721543,0.344694,0.950192,-0.200137,-0.073319,1.076814,1.490344,0.711056,0.491530,0.905382,2.056472,-0.066912,-0.580231,1.367269,0.299818,1.998181,2.057934,0.035304,0.299818,-0.550507,2.057934,1.367269,0.299818,1.998181,2.057934,0.194090,0.252536,0.669389,0.999481,-0.230314,-0.325090,-0.329034,-0.355294,1.581877,0.510472,0.276413,-0.325896,0.946882,-0.250447,0.711030,0.722852,1.077730,1.231278,0.165717,-0.176267,0.162534,-0.005996,0.496647,0.556876,0.938659,1.801580,0.437954,-1.010964,0.890913,0.890664,1.032362,1.406235,1.327993,0.463949,0.445040,1.536587,0.228398,-0.084494,0.953743,1.344694,1.442629,-1.365879,-0.401331,1.731802,-1.368519,-1.664258,1.548131,1.804646,-0.063664,-1.523345,0.341757,0.246041,0.663512,0.852818,-0.253158,-1.387478,-0.540041,-0.536578,0.485341,0.505342,0.262379,1.397904,0.238334,0.880559,0.311112,0.914784,0.305357,-0.027449,0.317600,-0.003598,0.614540,-0.080772,-0.982839,0.835624,0.245022,1.882108,0.474409
10228,0.258541,0.394887,1.139631,0.124345,0.334112,-0.249313,-0.656416,-0.286451,0.623418,-0.038716,0.487027,-1.436481,0.648252,-0.258505,0.053178,-0.967478,0.974563,0.276523,0.020590,-0.255615,0.404543,-0.187136,0.487048,-0.635785,0.665098,0.439613,0.612333,0.242670,0.701849,0.718582,0.352070,0.389586,0.297732,0.122329,0.149735,-0.913514,0.527089,1.345690,-0.003855,-0.152392,-0.060752,0.087633,0.386355,-0.173834,0.773213,-0.150746,-0.375163,-0.192155,0.230808,-0.466516,0.644616,-0.073319,0.717029,0.141658,0.485855,0.425576,0.948013,-0.502117,0.330549,0.179826,1.276513,0.473967,-0.161225,-0.729632,0.186982,0.473967,0.556367,-0.729632,1.276513,0.473967,-0.161225,-0.729632,0.142795,0.152335,0.229068,-0.189959,-0.004573,0.026024,-0.287843,0.337386,0.970074,-1.276856,0.401387,-0.208547,0.522440,-0.870332,0.250361,0.316322,-0.173001,-0.370223,0.017414,0.107913,-0.043022,0.035948,0.355295,-0.055492,0.261628,-0.559207,-0.451312,0.446286,0.677867,0.753663,0.206533,-0.479387,0.802454,-1.083197,0.576227,-0.764779,1.204779,0.455830,0.576262,-0.746073,0.478755,-1.365879,0.996479,-1.719031,1.503186,0.483281,-0.016410,-0.373293,0.226655,0.267168,0.341757,0.246041,0.339986,-0.272377,0.382017,0.409819,-0.540041,-0.536578,0.286719,-0.014602,0.177626,-0.610378,0.184899,-0.178171,0.311112,-0.187791,0.245101,0.131730,0.377406,0.360196,1.049756,-1.113569,-0.982839,0.024976,0.357921,0.712614,0.412946
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16850,-3.081844,0.954713,-1.896749,1.061666,-0.477338,-0.249313,0.175667,-0.286451,-0.871317,-0.038716,-0.961539,0.641671,0.098144,0.821918,0.326005,-0.019866,0.021325,1.198548,0.496762,0.478665,0.018470,0.268814,-0.256100,0.450946,-0.083532,0.143014,-0.084538,-0.215859,0.206454,0.063322,0.284841,0.118147,0.142689,0.197520,0.532702,-0.471233,-0.224937,0.714452,0.702379,1.038682,1.167461,0.965077,0.735811,-0.522881,0.014455,-0.150746,-0.375163,-0.192155,0.398514,0.101548,0.644616,0.423545,0.639833,0.519176,0.453271,1.042640,-0.355180,-0.630205,-0.802177,0.671156,0.040542,-0.683529,-0.302248,-0.797701,-0.665037,-0.683529,0.364416,-0.797701,0.040542,-0.683529,-0.302248,-0.797701,0.124602,0.010577,0.201283,-0.012797,0.656787,0.882922,0.674108,0.521810,-0.190480,0.974515,-0.947734,0.187704,-0.578183,-0.001158,0.285688,-0.107184,0.111839,-0.303130,0.432389,0.412703,0.347096,0.423237,0.157010,0.021772,0.135182,-0.191193,1.139891,1.621679,0.753880,0.595626,1.238794,0.196675,0.802454,1.401651,0.692171,0.439246,0.463684,1.433550,0.179762,-0.752789,0.478755,0.914160,-0.401331,-0.403765,0.023353,1.040847,0.529518,-1.099716,-0.383943,0.775774,0.401986,0.074440,0.034593,0.229863,0.562555,0.757290,-0.540041,-0.536578,0.062846,-0.054481,-0.223003,-0.041171,0.012660,0.154049,-0.242218,-0.102172,0.181291,0.313173,0.337839,0.299189,-1.061757,-1.113569,0.190406,-2.039406,1.138573,-0.573772,0.444185
6265,0.510545,0.612000,0.989337,0.423686,-0.477338,-0.249313,-1.960748,-0.286451,-0.871317,-0.038716,-0.961539,-1.436481,-0.355227,-0.704131,-0.278042,-0.019866,-0.227154,-1.128140,0.643965,0.217585,-0.013374,0.296602,-0.709774,-0.751793,0.231531,0.304192,0.372295,0.151800,0.328000,0.411377,0.199045,0.291794,0.266805,0.041333,-0.949142,0.899360,-0.307363,-0.748881,0.579433,0.517832,0.620254,0.452239,0.309296,-0.118246,0.773213,-1.496563,-0.375163,-1.463741,-0.524615,-0.254192,-0.710538,0.390557,0.810512,1.040816,0.517296,0.459163,0.361793,0.958356,-0.382281,-0.509076,0.314558,0.266039,0.622692,1.053721,-0.326247,0.266039,-0.306103,1.053721,0.314558,0.266039,0.622692,1.053721,-0.279540,-0.026027,-0.494516,0.108332,-0.260929,-0.609342,-1.212661,-1.253703,-1.282051,0.510472,0.165966,0.592922,-0.862181,-0.204611,0.090502,0.246155,0.498856,0.271018,-0.367701,-0.209330,-0.034157,-0.083901,0.222703,0.320363,-0.106177,0.339109,0.621519,0.224576,0.677867,0.952888,1.328418,1.026057,1.135514,0.657838,0.445040,1.199667,1.091550,-0.356192,0.766587,1.006838,1.033636,0.914160,-1.716055,1.406892,0.023353,-1.664258,0.529518,1.947391,-1.553080,-0.024910,0.076453,0.037914,-0.654581,0.371802,-1.085063,-1.387478,-0.540041,0.670842,0.114225,0.372584,-0.139094,1.020172,0.157479,0.846013,0.241147,0.914784,0.354460,0.370147,-0.004355,0.235223,-0.038338,1.017293,0.190406,0.560951,0.520047,0.206781,0.596838
11284,0.524381,0.508375,-1.090249,1.366541,0.875036,-0.249313,0.730344,-0.286451,1.576967,-0.038716,0.487027,0.641671,-1.275146,-0.075410,-0.698319,-0.967478,-1.430888,0.480681,1.049818,0.004561,0.428622,-0.225744,-1.102517,-0.076983,0.380496,0.345135,0.464612,0.649410,0.669225,0.539472,0.307846,0.358302,0.402382,0.349223,0.141282,0.420822,-0.349427,-0.582237,0.294117,0.330354,0.474200,0.440108,0.110913,0.184005,0.508199,0.816289,-0.375163,0.721543,-0.626543,-0.998613,-0.351165,0.545345,0.639833,0.519176,0.577055,0.552938,0.498688,0.176096,0.334603,0.281053,0.605510,0.251983,0.433258,-0.160121,0.302173,0.251983,-0.156483,-0.160121,0.605510,0.251983,0.433258,-0.160121,0.377519,0.099177,0.187930,0.326323,0.094587,0.305186,-0.052978,0.264152,-0.190480,1.316553,-0.575817,0.532547,-0.443032,0.832892,0.099600,0.425766,-0.025796,0.143895,0.246412,0.437981,0.316292,0.462008,0.305620,0.177453,0.505511,0.284100,0.777448,0.918811,0.506649,0.408930,0.206533,0.196675,0.582154,0.463949,0.576227,0.106586,0.657490,0.804078,0.219403,-0.445931,-0.353634,0.026139,0.996479,-0.403765,0.023353,1.040847,-0.680175,-0.718912,0.490782,0.267168,0.180938,0.145196,0.193393,0.371802,-1.421585,0.913988,-0.540041,0.670842,0.295352,0.166579,0.185769,0.224441,-0.287536,0.070212,0.093081,-0.020176,0.143063,0.538758,0.119874,0.562462,-0.038338,-1.113569,-0.982839,-0.779052,1.138573,-0.573772,0.612351
860,-0.045089,-0.029524,0.413196,1.061666,-0.477338,-0.249313,-0.656416,-0.286451,-0.871317,-0.038716,-0.961539,-0.167720,-0.189328,1.389182,0.557758,1.559407,-0.911584,1.082380,0.234543,0.004561,-0.597655,0.661012,-0.408190,2.200210,0.231531,0.143014,0.252051,0.441552,0.818456,0.487200,0.307846,0.219236,0.201359,0.193027,0.027706,0.573450,-0.184547,-0.820132,-0.071868,-0.548878,-0.596655,-0.038335,-0.096152,0.352365,0.773213,-0.689047,-0.375163,-0.700765,0.641696,1.128348,0.283097,0.121808,-0.002035,0.141658,0.347755,-0.124930,-0.051771,0.304222,0.885636,-0.354696,-0.097661,-0.068815,-0.377403,0.710585,1.513899,-0.068815,0.658728,0.710585,-0.097661,-0.068815,-0.377403,0.710585,0.295006,0.570791,-0.051211,1.006688,-0.226806,0.148428,-0.617891,0.588440,0.505874,0.974515,1.084958,0.216454,0.487216,1.187544,0.060327,0.057386,0.640223,0.678611,0.258695,0.440000,0.361610,0.331088,0.173362,0.295910,-0.151387,0.907996,0.215100,-0.438414,0.301298,0.044816,-1.047970,0.196675,-0.369602,0.228562,0.294038,0.106586,-0.475190,0.306008,-0.412020,0.075372,-1.658445,0.026139,1.408579,0.435294,-1.368519,1.040847,-0.680175,1.133451,0.953956,-0.347128,0.076453,0.700567,-0.138570,1.054222,-0.023559,0.215824,0.647405,0.670842,0.349951,0.415960,-0.299403,1.133152,-0.081146,0.756145,-0.242218,0.864832,0.313697,0.207975,0.377406,0.235223,-1.061757,-1.113569,0.190406,0.024976,0.729906,2.888700,0.934154


In [146]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

In [147]:
import numpy as np
import pandas as pd

# Ustawienia początkowe
n_features = 149  # liczba cech
sequence_length = 10  # długość sekwencji

# Funkcja do przekształcania danych wejściowych w sekwencje
def create_sequences(X, y, sequence_length):
    n_samples = len(X) - sequence_length + 1
    X_seq = np.zeros((n_samples, sequence_length, X.shape[1]))
    y_seq = np.zeros(n_samples)

    for i in range(n_samples):
        X_seq[i] = X.iloc[i:i+sequence_length].values
        y_seq[i] = y.iloc[i+sequence_length-1]  # przypisujemy etykietę z ostatniego kroku w sekwencji

    return X_seq, y_seq

# Przetwarzanie X_train i y_train
X_train_seq, y_train_seq = create_sequences(X_train, y_train, sequence_length)

# Przetwarzanie X_test i y_test (zakładam, że y_test również jest dostępny)
X_test_seq, y_test_seq = create_sequences(X_test, y_test, sequence_length)

In [148]:
import tensorflow as tf

# Definiowanie modelu
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(50, input_shape=(sequence_length, n_features), return_sequences=True),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(50),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Zakładam klasyfikację binarną; dostosuj w razie potrzeby
])

# Kompilacja modelu
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])  # Dostosuj loss i activation w zależności od problemu

# Trenowanie modelu
history = model.fit(X_train_seq, y_train_seq, epochs=30, validation_data=(X_test_seq, y_test_seq), verbose=1)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30

KeyboardInterrupt: 

# Score prediction (home_team_goal_count, away_team_goal_count)