# Пайплайн <a id='top'></a>

## Imports

In [None]:
import yaml

import pandas as pd
from pandas_profiling import ProfileReport
import numpy as np
import seaborn as sns
pd.set_option('display.max_columns', None)
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split

from data_transformer import DataTransformer
from models import BoostingModel
from scorer import MoneyScorer

# PATHS

In [None]:
DATA_PATH = 'data/all_england_v2.8.csv'
FEATURES_PATH = 'data/features.yaml'

## Load data

In [3]:
raw_train_data = pd.read_csv(DATA_PATH)

with open(FEATURES_PATH) as f:
    all_features_dict = yaml.safe_load(f)

In [4]:
dict_type = type(all_features_dict)

In [5]:
for key, item in all_features_dict.items():
    if type(item) == dict_type:
        print(f"'{key}':")
        for inner_key in item.keys():
            print(f"\t'{inner_key}'")
    else:
        print(f"'{key}'")

        

'base_features'
'scored_features'
'result_coef_features'
'names':
	'team_names'
	'country_names'
	'city_names'
	'manager_names'
'manager_features'
'squad_features':
	'common_features'
	'detail_features'
'city_features'
'money_features'
'coefficients':
	'double_chance_features'
	'total_coef_features'
	'handicap_features'
	'half_features'
	'odd_features'
	'correct_score_features'
	'time_match_features'
	'both_scored_features'


In [6]:
# profile = ProfileReport(raw_train_data, minimal=True)
# profile.to_file("output.html")

## Preprocess data

In [7]:
def base_data_preprocess(data):
    
    preprocessed_data = data.copy()
    
#     preprocessed_data = preprocessed_data.fillna(1.01)
    preprocessed_data['day'] = pd.to_datetime(preprocessed_data.date, format='%d.%m.%Y')
    preprocessed_data['day_of_week'] = preprocessed_data['day'].dt.day_name()
    preprocessed_data['year'] = preprocessed_data['day'].dt.year
    preprocessed_data.date = preprocessed_data.day.values.astype(np.int64) // 10 ** 9
    preprocessed_data.country = 'England'
    preprocessed_data = preprocessed_data.sort_values(by='date')
    preprocessed_data = preprocessed_data.drop(columns=['link'])
    drop_index = preprocessed_data[preprocessed_data.home_goalkeepers_average_age.isna()].index
    preprocessed_data = preprocessed_data.drop(index=drop_index)

    return preprocessed_data

In [8]:
train_data = base_data_preprocess(raw_train_data)

train_data.tail(5)

Unnamed: 0,date,time,day_of_week,season,country,league,league_level,home_team,away_team,home_win_rate,draw_rate,away_win_rate,home_double_chance_rate,away_double_chance_rate,no_draw_rate,total_over_1_rate,total_under_1_rate,total_over_15_rate,total_under_15_rate,total_over_2_rate,total_under_2_rate,total_over_25_rate,total_under_25_rate,total_over_3_rate,total_under_3_rate,total_over_35_rate,total_under_35_rate,both_team_to_score_yes,both_team_to_score_no,home_handicap_0,away_handicap_0,home_handicap_minus_15,away_handicap_minus_15,home_handicap_minus_1,away_handicap_minus_1,home_handicap_plus_15,away_handicap_plus_15,home_handicap_plus_1,away_handicap_plus_1,home_first_half_handicap_0,away_first_half_handicap_0,home_first_half_handicap_minus_15,away_first_half_handicap_minus_15,home_first_half_handicap_minus_1,away_first_half_handicap_minus_1,home_first_half_handicap_plus_15,away_first_half_handicap_plus_15,home_first_half_handicap_plus_1,away_first_half_handicap_plus_1,home_second_half_handicap_0,away_second_half_handicap_0,home_second_half_handicap_minus_15,away_second_half_handicap_minus_15,home_second_half_handicap_minus_1,away_second_half_handicap_minus_1,home_second_half_handicap_plus_15,away_second_half_handicap_plus_15,home_second_half_handicap_plus_1,away_second_half_handicap_plus_1,home_first_half_win_rate,draw_first_half_rate,away_first_half_win_rate,home_second_half_win_rate,draw_second_half_rate,away_second_half_win_rate,home_double_chance_first_half_rate,away_double_chance_first_half_rate,no_draw_first_half_rate,home_double_chance_second_half_rate,away_double_chance_second_half_rate,no_draw_second_half_rate,total_first_half_over_05_rate,total_first_half_under_05_rate,total_first_half_over_1_rate,total_first_half_under_1_rate,total_first_half_over_15_rate,total_first_half_under_15_rate,total_first_half_over_2_rate,total_first_half_under_2_rate,total_second_half_over_05_rate,total_second_half_under_05_rate,total_second_half_over_1_rate,total_second_half_under_1_rate,total_second_half_over_15_rate,total_second_half_under_15_rate,total_second_half_over_2_rate,total_second_half_under_2_rate,both_team_to_score_first_half_yes,both_team_to_score_first_half_no,both_team_to_score_second_half_yes,both_team_to_score_second_half_no,odd,even,odd_first,odd_second,even_first,even_second,correct_score10,correct_score20,correct_score21,correct_score30,correct_score31,correct_score32,correct_score40,correct_score41,correct_score00,correct_score11,correct_score22,correct_score33,correct_score01,correct_score02,correct_score12,correct_score03,correct_score13,correct_score23,correct_score04,correct_score14,home_home,home_draw,home_away,draw_home,draw_draw,draw_away,away_home,away_draw,away_away,home_scored,away_scored,home_first_half_scored,away_first_half_scored,home_second_half_scored,away_second_half_scored,home_manager_working_days,away_manager_working_days,home_manager_name,away_manager_name,home_manager_start_date,home_manager_birthday,home_manager_country,away_manager_start_date,away_manager_birthday,away_manager_country,month,home_squad_size,home_average_age,home_amount_of_foreigners,home_e_market_value,home_total_market_value,away_squad_size,away_average_age,away_amount_of_foreigners,away_e_market_value,away_total_market_value,home_stadium,home_stadium_capacity,home_city,away_city,is_derby,home_goalkeepers_average_age,home_defenders_average_age,home_midfields_average_age,home_attacks_average_age,home_goalkeepers_total_market_value,home_defenders_total_market_value,home_midfields_total_market_value,home_attacks_total_market_value,home_goalkeepers_e_market_value,home_defenders_e_market_value,home_midfields_e_market_value,home_attacks_e_market_value,home_is_manager_and_league_same_country,away_goalkeepers_average_age,away_defenders_average_age,away_midfields_average_age,away_attacks_average_age,away_goalkeepers_total_market_value,away_defenders_total_market_value,away_midfields_total_market_value,away_attacks_total_market_value,away_goalkeepers_e_market_value,away_defenders_e_market_value,away_midfields_e_market_value,away_attacks_e_market_value,away_is_manager_and_league_same_country,home_last_winter_window_departures_average_age,home_last_winter_window_departures_sum,home_last_winter_window_departures_total_market_value,home_last_winter_window_arrivals_average_age,home_last_winter_window_arrivals_sum,home_last_winter_window_arrivals_total_market_value,home_last_summer_window_departures_average_age,home_last_summer_window_departures_sum,home_last_summer_window_departures_total_market_value,home_last_summer_window_arrivals_average_age,home_last_summer_window_arrivals_sum,home_last_summer_window_arrivals_total_market_value,away_last_winter_window_departures_average_age,away_last_winter_window_departures_sum,away_last_winter_window_departures_total_market_value,away_last_winter_window_arrivals_average_age,away_last_winter_window_arrivals_sum,away_last_winter_window_arrivals_total_market_value,away_last_summer_window_departures_average_age,away_last_summer_window_departures_sum,away_last_summer_window_departures_total_market_value,away_last_summer_window_arrivals_average_age,away_last_summer_window_arrivals_sum,away_last_summer_window_arrivals_total_market_value,day,year
9502,1650240000,14:00,Monday,2021-2022,England,league-two,4,Forest Green Rovers,Oldham Athletic,1.43,4.57,7.06,1.1,2.84,1.21,1.04,9.13,1.23,3.84,1.34,3.09,1.76,2.09,2.26,1.63,2.84,1.4,1.94,1.81,1.14,5.15,2.23,1.67,1.69,2.16,0.0,0.0,1.01,11.5,1.22,4.3,4.66,1.17,3.37,1.29,0.0,0.0,0.0,0.0,1.2,4.54,3.68,1.28,2.65,1.48,0.0,0.0,0.0,0.0,1.92,2.38,6.11,1.74,2.84,5.71,1.1,1.79,1.51,1.11,1.99,1.37,1.34,3.08,1.71,2.05,2.57,1.47,4.61,1.15,1.17,4.26,1.35,3.05,1.94,1.8,3.01,1.36,5.05,1.17,3.76,1.27,1.89,1.88,2.05,1.98,1.77,1.83,6.21,6.0,7.19,8.17,9.52,18.84,16.0,20.0,11.03,7.29,14.22,52.07,12.95,24.22,14.77,57.61,42.0,36.99,101.0,101.0,1.98,19.0,56.0,4.1,6.85,16.0,25.0,20.0,13.5,2,0,2,0,0,0,326,85,Rob Edwards,John Sheridan,1622048000.0,409597200.0,Wales,1642871000.0,-165740400.0,Ireland,4,23,23.9,8,103000.0,238000000.0,29,25.3,13,119000.0,345000000.0,The Fully Charged New Lawn,5140,"Nailsworth, Gloucestershire","Oldham, Greater Manchester",0,25.0,24.83,23.33,23.5,100000.0,550000.0,148000000.0,250000.0,50000.0,92000.0,164000.0,42000.0,0,23.75,25.4,26.22,25.0,550000.0,100000000.0,175000000.0,150000.0,138000.0,100000.0,194000.0,25000.0,0,21.5,0.0,100000.0,20.0,0.0,300000.0,25.4,0.0,210000000.0,23.0,0.0,153000000.0,20.0,0.0,200000.0,22.9,0.0,250000.0,25.1,0.0,245000000.0,22.8,0.0,283000000.0,2022-04-18,2022
9562,1650240000,14:00,Monday,2021-2022,England,league-two,4,Northampton Town,Harrogate Town,1.51,4.06,6.6,1.11,2.58,1.25,1.07,6.84,1.34,3.06,1.56,2.39,2.08,1.76,2.94,1.38,3.56,1.26,2.17,1.64,1.17,4.78,2.48,1.52,1.88,1.94,0.0,0.0,,14.5,1.25,3.94,5.58,1.12,4.21,1.2,0.0,0.0,0.0,0.0,1.24,4.04,4.3,1.22,3.15,1.36,0.0,0.0,0.0,0.0,2.11,2.17,5.89,1.83,2.6,5.82,1.1,1.65,1.61,1.1,1.89,1.44,1.42,2.71,1.98,1.77,2.97,1.36,6.11,1.09,1.25,3.56,1.51,2.46,2.22,1.61,3.87,1.23,5.8,1.14,4.3,1.22,1.89,1.88,2.11,2.01,1.73,1.8,5.1,5.59,7.5,8.37,10.96,24.18,19.0,27.0,8.23,6.77,16.05,57.61,10.93,24.15,15.36,57.61,47.05,52.06,101.0,101.0,2.19,19.0,56.0,4.04,5.65,14.0,28.0,20.0,12.0,3,0,1,2,0,0,432,4716,Jon Brady,Simon Weaver,1612890000.0,158864400.0,Australia,1242752000.0,251398800.0,England,4,27,24.5,6,142000.0,383000000.0,23,26.2,6,82000.0,188000000.0,Sixfields Stadium,7798,"Northampton, East Midlands","Harrogate, North Yorkshire",0,27.5,24.64,23.4,24.33,50000.0,178000000.0,100000.0,190000000.0,25000.0,161000.0,20000.0,211000.0,0,29.0,27.86,25.0,25.0,300000.0,450000.0,900000.0,225000.0,150000.0,64000.0,100000.0,45000.0,1,22.8,0.0,123000000.0,21.4,0.0,145000000.0,24.0,110000000.0,275000000.0,24.5,0.0,190000000.0,27.7,0.0,0.0,23.1,0.0,475000.0,23.7,0.0,525000.0,24.4,0.0,300000.0,2022-04-18,2022
9457,1650240000,14:00,Monday,2021-2022,England,league-two,4,Crawley Town,Walsall FC,2.01,3.31,3.86,1.27,1.81,1.34,1.08,6.41,1.35,3.01,1.59,2.33,2.14,1.73,3.03,1.36,3.67,1.25,1.9,1.85,1.44,2.78,3.65,1.25,2.9,1.39,1.06,8.44,1.07,7.14,1.52,2.53,7.79,1.06,6.33,1.1,1.01,17.0,1.01,15.0,1.52,2.53,6.6,1.11,5.1,1.17,1.02,13.5,1.03,11.5,2.62,2.05,4.27,2.35,2.41,3.9,1.19,1.43,1.68,1.21,1.54,1.51,1.44,2.64,2.03,1.73,3.07,1.34,6.11,1.09,1.25,3.51,1.52,2.42,2.25,1.6,3.99,1.22,4.9,1.18,3.64,1.29,1.9,1.87,2.15,2.03,1.7,1.79,5.96,7.54,7.64,12.92,12.95,21.56,40.0,40.0,7.74,5.64,12.42,52.07,8.29,14.09,10.71,31.89,24.13,31.85,101.0,101.0,3.15,14.5,38.0,4.9,4.8,8.5,28.0,15.0,6.8,1,0,0,1,0,0,865,62,John Yems,Michael Flynn,1575479000.0,-326530800.0,England,1644858000.0,340563600.0,Wales,4,33,25.2,10,81000.0,268000000.0,25,25.6,9,103000.0,258000000.0,The People's Pension Stadium,6134,"Crawley, West Sussex","Walsall, West Midlands",0,26.33,27.0,22.75,26.25,50000.0,800000.0,975000.0,850000.0,17000.0,89000.0,75000.0,106000.0,1,23.5,26.7,24.86,25.17,100000.0,158000000.0,250000.0,650000.0,50000.0,158000.0,36000.0,108000.0,0,21.4,0.0,550000.0,18.6,0.0,500000.0,25.5,0.0,140000000.0,23.1,0.0,120000000.0,23.0,0.0,650000.0,24.0,0.0,875000.0,23.9,0.0,108000000.0,20.7,0.0,375000.0,2022-04-18,2022
9329,1650240000,14:00,Monday,2021-2022,England,league-two,4,Colchester United,Bradford City,2.51,3.16,2.95,1.42,1.54,1.36,1.09,5.95,1.4,2.81,1.69,2.15,2.28,1.64,3.35,1.3,4.01,1.21,1.93,1.82,1.77,2.07,4.83,1.15,3.98,1.22,1.1,5.88,1.14,4.97,1.8,2.01,11.0,1.04,8.04,1.06,1.03,13.0,1.04,11.0,1.8,2.01,8.8,1.06,7.1,1.1,1.05,10.0,1.07,8.6,3.07,2.01,3.58,2.84,2.3,3.15,1.25,1.33,1.71,1.32,1.38,1.55,1.48,2.53,2.14,1.66,3.25,1.31,6.45,1.08,1.3,3.31,1.62,2.21,2.42,1.52,4.17,1.17,5.25,1.16,3.84,1.26,1.9,1.87,2.15,2.04,1.7,1.78,6.25,9.11,8.46,17.68,16.05,24.25,70.0,60.0,7.08,5.4,12.48,52.08,6.89,10.81,9.22,21.57,19.35,26.81,101.0,90.0,4.04,15.0,35.0,5.8,4.6,6.55,33.0,15.0,4.8,3,0,2,1,0,0,89,53,Wayne Brown,Mark Hughes,1642525000.0,240858000.0,England,1645636000.0,-194684400.0,Wales,4,25,26.4,5,182000.0,455000000.0,31,25.7,10,121000.0,375000000.0,JobServe Community Stadium,10083,"Colchester, Essex","Bradford, West Yorkshire",0,29.0,26.44,26.57,24.5,250000.0,185000000.0,160000000.0,850000.0,83000.0,206000.0,229000.0,142000.0,1,28.5,24.75,23.2,28.18,350000.0,675000.0,675000.0,205000000.0,175000.0,84000.0,68000.0,186000.0,0,25.8,0.0,220000000.0,24.7,0.0,125000000.0,25.4,0.0,175000000.0,23.9,0.0,103000000.0,22.0,0.0,125000.0,23.5,0.0,190000000.0,24.8,0.0,170000000.0,22.0,0.0,625000.0,2022-04-18,2022
9566,1650240000,14:00,Monday,2021-2022,England,league-two,4,Swindon Town,Leyton Orient,1.91,3.5,4.07,1.25,1.92,1.31,1.06,7.48,1.31,3.23,1.5,2.53,2.01,1.82,2.77,1.42,3.39,1.29,1.85,1.9,1.38,3.0,3.32,1.3,2.63,1.47,1.05,8.69,1.07,7.37,1.44,2.79,6.73,1.09,5.35,1.13,1.01,17.0,1.01,15.0,1.48,2.65,5.9,1.13,4.44,1.21,1.02,13.5,1.03,11.5,2.43,2.15,4.45,2.28,2.47,3.86,1.17,1.5,1.63,1.21,1.56,1.48,1.41,2.76,1.9,1.84,2.89,1.38,5.81,1.1,1.23,3.65,1.48,2.54,2.17,1.64,3.72,1.25,4.8,1.18,3.52,1.3,1.89,1.88,2.09,2.0,1.74,1.81,6.13,7.51,7.49,12.3,12.31,20.48,35.0,35.0,8.43,5.79,11.86,42.06,8.86,15.21,10.75,36.91,24.14,29.33,101.0,101.0,2.75,15.0,41.0,4.7,5.4,9.5,26.0,16.0,7.4,1,2,0,1,1,1,271,40,Ben Garner,Richie Wellens,1626800000.0,327517200.0,England,1646759000.0,322851600.0,England,4,25,23.8,7,202000.0,505000000.0,29,24.1,12,91000.0,263000000.0,The County Ground,15728,"Swindon, Wiltshire",London,0,24.33,24.33,24.33,21.0,400000.0,140000000.0,225000000.0,100000000.0,133000.0,156000.0,250000.0,250000.0,1,23.67,23.82,24.63,24.0,250000.0,850000.0,800000.0,725000.0,83000.0,77000.0,100000.0,104000.0,1,21.9,0.0,850000.0,21.0,0.0,203000000.0,23.5,0.0,248000000.0,22.9,0.0,195000000.0,22.6,0.0,250000.0,23.8,0.0,675000.0,24.0,0.0,850000.0,21.3,0.0,550000.0,2022-04-18,2022


In [9]:
train_data.shape

(9655, 210)

In [10]:
nan_count = train_data.isna().sum().sort_values(ascending=False)
nan_count = nan_count[nan_count > 0]
nan_col_index = nan_count.index
nan_count

home_first_half_handicap_plus_15      160
home_first_half_handicap_plus_1       104
home_handicap_plus_15                  73
away_first_half_handicap_minus_15      42
home_second_half_handicap_plus_15      41
home_handicap_plus_1                   37
away_first_half_handicap_minus_1       27
home_second_half_handicap_plus_1       23
total_over_1_rate                      20
away_handicap_minus_15                 14
away_second_half_handicap_minus_15     13
away_handicap_minus_1                  13
away_second_half_handicap_minus_1       4
home_handicap_0                         2
dtype: int64

In [11]:
def prepare_for_yaml(features_list) -> str:
    
    result_yaml = ''
    
    for feature in features_list:
        result_yaml += f"- '{feature}'\n"
        
    return result_yaml

# Разбиение фичей на группы [ПРОПУСТИТЬ](#numeric)

In [None]:
enumerated_features = dict()

for idx, feature in enumerate(list(train_data.columns)):
    enumerated_features[idx] = feature

In [None]:
enumerated_features

In [None]:
base_features = list(train_data.iloc[0:2, 0:9].columns)
base_features.append(enumerated_features[126])
base_features.append(enumerated_features[127])
base_features.append(enumerated_features[208])
base_features.append(enumerated_features[209])
base_features.append('month')


print(prepare_for_yaml(base_features))

In [None]:
squad_features = list(train_data.iloc[0:2,143:153].columns)
print(prepare_for_yaml(squad_features))

In [None]:
squad_detail_features = list(train_data.iloc[0:2, 158:208])
print(prepare_for_yaml(squad_detail_features[:10]))

In [None]:
money_features = list(train_data.filter(like='market_value', axis=1).columns) + list(train_data.filter(like='s_sum', axis=1).columns)
print(prepare_for_yaml(money_features))

In [None]:
city_features = list(train_data.iloc[0:2,153:158].columns)
print(prepare_for_yaml(city_features))

In [None]:
result_coef_features = [enumerated_features[9], enumerated_features[10], enumerated_features[11]]
print(prepare_for_yaml(result_coef_features))

In [None]:
double_chance_features = list(train_data.filter(like='chance', axis=1).columns)
print(prepare_for_yaml(double_chance_features))

In [None]:
total_coef_features = list(train_data.filter(like='total_over', axis=1).columns) + list(train_data.filter(like='total_under', axis=1).columns)
print(prepare_for_yaml(total_coef_features))

In [None]:
handicap_features = list(train_data.iloc[0:2,29:39].columns)
print(prepare_for_yaml(handicap_features))

In [None]:
half_features = list(train_data.iloc[0:2,39:91].columns)
print(prepare_for_yaml(half_features[:10]))

In [None]:
odd_features = list(train_data.iloc[0:2,91:97].columns)

print(prepare_for_yaml(odd_features))

In [None]:
correct_score_features = list(train_data.filter(like='correct_score', axis=1).columns)
print(prepare_for_yaml(correct_score_features))

In [None]:
time_match_features = list(train_data.iloc[0:2,117:126].columns)
print(prepare_for_yaml(time_match_features))

In [None]:
both_scored_features = list(train_data.iloc[0:2,87:91].columns)
both_scored_features += list(train_data.iloc[0:2,128:132].columns)
both_scored_features.append(enumerated_features[27])
both_scored_features.append(enumerated_features[28])

print(prepare_for_yaml(both_scored_features))

In [None]:
manager_features = tuple(train_data.filter(like='manager', axis=1).columns)

print(prepare_for_yaml(manager_features))

<a id='numeric'></a>


<a id='transformer'></a>
# Трансформер 

[наверх](#top)

In [12]:
numeric_features = tuple(train_data.select_dtypes(include=['int', 'float']).columns)
print(prepare_for_yaml(numeric_features[:10]))
print(f'Number of numeric features: {len(numeric_features)}')

- 'date'
- 'league_level'
- 'home_win_rate'
- 'draw_rate'
- 'away_win_rate'
- 'home_double_chance_rate'
- 'away_double_chance_rate'
- 'no_draw_rate'
- 'total_over_1_rate'
- 'total_under_1_rate'

Number of numeric features: 195


In [13]:
categorical_features = tuple(train_data.select_dtypes(include=['object']).columns)
print(prepare_for_yaml(categorical_features))
print(f'Number of numeric features: {len(categorical_features)}')

- 'time'
- 'day_of_week'
- 'season'
- 'country'
- 'league'
- 'home_team'
- 'away_team'
- 'home_manager_name'
- 'away_manager_name'
- 'home_manager_country'
- 'away_manager_country'
- 'home_stadium'
- 'home_city'
- 'away_city'

Number of numeric features: 14


In [156]:
features = {'cat_features':categorical_features,
            'num_features':numeric_features,
            'grouped_features':all_features_dict
           }

transformer_context = {'data':train_data,
                       'features':features
                      }

transformer = DataTransformer(transformer_context)

In [157]:
train, val, test, decode_labels, encode_labels = transformer.run_logic() 

All categorical features are already encoded!
Features are already generated!


In [158]:
test.head()

Unnamed: 0,date,time,day_of_week,season,country,league,league_level,home_team,away_team,home_win_rate,draw_rate,away_win_rate,home_double_chance_rate,away_double_chance_rate,no_draw_rate,total_over_1_rate,total_under_1_rate,total_over_15_rate,total_under_15_rate,total_over_2_rate,total_under_2_rate,total_over_25_rate,total_under_25_rate,total_over_3_rate,total_under_3_rate,total_over_35_rate,total_under_35_rate,both_team_to_score_yes,both_team_to_score_no,home_handicap_0,away_handicap_0,home_handicap_minus_15,away_handicap_minus_15,home_handicap_minus_1,away_handicap_minus_1,home_handicap_plus_15,away_handicap_plus_15,home_handicap_plus_1,away_handicap_plus_1,home_first_half_handicap_0,away_first_half_handicap_0,home_first_half_handicap_minus_15,away_first_half_handicap_minus_15,home_first_half_handicap_minus_1,away_first_half_handicap_minus_1,home_first_half_handicap_plus_15,away_first_half_handicap_plus_15,home_first_half_handicap_plus_1,away_first_half_handicap_plus_1,home_second_half_handicap_0,away_second_half_handicap_0,home_second_half_handicap_minus_15,away_second_half_handicap_minus_15,home_second_half_handicap_minus_1,away_second_half_handicap_minus_1,home_second_half_handicap_plus_15,away_second_half_handicap_plus_15,home_second_half_handicap_plus_1,away_second_half_handicap_plus_1,home_first_half_win_rate,draw_first_half_rate,away_first_half_win_rate,home_second_half_win_rate,draw_second_half_rate,away_second_half_win_rate,home_double_chance_first_half_rate,away_double_chance_first_half_rate,no_draw_first_half_rate,home_double_chance_second_half_rate,away_double_chance_second_half_rate,no_draw_second_half_rate,total_first_half_over_05_rate,total_first_half_under_05_rate,total_first_half_over_1_rate,total_first_half_under_1_rate,total_first_half_over_15_rate,total_first_half_under_15_rate,total_first_half_over_2_rate,total_first_half_under_2_rate,total_second_half_over_05_rate,total_second_half_under_05_rate,total_second_half_over_1_rate,total_second_half_under_1_rate,total_second_half_over_15_rate,total_second_half_under_15_rate,total_second_half_over_2_rate,total_second_half_under_2_rate,both_team_to_score_first_half_yes,both_team_to_score_first_half_no,both_team_to_score_second_half_yes,both_team_to_score_second_half_no,odd,even,odd_first,odd_second,even_first,even_second,correct_score10,correct_score20,correct_score21,correct_score30,correct_score31,correct_score32,correct_score40,correct_score41,correct_score00,correct_score11,correct_score22,correct_score33,correct_score01,correct_score02,correct_score12,correct_score03,correct_score13,correct_score23,correct_score04,correct_score14,home_home,home_draw,home_away,draw_home,draw_draw,draw_away,away_home,away_draw,away_away,home_manager_working_days,away_manager_working_days,home_manager_name,away_manager_name,home_manager_start_date,home_manager_birthday,home_manager_country,away_manager_start_date,away_manager_birthday,away_manager_country,month,home_squad_size,home_average_age,home_amount_of_foreigners,home_e_market_value,home_total_market_value,away_squad_size,away_average_age,away_amount_of_foreigners,away_e_market_value,away_total_market_value,home_stadium,home_stadium_capacity,home_city,away_city,is_derby,home_goalkeepers_average_age,home_defenders_average_age,home_midfields_average_age,home_attacks_average_age,home_goalkeepers_total_market_value,home_defenders_total_market_value,home_midfields_total_market_value,home_attacks_total_market_value,home_goalkeepers_e_market_value,home_defenders_e_market_value,home_midfields_e_market_value,home_attacks_e_market_value,home_is_manager_and_league_same_country,away_goalkeepers_average_age,away_defenders_average_age,away_midfields_average_age,away_attacks_average_age,away_goalkeepers_total_market_value,away_defenders_total_market_value,away_midfields_total_market_value,away_attacks_total_market_value,away_goalkeepers_e_market_value,away_defenders_e_market_value,away_midfields_e_market_value,away_attacks_e_market_value,away_is_manager_and_league_same_country,home_last_winter_window_departures_average_age,home_last_winter_window_departures_sum,home_last_winter_window_departures_total_market_value,home_last_winter_window_arrivals_average_age,home_last_winter_window_arrivals_sum,home_last_winter_window_arrivals_total_market_value,home_last_summer_window_departures_average_age,home_last_summer_window_departures_sum,home_last_summer_window_departures_total_market_value,home_last_summer_window_arrivals_average_age,home_last_summer_window_arrivals_sum,home_last_summer_window_arrivals_total_market_value,away_last_winter_window_departures_average_age,away_last_winter_window_departures_sum,away_last_winter_window_departures_total_market_value,away_last_winter_window_arrivals_average_age,away_last_winter_window_arrivals_sum,away_last_winter_window_arrivals_total_market_value,away_last_summer_window_departures_average_age,away_last_summer_window_departures_sum,away_last_summer_window_departures_total_market_value,away_last_summer_window_arrivals_average_age,away_last_summer_window_arrivals_sum,away_last_summer_window_arrivals_total_market_value,day,year,target,log_home_e_market_value,log_home_total_market_value,log_away_e_market_value,log_away_total_market_value,log_home_goalkeepers_total_market_value,log_home_defenders_total_market_value,log_home_midfields_total_market_value,log_home_attacks_total_market_value,log_home_goalkeepers_e_market_value,log_home_defenders_e_market_value,log_home_midfields_e_market_value,log_home_attacks_e_market_value,log_away_goalkeepers_total_market_value,log_away_defenders_total_market_value,log_away_midfields_total_market_value,log_away_attacks_total_market_value,log_away_goalkeepers_e_market_value,log_away_defenders_e_market_value,log_away_midfields_e_market_value,log_away_attacks_e_market_value,log_home_last_winter_window_departures_total_market_value,log_home_last_winter_window_arrivals_total_market_value,log_home_last_summer_window_departures_total_market_value,log_home_last_summer_window_arrivals_total_market_value,log_away_last_winter_window_departures_total_market_value,log_away_last_winter_window_arrivals_total_market_value,log_away_last_summer_window_departures_total_market_value,log_away_last_summer_window_arrivals_total_market_value,log_home_last_winter_window_departures_sum,log_home_last_winter_window_arrivals_sum,log_home_last_summer_window_departures_sum,log_home_last_summer_window_arrivals_sum,log_away_last_winter_window_departures_sum,log_away_last_winter_window_arrivals_sum,log_away_last_summer_window_departures_sum,log_away_last_summer_window_arrivals_sum
6891,1649980800,12,0,5,0,1,3,29,41,3.51,3.28,2.18,1.69,1.32,1.34,1.11,5.85,1.42,2.8,1.72,2.12,2.33,1.63,3.48,1.29,4.07,1.21,2.0,1.78,2.54,1.53,7.28,1.07,6.33,1.1,1.21,4.08,1.31,3.29,2.36,1.59,15.0,1.01,14.0,1.02,1.05,8.46,1.08,6.93,2.36,1.59,12.5,1.03,11.0,1.04,1.09,7.4,1.14,5.75,4.04,2.01,2.8,3.65,2.3,2.53,1.38,1.2,1.72,1.46,1.24,1.55,1.49,2.5,2.17,1.64,3.31,1.3,6.95,1.08,1.31,3.25,1.64,2.17,2.44,1.51,4.37,1.16,5.45,1.15,3.94,1.25,1.9,1.87,2.17,2.04,1.69,1.78,7.16,10.46,9.2,13.59,13.18,13.78,0.0,0.0,7.25,5.87,11.16,14.18,5.9,7.83,7.85,11.62,11.67,13.46,0.0,0.0,6.0,16.0,31.0,7.7,4.6,5.1,41.0,15.0,3.38,4,16,311,36,1649610000.0,434134800.0,19,1648573000.0,-191574000.0,12,4,26,23.7,8,195000.0,508000000.0,30,22.5,9,97000.0,290000000.0,45,10153,57,22,0,25.5,20.29,23.7,26.43,50000.0,200000.0,333000000.0,150000000.0,25000.0,29000.0,333000.0,214000.0,0,22.5,23.0,22.5,21.63,100000.0,700000.0,75000.0,203000000.0,50000.0,58000.0,9000.0,253000.0,1,23.3,600000.0,750000.0,21.2,0.0,305000000.0,25.6,0.0,120000000.0,24.9,0.0,125000000.0,19.6,357000.0,100000.0,22.0,0.0,195000000.0,22.8,0.0,103000000.0,20.6,0.0,825000.0,2022-04-15,2022,3,12.18076,20.045992,11.482477,19.485391,10.819798,12.206078,19.623653,18.826146,10.126671,10.275086,12.715901,12.273736,11.512935,13.458837,11.225257,19.128717,10.819798,10.968216,9.105091,12.441149,13.52783,19.535822,18.603002,18.643824,11.512935,19.08851,18.45024,13.62314,13.304687,0.0,0.0,0.0,12.785494,0.0,0.0,0.0
9333,1649980800,4,0,5,0,2,4,88,85,1.5,4.12,6.49,1.12,2.56,1.24,1.06,7.41,1.31,3.21,1.5,2.51,2.02,1.82,2.81,1.42,3.4,1.28,2.1,1.69,1.17,4.72,2.46,1.53,1.88,1.95,0.0,0.0,0.0,14.5,1.25,3.94,5.3,1.13,3.99,1.22,0.0,0.0,0.0,0.0,1.24,4.04,4.3,1.22,3.1,1.37,0.0,0.0,0.0,0.0,2.05,2.22,5.91,1.86,2.61,5.49,1.1,1.69,1.58,1.12,1.85,1.43,1.4,2.81,1.9,1.84,2.88,1.38,5.68,1.1,1.23,3.7,1.48,2.54,2.17,1.64,3.72,1.25,5.55,1.15,4.04,1.24,1.89,1.88,2.09,2.0,1.74,1.81,5.3,5.66,7.36,8.41,10.88,24.11,18.0,25.0,8.59,6.76,15.94,57.6,10.97,24.12,15.27,57.61,47.03,47.04,101.0,101.0,2.18,18.0,53.0,4.1,5.85,13.5,27.0,19.0,12.0,1413,86,137,116,1527872000.0,381171600.0,19,1642525000.0,240858000.0,19,4,24,25.2,6,96000.0,230000000.0,25,26.4,5,182000.0,455000000.0,62,8830,34,14,0,24.5,24.88,25.14,26.2,500000.0,600000.0,550000.0,650000.0,125000.0,75000.0,79000.0,130000.0,1,29.0,26.44,26.57,24.5,250000.0,185000000.0,160000000.0,850000.0,83000.0,206000.0,229000.0,142000.0,1,22.6,0.0,500000.0,20.4,0.0,350000.0,25.4,0.0,650000.0,21.6,0.0,500000.0,25.8,0.0,220000000.0,24.7,0.0,125000000.0,25.4,0.0,175000000.0,23.9,0.0,103000000.0,2022-04-15,2022,3,11.472114,19.25359,12.111767,19.935808,13.122365,13.304687,13.217675,13.384729,11.736077,11.225257,11.277216,11.775297,12.42922,19.035866,18.890684,13.652993,11.326608,12.235636,12.341482,11.863589,13.122365,12.765691,13.384729,13.122365,19.209138,18.643824,18.980297,18.45024,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6766,1649980800,12,0,5,0,1,3,81,15,4.38,3.77,1.79,2.06,1.21,1.27,1.05,9.0,1.25,3.73,1.38,2.97,1.84,2.02,2.4,1.57,2.97,1.38,1.74,2.05,3.21,1.36,9.52,1.05,8.09,1.06,1.36,3.02,1.58,2.37,2.88,1.42,17.0,1.01,15.0,1.01,1.11,6.11,1.17,4.77,2.88,1.42,14.0,1.02,12.0,1.03,1.16,5.3,1.25,3.94,4.56,2.25,2.27,4.08,2.59,2.12,1.57,1.16,1.57,1.65,1.19,1.44,1.35,3.04,1.73,2.03,2.62,1.45,4.74,1.15,1.2,3.94,1.38,2.88,2.0,1.75,3.22,1.32,4.3,1.22,3.2,1.35,1.89,1.88,2.06,1.98,1.76,1.83,9.44,12.17,9.85,13.97,13.38,13.57,0.0,0.0,9.65,6.59,10.43,14.02,6.74,7.24,7.14,9.79,9.74,12.44,0.0,0.0,7.8,15.0,24.0,9.9,5.75,4.7,38.0,14.5,2.68,115,1485,116,186,1640020000.0,361987200.0,22,1521652000.0,337626000.0,19,4,27,25.1,10,177000.0,478000000.0,27,26.3,10,297000.0,803000000.0,28,5327,71,55,0,25.67,26.0,23.63,25.43,250000.0,118000000.0,175000000.0,160000000.0,83000.0,131000.0,219000.0,229000.0,0,25.0,26.17,25.25,29.17,250000.0,173000000.0,355000000.0,250000000.0,83000.0,288000.0,296000.0,417000.0,1,21.9,120000000.0,950000.0,23.9,0.0,215000000.0,25.0,390000.0,810000000.0,24.2,0.0,610000000.0,23.6,0.0,600000.0,24.2,0.0,185000000.0,24.8,270000000.0,958000000.0,24.2,0.0,308000000.0,2022-04-15,2022,0,12.083911,19.985121,12.601491,20.503865,12.42922,18.586195,18.980297,18.890684,11.326608,11.78296,12.296832,12.341482,12.42922,18.968802,19.687628,19.336971,11.326608,12.570719,12.598118,12.940844,13.764218,19.186149,20.512545,20.22897,13.304687,19.035866,20.680358,19.54561,18.603002,0.0,12.873905,0.0,0.0,0.0,19.413933,0.0
9510,1649980800,12,0,5,0,2,4,7,2,2.33,3.23,3.15,1.37,1.63,1.35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.86,1.88,1.69,2.2,4.34,1.18,3.53,1.27,1.09,6.08,1.12,5.33,1.72,2.12,8.61,1.05,7.08,1.08,1.02,13.0,1.04,11.5,1.71,2.14,7.6,1.09,6.05,1.13,1.05,10.5,1.06,8.8,2.88,2.06,3.69,2.6,2.4,3.32,1.23,1.37,1.68,1.28,1.44,1.51,1.44,2.65,2.0,1.75,2.97,1.36,5.96,1.09,1.27,3.37,1.51,2.44,2.23,1.61,3.86,1.23,4.84,1.18,3.52,1.3,1.91,1.86,2.17,2.01,1.69,1.8,6.46,8.82,8.05,16.36,14.68,21.56,60.0,50.0,7.79,5.49,11.78,47.05,7.57,11.66,9.29,24.19,19.37,26.75,101.0,90.0,3.74,15.0,36.0,5.55,4.7,7.0,31.0,15.0,5.2,417,388,238,209,1613927000.0,399744000.0,19,1616432000.0,46976400.0,19,4,30,25.7,10,155000.0,465000000.0,24,26.5,10,304000.0,730000000.0,42,11916,81,59,0,33.0,23.5,25.38,26.57,200000.0,600000.0,283000000.0,103000000.0,100000.0,75000.0,217000.0,146000.0,1,27.0,26.0,25.33,29.4,200000.0,150000000.0,420000000.0,140000000.0,200000.0,188000.0,420000.0,280000.0,1,23.5,0.0,110000000.0,21.1,0.0,900000.0,24.4,140000000.0,245000000.0,22.6,0.0,175000000.0,20.4,0.0,850000.0,20.9,0.0,360000000.0,27.0,0.0,318000000.0,24.9,0.0,348000000.0,2022-04-15,2022,3,11.951187,19.957548,12.624786,20.408555,12.206078,13.304687,19.460957,18.45024,11.512935,11.225257,12.287657,11.891369,12.206078,18.826146,19.855765,18.757153,12.206078,12.144203,12.948012,12.542548,18.515991,13.710151,19.316769,18.980297,13.652993,19.701615,19.577562,19.667713,0.0,0.0,18.757153,0.0,0.0,0.0,0.0,0.0
9408,1649980800,12,0,5,0,2,4,43,90,3.46,3.17,2.21,1.68,1.32,1.36,1.09,6.0,1.39,2.84,1.67,2.18,2.26,1.65,3.31,1.3,3.97,1.21,1.94,1.81,2.43,1.53,7.19,1.06,6.32,1.09,1.2,4.18,1.3,3.41,2.34,1.6,16.0,1.01,14.0,1.02,1.05,8.61,1.08,7.03,2.34,1.6,12.5,1.03,11.0,1.04,1.09,7.6,1.13,5.9,4.03,2.01,2.79,3.6,2.3,2.55,1.38,1.2,1.71,1.45,1.25,1.55,1.48,2.54,2.13,1.67,3.22,1.32,6.45,1.08,1.3,3.31,1.61,2.23,2.41,1.53,4.23,1.17,5.05,1.17,3.68,1.28,1.9,1.87,2.15,2.04,1.7,1.78,7.49,12.7,10.06,29.28,21.56,29.38,101.0,101.0,7.18,5.44,12.54,52.09,5.9,8.06,8.02,14.84,14.33,24.21,50.0,50.0,6.2,15.0,29.0,7.7,4.54,5.15,38.0,14.5,3.48,82,429,284,100,1642871000.0,-165740400.0,14,1612890000.0,158864400.0,7,4,29,25.3,13,119000.0,345000000.0,27,24.5,6,142000.0,383000000.0,9,10638,70,11,0,23.75,25.4,26.22,25.0,550000.0,100000000.0,175000000.0,150000.0,138000.0,100000.0,194000.0,25000.0,0,27.5,24.64,23.4,24.33,50000.0,178000000.0,100000.0,190000000.0,25000.0,161000.0,20000.0,211000.0,0,20.0,0.0,200000.0,22.9,0.0,250000.0,25.1,0.0,245000000.0,22.8,0.0,283000000.0,22.8,0.0,123000000.0,21.4,0.0,145000000.0,24.0,110000000.0,275000000.0,24.5,0.0,190000000.0,2022-04-15,2022,0,11.686887,19.659055,11.863589,19.763546,13.217675,18.420681,18.980297,11.918397,11.835016,11.512935,12.175619,10.126671,10.819798,18.997294,11.512935,19.062535,10.126671,11.989166,9.903538,12.259618,12.206078,12.42922,19.316769,19.460957,18.627695,18.792244,19.432282,19.062535,0.0,0.0,0.0,0.0,0.0,0.0,18.515991,0.0


In [17]:
train.target.value_counts()

3    4135
0    2947
1    2503
Name: target, dtype: int64

In [18]:
print(f"Class_0 rate {train.target.value_counts()[0] / train.shape[0]}")
print(f"Class_1 rate {train.target.value_counts()[1] / train.shape[0]}")
print(f"Class_3 rate {train.target.value_counts()[3] / train.shape[0]}")

Class_0 rate 0.30745957224830467
Class_1 rate 0.26113719353155973
Class_3 rate 0.43140323422013566


# Проверить данные на:

- мультиколлинеарность
- forward/backward selection
- 


# Переход к [модели](#model)

## Generating some features and testing hypotheses

# Оставим только "не-коэффициенты" и декодируем названия команд 

In [None]:
def _set_target(row):
    """ Set target feature from score """

    if row.home_scored > row.away_scored:
        return 3
    elif row.home_scored == row.away_scored:
        return 1
    else:
        return 0

train_data_base = train_data[all_features_dict['base_features'] + all_features_dict['squad_features']].copy()
train_data_base['target'] = train_data_base.apply(_set_target, axis=1)
train_data_base.tail()

In [None]:
for feature in money_features:
    train_data_base[f"log_{feature}"] = train_data_base[feature].apply(np.log)

train_data_base.sample(5)

# ПЕРЕДЕЛАТЬ! Считаем сезонные total :
- забитые
- пропущенные
- количество очков

In [None]:
query = '((home_team == @team) | (away_team == @team)) & (season == @season) & (league == @league)'

season_data = pd.DataFrame()

total_features = train_data_base.copy()

for league in total_features.league.unique():

    for season in total_features.season.unique():
        
        season_league_data = total_features.query('(season == @season) & (league == @league)')
        
        for team in season_league_data.home_team.unique():   
            
            season_data = season_league_data.query('((home_team == @team) | (away_team == @team))')
            
            total_points = 0
            total_scored = 0
            total_missed = 0


            for idx in season_data.index:

                if season_data.loc[idx, 'home_team'] == team:

                    total_points += season_data.loc[idx, 'target']
                    total_scored += season_data.loc[idx, 'home_scored']
                    total_missed += season_data.loc[idx, 'away_scored']

                else:

                    home = season_data.loc[idx, 'home_scored']
                    away = season_data.loc[idx, 'away_scored']

                    away_match_score = 3 if home < away else 1 if home == away else 0

                    total_points += away_match_score
                    total_scored += season_data.loc[idx, 'away_scored']
                    total_missed += season_data.loc[idx, 'home_scored']

            condition_home = ((total_features.home_team == team) & (total_features.season == season))
            condition_away = ((total_features.away_team == team) & (total_features.season == season))
                    
            total_features.loc[condition_home,'total_points_home'] = total_points
            total_features.loc[condition_away,'total_points_away'] = total_points
            
            total_features.loc[condition_home,'total_scored_home'] = total_scored
            total_features.loc[condition_away,'total_scored_away'] = total_scored

            total_features.loc[condition_home,'total_missed_home'] = total_missed
            total_features.loc[condition_away,'total_missed_away'] = total_missed

            total_features.loc[condition_home,'total_diff_home'] = total_scored - total_missed
            total_features.loc[condition_away,'total_diff_away'] = total_scored - total_missed            



            print(f"TEAM: {team} LEAGUE: {league} SEASON: {season}\ntotal points:{total_points}\ttotal scored:{total_scored}\ttotal missed:{total_missed}\ttotal_diff: {total_scored - total_missed}")
            print('_________')
        
total_features.tail()        

In [None]:
home_team_alltime = train_data_base.groupby(['home_team'], as_index=False) \
                        .agg(home_mean_scored_season = ('home_scored', 'mean'),
                            home_mean_missed_season = ('away_scored', 'mean')                                 
                            )

away_team_alltime = train_data_base.groupby(['away_team'], as_index=False) \
                        .agg(away_mean_scored_alltime = ('home_scored', 'mean'),
                            away_mean_missed_alltime = ('away_scored', 'mean')                                 
                            )

home_team_alltime.head()

In [None]:
train_data_base = train_data_base.merge(home_team_alltime, how='left', on=['home_team', 'season'])
train_data_base = train_data_base.merge(away_team_alltime, how='left', on=['away_team', 'season'])

train_data_base.head()

# ПЕРЕДЕЛАТЬ! Личные встречи за все время забитые/пропущенные

In [None]:
# personal_battles = train_with_names.groupby(['home_team', 'away_team'], as_index=False) \
#                         .agg(total_home_scored = ('home_scored', 'sum'),
#                             total_home_missed = ('away_scored', 'sum'),
                            
#                             )

# personal_battles

# Подсчет текущих очков и вин/луз стриков по сезонам

In [None]:
query = '((home_team == @team) | (away_team == @team)) & (league == @season)'

def calculate_win_streak(actual_win_streak: int, match_result: int) -> int:
    
    new_win_streak = actual_win_streak
    
    if match_result == 3:
        
        new_win_streak += 1
    
    else:
        
        new_win_streak = 0
        
    return new_win_streak

def calculate_lose_streak(actual_lose_streak: int, match_result: int) -> int:
    
    new_lose_streak = actual_lose_streak
    
    if match_result == 0:
        
        new_lose_streak += 1
    
    else:
        
        new_lose_streak = 0
        
    return new_lose_streak


train_with_current_points = train_data_base.copy()

for season in train_with_current_points.season.unique():
    
    for team in train_with_current_points.home_team.unique():    

        current_points = 0
        current_win_streak = 0
        current_lose_streak = 0

        team_season_data = train_with_current_points.query(query)

        for idx in team_season_data.index:
            
#             if season == 'premier-league-2021-2022' and team == 'Arsenal':

#                 print(f"Match of {team_season_data.loc[idx, 'date']}\n"
#                          f"Home team: {team_season_data.loc[idx, 'home_team']}, "
#                          f"away team: {team_season_data.loc[idx, 'away_team']}"
#                          )

            if team_season_data.loc[idx, 'home_team'] == team:

                train_with_current_points.loc[idx, 'home_current_points'] = current_points

                current_points += team_season_data.loc[idx, 'target']

#                 if season == 'premier-league-2021-2022' and team == 'Arsenal':
    
#                     print(f"result is {team_season_data.loc[idx, 'target']} points for home team {team_season_data.loc[idx, 'home_team']}\n"
#                          f"Match score {team_season_data.loc[idx, 'home_scored']}:{team_season_data.loc[idx, 'away_scored']}")

                train_with_current_points.loc[idx, 'home_current_lose_streak'] = current_lose_streak

                train_with_current_points.loc[idx, 'home_current_win_streak'] = current_win_streak

                current_lose_streak = calculate_lose_streak(current_lose_streak, team_season_data.loc[idx, 'target'])

                current_win_streak = calculate_win_streak(current_win_streak, team_season_data.loc[idx, 'target'])

            else:

                train_with_current_points.loc[idx, 'away_current_points'] = current_points

                home = team_season_data.loc[idx, 'home_scored']
                away = team_season_data.loc[idx, 'away_scored']

                away_match_score = 3 if home < away else 1 if home == away else 0

                current_points += away_match_score
                
#                 if season == 'premier-league-2021-2022' and team == 'Arsenal':

#                     print(f"result is {away_match_score} points for away team {team_season_data.loc[idx, 'away_team']}\n"
#                          f"Match score {home}:{away}")

                train_with_current_points.loc[idx, 'away_current_lose_streak'] = current_lose_streak

                train_with_current_points.loc[idx, 'away_current_win_streak'] = current_win_streak

                current_lose_streak = calculate_lose_streak(current_lose_streak, away_match_score)

                current_win_streak = calculate_win_streak(current_win_streak, away_match_score)
            
#             if season == 'premier-league-2021-2022' and team == 'Arsenal':

#                 print('______________________________________________________________ \n')

train_with_current_points.home_current_points = train_with_current_points.home_current_points.astype(int)
train_with_current_points.away_current_points = train_with_current_points.away_current_points.astype(int)
train_with_current_points.away_current_win_streak = train_with_current_points.away_current_win_streak.astype(int)
train_with_current_points.away_current_lose_streak = train_with_current_points.away_current_lose_streak.astype(int)
train_with_current_points.home_current_win_streak = train_with_current_points.home_current_win_streak.astype(int)
train_with_current_points.home_current_lose_streak = train_with_current_points.home_current_lose_streak.astype(int)

In [None]:
train_with_current_points[(train_with_current_points.home_team == 'Manchester City')|(train_with_current_points.away_team == 'Manchester City')].tail(15)

In [None]:
train_with_names = train_with_names.merge(train_with_current_points, how='left')
train_with_names.sample(5)

# <a id='model'></a> Model  
[go to transformer](#transformer)

[go to top](#top)

In [161]:
train = train.reset_index()
train = train.drop(columns=['index'])
train.head()

Unnamed: 0,date,time,day_of_week,season,country,league,league_level,home_team,away_team,home_win_rate,draw_rate,away_win_rate,home_double_chance_rate,away_double_chance_rate,no_draw_rate,total_over_1_rate,total_under_1_rate,total_over_15_rate,total_under_15_rate,total_over_2_rate,total_under_2_rate,total_over_25_rate,total_under_25_rate,total_over_3_rate,total_under_3_rate,total_over_35_rate,total_under_35_rate,both_team_to_score_yes,both_team_to_score_no,home_handicap_0,away_handicap_0,home_handicap_minus_15,away_handicap_minus_15,home_handicap_minus_1,away_handicap_minus_1,home_handicap_plus_15,away_handicap_plus_15,home_handicap_plus_1,away_handicap_plus_1,home_first_half_handicap_0,away_first_half_handicap_0,home_first_half_handicap_minus_15,away_first_half_handicap_minus_15,home_first_half_handicap_minus_1,away_first_half_handicap_minus_1,home_first_half_handicap_plus_15,away_first_half_handicap_plus_15,home_first_half_handicap_plus_1,away_first_half_handicap_plus_1,home_second_half_handicap_0,away_second_half_handicap_0,home_second_half_handicap_minus_15,away_second_half_handicap_minus_15,home_second_half_handicap_minus_1,away_second_half_handicap_minus_1,home_second_half_handicap_plus_15,away_second_half_handicap_plus_15,home_second_half_handicap_plus_1,away_second_half_handicap_plus_1,home_first_half_win_rate,draw_first_half_rate,away_first_half_win_rate,home_second_half_win_rate,draw_second_half_rate,away_second_half_win_rate,home_double_chance_first_half_rate,away_double_chance_first_half_rate,no_draw_first_half_rate,home_double_chance_second_half_rate,away_double_chance_second_half_rate,no_draw_second_half_rate,total_first_half_over_05_rate,total_first_half_under_05_rate,total_first_half_over_1_rate,total_first_half_under_1_rate,total_first_half_over_15_rate,total_first_half_under_15_rate,total_first_half_over_2_rate,total_first_half_under_2_rate,total_second_half_over_05_rate,total_second_half_under_05_rate,total_second_half_over_1_rate,total_second_half_under_1_rate,total_second_half_over_15_rate,total_second_half_under_15_rate,total_second_half_over_2_rate,total_second_half_under_2_rate,both_team_to_score_first_half_yes,both_team_to_score_first_half_no,both_team_to_score_second_half_yes,both_team_to_score_second_half_no,odd,even,odd_first,odd_second,even_first,even_second,correct_score10,correct_score20,correct_score21,correct_score30,correct_score31,correct_score32,correct_score40,correct_score41,correct_score00,correct_score11,correct_score22,correct_score33,correct_score01,correct_score02,correct_score12,correct_score03,correct_score13,correct_score23,correct_score04,correct_score14,home_home,home_draw,home_away,draw_home,draw_draw,draw_away,away_home,away_draw,away_away,home_manager_working_days,away_manager_working_days,home_manager_name,away_manager_name,home_manager_start_date,home_manager_birthday,home_manager_country,away_manager_start_date,away_manager_birthday,away_manager_country,month,home_squad_size,home_average_age,home_amount_of_foreigners,home_e_market_value,home_total_market_value,away_squad_size,away_average_age,away_amount_of_foreigners,away_e_market_value,away_total_market_value,home_stadium,home_stadium_capacity,home_city,away_city,is_derby,home_goalkeepers_average_age,home_defenders_average_age,home_midfields_average_age,home_attacks_average_age,home_goalkeepers_total_market_value,home_defenders_total_market_value,home_midfields_total_market_value,home_attacks_total_market_value,home_goalkeepers_e_market_value,home_defenders_e_market_value,home_midfields_e_market_value,home_attacks_e_market_value,home_is_manager_and_league_same_country,away_goalkeepers_average_age,away_defenders_average_age,away_midfields_average_age,away_attacks_average_age,away_goalkeepers_total_market_value,away_defenders_total_market_value,away_midfields_total_market_value,away_attacks_total_market_value,away_goalkeepers_e_market_value,away_defenders_e_market_value,away_midfields_e_market_value,away_attacks_e_market_value,away_is_manager_and_league_same_country,home_last_winter_window_departures_average_age,home_last_winter_window_departures_sum,home_last_winter_window_departures_total_market_value,home_last_winter_window_arrivals_average_age,home_last_winter_window_arrivals_sum,home_last_winter_window_arrivals_total_market_value,home_last_summer_window_departures_average_age,home_last_summer_window_departures_sum,home_last_summer_window_departures_total_market_value,home_last_summer_window_arrivals_average_age,home_last_summer_window_arrivals_sum,home_last_summer_window_arrivals_total_market_value,away_last_winter_window_departures_average_age,away_last_winter_window_departures_sum,away_last_winter_window_departures_total_market_value,away_last_winter_window_arrivals_average_age,away_last_winter_window_arrivals_sum,away_last_winter_window_arrivals_total_market_value,away_last_summer_window_departures_average_age,away_last_summer_window_departures_sum,away_last_summer_window_departures_total_market_value,away_last_summer_window_arrivals_average_age,away_last_summer_window_arrivals_sum,away_last_summer_window_arrivals_total_market_value,day,year,target,log_home_e_market_value,log_home_total_market_value,log_away_e_market_value,log_away_total_market_value,log_home_goalkeepers_total_market_value,log_home_defenders_total_market_value,log_home_midfields_total_market_value,log_home_attacks_total_market_value,log_home_goalkeepers_e_market_value,log_home_defenders_e_market_value,log_home_midfields_e_market_value,log_home_attacks_e_market_value,log_away_goalkeepers_total_market_value,log_away_defenders_total_market_value,log_away_midfields_total_market_value,log_away_attacks_total_market_value,log_away_goalkeepers_e_market_value,log_away_defenders_e_market_value,log_away_midfields_e_market_value,log_away_attacks_e_market_value,log_home_last_winter_window_departures_total_market_value,log_home_last_winter_window_arrivals_total_market_value,log_home_last_summer_window_departures_total_market_value,log_home_last_summer_window_arrivals_total_market_value,log_away_last_winter_window_departures_total_market_value,log_away_last_winter_window_arrivals_total_market_value,log_away_last_summer_window_departures_total_market_value,log_away_last_summer_window_arrivals_total_market_value,log_home_last_winter_window_departures_sum,log_home_last_winter_window_arrivals_sum,log_home_last_summer_window_departures_sum,log_home_last_summer_window_arrivals_sum,log_away_last_winter_window_departures_sum,log_away_last_winter_window_arrivals_sum,log_away_last_summer_window_departures_sum,log_away_last_summer_window_arrivals_sum
0,1474329600,29,5,0,0,1,3,31,72,2.0,3.64,3.92,1.27,1.84,1.31,1.95,1.85,1.27,3.84,1.43,2.92,1.96,1.94,2.64,1.51,3.28,1.35,1.73,2.11,1.48,2.91,3.76,1.28,2.94,1.43,1.07,9.0,1.1,7.8,1.51,2.57,0.0,0.0,5.9,1.14,0.0,0.0,0.0,0.0,1.49,2.63,0.0,0.0,4.74,1.19,0.0,0.0,0.0,0.0,2.57,2.21,4.32,2.36,2.48,4.08,1.18,1.45,1.59,1.2,1.53,1.48,1.37,3.0,1.95,1.85,2.79,1.44,6.4,1.12,1.22,4.0,1.43,2.83,2.11,1.73,0.0,0.0,4.2,1.23,3.26,1.34,1.94,1.86,2.1,1.91,1.73,1.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,182,106,69,147,1458583000.0,55875600.0,22,1465150000.0,-291193200.0,19,9,36,24.7,9,191000.0,688000000.0,44,23.7,19,448000.0,1970000000.0,57,9088,78,22,0,24.25,25.0,26.18,23.09,400000.0,175000000.0,240000000.0,233000000.0,100000.0,194000.0,200000.0,211000.0,1,21.75,24.11,25.5,22.64,110000000.0,625000000.0,350000000.0,885000000.0,275000.0,347000.0,438000.0,632000.0,1,22.6,0.0,110000000.0,22.3,0.0,155000000.0,24.1,0.0,205000000.0,23.0,0.0,253000000.0,22.6,962000000.0,580000000.0,21.0,0.0,450000000.0,24.0,580000000.0,1925000000.0,25.2,130000000.0,860000000.0,2016-09-20,2016,1,12.160034,20.349299,13.012551,21.401299,12.899222,18.980297,19.296149,19.266549,11.512935,12.175619,12.206078,12.259618,18.515991,20.253262,19.673444,20.601098,12.52453,12.757083,12.989976,13.356646,18.515991,18.858936,19.138521,19.3489,20.178539,19.924758,21.378192,20.572443,0.0,0.0,0.0,0.0,20.684525,0.0,20.178539,18.683045
1,1474329600,29,5,0,0,1,3,52,7,2.87,3.5,2.57,1.54,1.44,1.33,1.9,1.9,1.25,4.04,1.39,3.1,1.89,2.02,2.47,1.57,3.1,1.38,1.63,2.28,2.08,1.85,6.1,1.14,5.2,1.18,1.17,5.2,1.25,4.2,2.0,1.81,0.0,0.0,0.0,0.0,0.0,0.0,1.09,7.7,2.0,1.81,0.0,0.0,0.0,0.0,0.0,0.0,1.11,6.6,3.38,2.21,3.08,3.16,2.47,2.86,1.33,1.28,1.59,1.37,1.31,1.48,1.37,3.0,1.9,1.9,2.72,1.46,5.75,1.14,1.22,4.0,1.4,2.96,2.04,1.78,0.0,0.0,3.88,1.26,3.02,1.39,1.9,1.9,2.1,1.91,1.73,1.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,266,907,154,25,1451326000.0,357494400.0,19,1395940000.0,251053200.0,19,9,39,21.8,14,194000.0,755000000.0,39,25.4,16,140000.0,548000000.0,74,15728,35,81,0,20.5,22.09,21.32,23.29,300000.0,168000000.0,358000000.0,200000000.0,150000.0,152000.0,188000.0,286000.0,1,26.6,24.57,25.42,26.0,450000.0,183000000.0,170000000.0,150000000.0,90000.0,130000.0,142000.0,188000.0,1,21.8,0.0,188000000.0,22.2,0.0,303000000.0,22.2,100000000.0,353000000.0,20.9,655000.0,243000000.0,23.7,350000.0,158000000.0,23.3,0.0,925000.0,23.3,0.0,130000000.0,22.3,0.0,163000000.0,2016-09-20,2016,0,12.175619,20.442228,11.849405,20.121786,12.611541,18.939475,19.696044,19.113828,11.918397,11.931642,12.144203,12.563751,13.017005,19.024997,18.951309,18.826146,11.407576,11.775297,11.863589,12.144203,19.051953,19.529243,19.681979,19.308572,18.878106,13.73755,18.683045,18.909261,0.0,0.0,18.420681,13.392392,12.765691,0.0,0.0,0.0
2,1474588800,29,0,0,0,0,2,27,58,2.49,3.24,3.26,1.37,1.57,1.37,1.05,8.5,1.29,3.46,1.51,2.6,2.12,1.82,2.91,1.42,3.54,1.29,1.79,2.01,1.77,2.22,5.0,1.16,4.04,1.24,1.09,7.0,1.13,6.0,1.71,2.14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.71,2.14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.04,2.09,3.76,2.81,2.32,3.5,1.23,1.33,1.66,1.26,1.39,1.54,1.39,3.0,1.9,1.9,2.79,1.44,6.9,1.1,1.25,3.74,1.49,2.6,2.21,1.66,0.0,0.0,4.2,1.23,3.24,1.35,1.94,1.86,2.1,2.0,1.73,1.73,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1313,535,136,72,1361120000.0,-1407600.0,19,1428343000.0,387388800.0,22,9,33,26.2,19,580000.0,1915000000.0,57,24.5,20,530000.0,3023000000.0,18,23408,46,2,0,25.67,25.69,26.63,26.89,140000000.0,565000000.0,330000000.0,880000000.0,467000.0,435000.0,413000.0,978000.0,1,27.0,25.19,23.33,24.23,230000000.0,790000000.0,888000000.0,1115000000.0,329000.0,494000.0,423000.0,858000.0,1,23.7,0.0,183000000.0,23.5,0.0,135000000.0,25.6,173000000.0,860000000.0,24.7,300000.0,1265000000.0,24.5,820000000.0,993000000.0,23.4,875000.0,778000000.0,24.3,0.0,1160000000.0,24.4,111000000.0,1548000000.0,2016-09-23,2016,3,13.270785,21.372983,13.180634,21.829516,18.757153,20.152336,19.614603,20.595432,13.054087,12.983104,12.931205,13.793266,19.25359,20.487544,20.604482,20.83212,12.703816,13.110293,12.95513,13.662361,19.024997,18.720785,20.572443,20.958338,20.716241,20.472237,20.871686,21.16023,0.0,0.0,18.968802,12.611541,20.524815,13.68198,0.0,18.525041
3,1474675200,12,2,0,0,1,3,37,59,2.05,3.64,3.82,1.28,1.82,1.31,1.05,10.5,1.26,3.94,1.42,2.98,1.93,1.97,2.57,1.53,3.22,1.36,1.71,2.14,1.5,2.84,3.8,1.28,2.97,1.42,1.08,8.7,1.1,7.5,1.52,2.53,0.0,0.0,5.9,1.14,0.0,0.0,0.0,0.0,1.51,2.57,0.0,0.0,4.74,1.19,0.0,0.0,0.0,0.0,2.58,2.22,4.24,2.37,2.5,3.98,1.18,1.45,1.59,1.21,1.53,1.47,1.4,2.75,1.81,2.0,2.63,1.49,5.75,1.14,1.22,4.0,1.42,2.88,2.08,1.75,0.0,0.0,4.2,1.23,3.2,1.35,1.9,1.9,2.1,1.91,1.73,1.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,272,159,5,0.0,0.0,2,1451153000.0,-315644400.0,1,9,0,0.0,0,0.0,0.0,43,24.4,18,120000.0,515000000.0,26,11840,32,9,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,28.0,24.79,24.27,22.17,450000.0,193000000.0,143000000.0,135000000.0,75000.0,138000.0,130000.0,113000.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.2,0.0,145000000.0,22.4,0.0,875000.0,23.4,0.0,250000000.0,22.1,0.0,118000000.0,2016-09-24,2016,3,0.0,0.0,11.695255,20.059677,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.017005,19.078201,18.778355,18.720785,11.225257,11.835016,11.775297,11.635152,0.0,0.0,0.0,0.0,18.792244,13.68198,19.336971,18.586195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1474675200,12,2,0,0,0,2,23,64,2.46,3.2,3.34,1.35,1.58,1.37,1.08,7.0,1.37,3.08,1.64,2.29,2.33,1.69,3.38,1.33,4.1,1.23,1.87,1.93,1.7,2.34,5.0,1.16,4.04,1.24,1.08,7.4,1.12,6.3,1.69,2.17,10.0,1.05,8.2,1.08,0.0,0.0,0.0,0.0,1.68,2.19,8.3,1.08,6.7,1.11,0.0,0.0,0.0,0.0,3.07,2.03,3.92,2.81,2.26,3.64,1.21,1.33,1.7,1.24,1.39,1.57,1.45,2.78,2.09,1.74,3.04,1.38,0.0,0.0,1.31,3.48,1.59,2.36,2.39,1.58,0.0,0.0,4.6,1.2,3.6,1.29,1.94,1.86,2.2,2.0,1.67,1.73,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,85,1423,260,178,1467310000.0,289501200.0,19,1351703000.0,-343983600.0,14,9,37,24.7,22,136000000.0,5045000000.0,42,25.0,18,618000.0,2595000000.0,20,40204,74,33,0,28.75,24.29,24.6,23.56,145000000.0,2055000000.0,1180000000.0,1665000000.0,363000.0,147000000.0,118000000.0,185000000.0,0,27.33,24.55,23.82,26.82,900000.0,700000000.0,990000000.0,815000000.0,300000.0,636000.0,582000.0,741000.0,0,25.6,580000.0,550000000.0,22.8,0.0,655000000.0,23.9,700000000.0,1705000000.0,24.5,858000000.0,1705000000.0,27.5,0.0,600000.0,25.6,132000.0,428000000.0,25.2,350000000.0,713000000.0,21.1,166000000.0,385000000.0,2016-09-24,2016,3,18.728165,22.341663,13.334245,21.676852,18.792244,21.443542,20.88878,21.233091,12.802161,18.805943,18.586195,19.035866,13.710151,20.366591,20.713216,20.518699,12.611541,13.362955,13.274227,13.515757,20.125429,20.300146,21.256831,21.256831,13.304687,19.874634,20.384992,19.768754,13.270785,0.0,20.366591,20.570115,0.0,11.790565,19.673444,18.927498


In [99]:
from sklearn.metrics import roc_auc_score, log_loss, accuracy_score
from catboost import CatBoostClassifier, Pool

In [21]:
cat_features = list(categorical_features)

model_data = {
    'train':train,
    'val':val,
    'test':test,
    'target':'target',
    'cat_features':cat_features
}

In [22]:
X_test = test.drop(columns=['target'])
y_test = test.target

In [152]:
def test_cv_generator(data, base_size=50, window=30):
    for i in range(0, len(data) - base_size - window + 1, window):
        train = data[:base_size+i]
        val = data[base_size+i:base_size+i+window]
        yield train, val

In [153]:
test_data = [i for i in range(150)]
gen = test_cv_generator(test_data)
gen

<generator object test_cv_generator at 0x7fb8fa686740>

In [154]:
i = 0
for train, val in gen:
    print(f"CV [{i}]")
    print(f"Train: {train}")
    print(f"Val: {val}")
    print('_____')
    i += 1

CV [0]
Train: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
Val: [50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79]
_____
CV [1]
Train: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79]
Val: [80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109]
_____
CV [2]
Train: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,

In [162]:
def get_cv_data(train_data, train_initial_size=7000, window=50):
    
    for index in range(0, train_data.shape[0] - train_initial_size - window + 1, window):
        
        train_cv = train_data.loc[:train_initial_size + i]
        val_cv = train_data.loc[train_initial_size + i: train_initial_size + i + window]
        yield train_cv, val_cv

In [164]:
cv_generator = get_cv_data(train)
train_1, val_1 = next(cv_generator)
val_1

Unnamed: 0,date,time,day_of_week,season,country,league,league_level,home_team,away_team,home_win_rate,draw_rate,away_win_rate,home_double_chance_rate,away_double_chance_rate,no_draw_rate,total_over_1_rate,total_under_1_rate,total_over_15_rate,total_under_15_rate,total_over_2_rate,total_under_2_rate,total_over_25_rate,total_under_25_rate,total_over_3_rate,total_under_3_rate,total_over_35_rate,total_under_35_rate,both_team_to_score_yes,both_team_to_score_no,home_handicap_0,away_handicap_0,home_handicap_minus_15,away_handicap_minus_15,home_handicap_minus_1,away_handicap_minus_1,home_handicap_plus_15,away_handicap_plus_15,home_handicap_plus_1,away_handicap_plus_1,home_first_half_handicap_0,away_first_half_handicap_0,home_first_half_handicap_minus_15,away_first_half_handicap_minus_15,home_first_half_handicap_minus_1,away_first_half_handicap_minus_1,home_first_half_handicap_plus_15,away_first_half_handicap_plus_15,home_first_half_handicap_plus_1,away_first_half_handicap_plus_1,home_second_half_handicap_0,away_second_half_handicap_0,home_second_half_handicap_minus_15,away_second_half_handicap_minus_15,home_second_half_handicap_minus_1,away_second_half_handicap_minus_1,home_second_half_handicap_plus_15,away_second_half_handicap_plus_15,home_second_half_handicap_plus_1,away_second_half_handicap_plus_1,home_first_half_win_rate,draw_first_half_rate,away_first_half_win_rate,home_second_half_win_rate,draw_second_half_rate,away_second_half_win_rate,home_double_chance_first_half_rate,away_double_chance_first_half_rate,no_draw_first_half_rate,home_double_chance_second_half_rate,away_double_chance_second_half_rate,no_draw_second_half_rate,total_first_half_over_05_rate,total_first_half_under_05_rate,total_first_half_over_1_rate,total_first_half_under_1_rate,total_first_half_over_15_rate,total_first_half_under_15_rate,total_first_half_over_2_rate,total_first_half_under_2_rate,total_second_half_over_05_rate,total_second_half_under_05_rate,total_second_half_over_1_rate,total_second_half_under_1_rate,total_second_half_over_15_rate,total_second_half_under_15_rate,total_second_half_over_2_rate,total_second_half_under_2_rate,both_team_to_score_first_half_yes,both_team_to_score_first_half_no,both_team_to_score_second_half_yes,both_team_to_score_second_half_no,odd,even,odd_first,odd_second,even_first,even_second,correct_score10,correct_score20,correct_score21,correct_score30,correct_score31,correct_score32,correct_score40,correct_score41,correct_score00,correct_score11,correct_score22,correct_score33,correct_score01,correct_score02,correct_score12,correct_score03,correct_score13,correct_score23,correct_score04,correct_score14,home_home,home_draw,home_away,draw_home,draw_draw,draw_away,away_home,away_draw,away_away,home_manager_working_days,away_manager_working_days,home_manager_name,away_manager_name,home_manager_start_date,home_manager_birthday,home_manager_country,away_manager_start_date,away_manager_birthday,away_manager_country,month,home_squad_size,home_average_age,home_amount_of_foreigners,home_e_market_value,home_total_market_value,away_squad_size,away_average_age,away_amount_of_foreigners,away_e_market_value,away_total_market_value,home_stadium,home_stadium_capacity,home_city,away_city,is_derby,home_goalkeepers_average_age,home_defenders_average_age,home_midfields_average_age,home_attacks_average_age,home_goalkeepers_total_market_value,home_defenders_total_market_value,home_midfields_total_market_value,home_attacks_total_market_value,home_goalkeepers_e_market_value,home_defenders_e_market_value,home_midfields_e_market_value,home_attacks_e_market_value,home_is_manager_and_league_same_country,away_goalkeepers_average_age,away_defenders_average_age,away_midfields_average_age,away_attacks_average_age,away_goalkeepers_total_market_value,away_defenders_total_market_value,away_midfields_total_market_value,away_attacks_total_market_value,away_goalkeepers_e_market_value,away_defenders_e_market_value,away_midfields_e_market_value,away_attacks_e_market_value,away_is_manager_and_league_same_country,home_last_winter_window_departures_average_age,home_last_winter_window_departures_sum,home_last_winter_window_departures_total_market_value,home_last_winter_window_arrivals_average_age,home_last_winter_window_arrivals_sum,home_last_winter_window_arrivals_total_market_value,home_last_summer_window_departures_average_age,home_last_summer_window_departures_sum,home_last_summer_window_departures_total_market_value,home_last_summer_window_arrivals_average_age,home_last_summer_window_arrivals_sum,home_last_summer_window_arrivals_total_market_value,away_last_winter_window_departures_average_age,away_last_winter_window_departures_sum,away_last_winter_window_departures_total_market_value,away_last_winter_window_arrivals_average_age,away_last_winter_window_arrivals_sum,away_last_winter_window_arrivals_total_market_value,away_last_summer_window_departures_average_age,away_last_summer_window_departures_sum,away_last_summer_window_departures_total_market_value,away_last_summer_window_arrivals_average_age,away_last_summer_window_arrivals_sum,away_last_summer_window_arrivals_total_market_value,day,year,target,log_home_e_market_value,log_home_total_market_value,log_away_e_market_value,log_away_total_market_value,log_home_goalkeepers_total_market_value,log_home_defenders_total_market_value,log_home_midfields_total_market_value,log_home_attacks_total_market_value,log_home_goalkeepers_e_market_value,log_home_defenders_e_market_value,log_home_midfields_e_market_value,log_home_attacks_e_market_value,log_away_goalkeepers_total_market_value,log_away_defenders_total_market_value,log_away_midfields_total_market_value,log_away_attacks_total_market_value,log_away_goalkeepers_e_market_value,log_away_defenders_e_market_value,log_away_midfields_e_market_value,log_away_attacks_e_market_value,log_home_last_winter_window_departures_total_market_value,log_home_last_winter_window_arrivals_total_market_value,log_home_last_summer_window_departures_total_market_value,log_home_last_summer_window_arrivals_total_market_value,log_away_last_winter_window_departures_total_market_value,log_away_last_winter_window_arrivals_total_market_value,log_away_last_summer_window_departures_total_market_value,log_away_last_summer_window_arrivals_total_market_value,log_home_last_winter_window_departures_sum,log_home_last_winter_window_arrivals_sum,log_home_last_summer_window_departures_sum,log_home_last_summer_window_arrivals_sum,log_away_last_winter_window_departures_sum,log_away_last_winter_window_arrivals_sum,log_away_last_summer_window_departures_sum,log_away_last_summer_window_arrivals_sum
7003,1605312000,16,2,4,0,1,3,87,73,1.74,3.65,4.97,1.18,2.11,1.29,1.07,7.0,1.37,3.01,1.61,2.3,2.19,1.71,3.16,1.35,3.8,1.23,2.05,1.75,1.27,3.6,3.06,1.38,2.37,1.59,1.01,11.5,1.03,10.5,1.37,3.1,7.0,1.1,5.35,1.16,0.0,0.0,1.01,17.0,1.36,3.15,5.45,1.15,4.04,1.24,1.01,17.0,1.01,15.0,2.37,2.12,5.48,2.14,2.5,4.8,1.12,1.55,1.64,1.14,1.66,1.5,1.45,2.68,2.07,1.76,3.12,1.34,0.0,0.0,1.3,3.58,1.57,2.41,2.35,1.6,0.0,0.0,5.19,1.15,3.9,1.23,1.94,1.87,2.12,2.03,1.72,1.79,6.03,7.23,8.41,11.71,13.73,26.17,35.0,42.0,8.22,6.73,15.35,57.11,10.46,20.3,13.93,56.57,37.45,44.52,100.0,100.0,2.64,16.0,59.0,4.43,4.88,10.0,29.0,17.0,8.27,394,376,56,193,1571245000.0,-65862000.0,19,1572800000.0,505155600.0,22,11,42,24.7,15,274000.0,1150000000.0,43,24.3,12,317000.0,1365000000.0,66,49000,50,43,0,23.25,23.57,24.17,27.0,400000.0,285000000.0,345000000.0,480000000.0,100000.0,204000.0,288000.0,400000.0,0,27.33,24.31,23.77,24.18,550000.0,465000000.0,355000000.0,490000000.0,183000.0,310000.0,254000.0,445000.0,1,25.0,0.0,178000000.0,24.3,338000.0,155000000.0,26.7,0.0,610000000.0,25.2,0.0,508000000.0,25.6,0.0,190000000.0,24.0,222000.0,565000000.0,24.6,720000.0,665000000.0,24.1,0.0,320000000.0,2020-11-14,2020,0,12.520887,20.863028,12.66666,21.03442,12.899222,19.468,19.659055,19.989297,11.512935,12.22588,12.570719,12.899222,13.217675,19.957548,19.687628,20.009916,12.117247,12.644331,12.445093,13.005832,18.997294,18.858936,20.22897,20.045992,19.062535,20.152336,20.315298,19.583832,0.0,12.730804,0.0,0.0,0.0,12.310437,13.487008,0.0
7004,1605312000,16,2,4,0,2,4,97,35,1.99,3.24,3.98,1.23,1.78,1.32,1.12,5.38,1.44,2.66,1.76,2.04,2.38,1.58,3.62,1.28,4.34,1.18,2.06,1.7,1.47,2.83,3.82,1.24,2.96,1.38,1.05,8.74,1.07,7.77,1.48,2.65,8.8,1.06,6.85,1.1,0.0,18.0,1.01,17.0,1.47,2.68,6.8,1.1,5.25,1.16,1.01,15.0,1.02,13.5,2.7,2.01,4.74,2.35,2.31,4.39,1.14,1.44,1.73,1.17,1.53,1.57,1.5,2.47,2.25,1.6,3.34,1.29,6.04,1.07,1.34,3.14,1.71,2.08,2.58,1.48,0.0,0.0,5.6,1.13,4.19,1.21,1.92,1.84,2.19,2.06,1.68,1.76,6.07,8.14,8.89,14.61,16.18,31.69,51.0,61.0,7.02,6.32,16.09,57.09,8.89,17.05,12.79,48.39,36.17,48.91,100.0,100.0,3.03,16.0,57.0,4.61,4.28,8.68,33.0,16.0,7.34,649,13,280,258,1549213000.0,-162802800.0,19,1604164000.0,-216457200.0,19,11,35,26.0,4,42000.0,148000000.0,35,26.4,6,116000.0,408000000.0,93,19052,69,29,0,26.0,25.18,27.22,25.9,400000.0,250000.0,275000.0,550000.0,100000.0,23000.0,28000.0,55000.0,1,31.0,27.63,24.9,25.77,200000.0,525000.0,193000000.0,143000000.0,67000.0,66000.0,193000.0,102000.0,0,21.0,0.0,0.0,17.9,0.0,100000.0,25.0,0.0,50000.0,25.7,0.0,450000.0,26.7,0.0,325000.0,25.8,0.0,100000000.0,27.0,0.0,290000000.0,24.7,0.0,218000000.0,2020-11-14,2020,0,10.645449,18.812723,11.661354,19.826778,12.899222,12.42922,12.52453,13.217675,11.512935,10.043293,10.239996,10.915107,12.206078,13.171155,19.078201,18.778355,11.112463,11.097425,12.170451,11.532738,0.0,11.512935,10.819798,13.017005,12.691584,18.420681,19.485391,19.200006,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7005,1605312000,16,2,4,0,1,3,90,42,2.66,3.24,2.75,1.46,1.49,1.35,1.09,6.2,1.35,3.13,1.56,2.41,2.11,1.76,2.95,1.4,3.62,1.26,1.82,1.94,1.91,1.97,5.13,1.14,4.36,1.2,1.13,5.29,1.18,4.56,1.88,1.92,11.0,1.04,9.3,1.06,1.03,11.5,1.05,9.7,1.88,1.92,8.8,1.06,7.3,1.1,1.06,9.1,1.09,7.5,3.3,2.08,3.42,2.94,2.43,3.07,1.28,1.3,1.69,1.33,1.36,1.52,1.44,2.72,2.01,1.8,3.01,1.37,5.11,1.1,1.27,3.61,1.54,2.5,2.3,1.62,4.04,1.24,4.64,1.17,3.45,1.29,1.97,1.84,2.15,2.03,1.7,1.79,7.98,11.2,9.09,21.82,18.45,27.08,98.0,80.0,8.2,6.11,12.68,50.04,8.14,11.79,9.64,23.39,19.52,27.6,100.0,87.0,4.24,14.0,35.0,6.09,4.54,6.31,34.0,14.0,4.46,775,2249,265,164,1538327000.0,-193561200.0,19,1410973000.0,-227948400.0,19,11,40,25.2,9,95000.0,380000000.0,40,23.7,11,51000.0,205000000.0,60,7798,11,7,0,23.0,25.08,25.5,25.57,250000.0,140000000.0,850000.0,130000000.0,63000.0,117000.0,85000.0,93000.0,0,20.0,24.62,23.53,23.88,300000.0,750000.0,700000.0,300000.0,75000.0,58000.0,47000.0,38000.0,1,28.4,0.0,800000.0,24.7,0.0,900000.0,24.0,110000000.0,275000000.0,24.5,0.0,190000000.0,25.3,0.0,150000.0,25.0,0.0,575000.0,22.1,275000.0,108000000.0,21.6,0.0,600000.0,2020-11-14,2020,0,11.461643,19.755682,10.839601,19.138521,12.42922,18.757153,13.652993,18.683045,11.050906,11.669938,11.350418,11.440366,12.611541,13.52783,13.458837,12.611541,11.225257,10.968216,10.757924,10.545368,13.592368,13.710151,19.432282,19.062535,11.918397,13.262127,18.497642,13.304687,0.0,0.0,18.515991,0.0,0.0,0.0,12.52453,0.0
7006,1605312000,16,2,4,0,2,4,65,1,2.48,3.26,2.83,1.41,1.51,1.32,1.07,6.83,1.28,3.34,1.46,2.62,1.94,1.87,2.63,1.46,3.23,1.3,1.72,2.04,1.82,2.07,4.74,1.16,3.91,1.24,1.12,5.44,1.17,4.72,1.81,2.0,10.0,1.05,8.3,1.07,1.03,11.5,1.05,9.7,1.81,2.0,7.9,1.08,6.35,1.12,1.06,9.0,1.09,7.5,3.13,2.12,3.48,2.76,2.5,3.08,1.26,1.32,1.68,1.32,1.4,1.49,1.4,2.79,1.88,1.89,2.81,1.39,4.95,1.11,1.23,3.79,1.44,2.69,2.11,1.69,3.52,1.3,4.3,1.2,3.19,1.33,1.95,1.81,2.17,2.01,1.69,1.8,8.24,11.1,8.99,20.18,16.77,23.97,82.0,64.0,8.68,6.24,12.09,42.51,8.79,12.48,9.68,23.99,19.05,25.06,100.0,81.0,3.96,14.0,34.0,5.96,4.84,6.59,31.0,14.0,4.56,354,796,249,8,1574701000.0,148496400.0,19,1536512000.0,253299600.0,1,11,39,22.6,8,57000.0,223000000.0,33,24.7,4,83000.0,273000000.0,12,18202,30,48,0,23.0,21.57,23.4,22.91,250000.0,375000.0,900000.0,700000.0,63000.0,27000.0,90000.0,64000.0,1,25.0,26.33,22.9,25.11,700000.0,600000.0,750000.0,675000.0,233000.0,55000.0,75000.0,75000.0,0,20.7,0.0,350000.0,21.7,0.0,150000.0,24.8,0.0,155000000.0,21.5,0.0,160000000.0,24.4,0.0,100000.0,21.0,0.0,350000.0,22.5,0.0,140000000.0,22.7,0.0,650000.0,2020-11-14,2020,0,10.950824,19.222682,11.326608,19.424982,12.42922,12.834684,13.710151,13.458837,11.050906,10.203629,11.407576,11.066654,13.458837,13.304687,13.52783,13.422469,12.358798,10.915107,11.225257,11.225257,12.765691,11.918397,18.858936,18.890684,11.512935,12.765691,18.757153,13.384729,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7007,1605312000,16,2,4,0,1,3,7,81,3.22,3.37,2.25,1.65,1.36,1.33,1.08,6.48,1.34,3.16,1.56,2.42,2.1,1.77,2.95,1.4,3.62,1.26,1.84,1.93,2.32,1.66,6.48,1.09,5.61,1.13,1.19,4.3,1.3,3.47,2.22,1.66,14.0,1.02,12.0,1.03,1.06,9.4,1.09,7.6,2.24,1.65,11.0,1.04,9.6,1.06,1.09,7.4,1.14,5.85,3.89,2.1,2.92,3.56,2.46,2.59,1.39,1.22,1.66,1.45,1.26,1.52,1.43,2.75,2.0,1.82,3.01,1.37,5.34,1.09,1.26,3.7,1.54,2.49,2.3,1.62,4.2,1.23,4.68,1.17,3.51,1.29,1.94,1.86,2.12,2.01,1.72,1.8,8.93,14.18,10.36,30.66,23.29,30.19,100.0,100.0,8.42,6.23,12.72,51.04,7.4,9.32,8.86,16.97,15.69,25.02,63.0,57.0,5.5,14.0,31.0,7.36,4.69,5.31,40.0,14.0,3.47,327,896,108,233,1577034000.0,327517200.0,19,1527872000.0,399744000.0,19,11,41,24.6,12,88000.0,363000000.0,46,24.4,17,196000.0,903000000.0,42,11916,81,71,0,25.33,25.29,23.79,24.29,650000.0,128000000.0,155000000.0,150000.0,108000.0,91000.0,111000.0,21000.0,1,25.4,23.0,24.78,24.88,700000.0,213000000.0,490000000.0,130000000.0,117000.0,193000.0,245000.0,144000.0,0,24.0,0.0,150000.0,24.8,0.0,400000.0,24.4,140000000.0,245000000.0,22.6,0.0,175000000.0,24.9,0.0,260000000.0,24.3,0.0,200000000.0,25.0,390000.0,810000000.0,24.2,0.0,610000000.0,2020-11-14,2020,0,11.385103,19.709913,12.185875,20.621233,13.384729,18.667541,18.858936,11.918397,11.589896,11.418626,11.617294,9.952325,13.458837,19.176803,20.009916,18.683045,11.669938,12.170451,12.409018,11.877576,11.918397,12.899222,19.316769,18.980297,19.376192,19.113828,20.512545,20.22897,0.0,0.0,18.757153,0.0,0.0,0.0,12.873905,0.0
7008,1605312000,16,2,4,0,2,4,46,19,2.16,3.34,3.32,1.31,1.67,1.31,1.07,6.68,1.31,3.17,1.52,2.46,2.02,1.8,2.83,1.43,3.47,1.27,1.81,1.92,1.6,2.45,4.04,1.22,3.29,1.33,1.08,6.71,1.11,5.93,1.64,2.26,9.1,1.06,7.3,1.09,1.02,14.0,1.03,12.0,1.63,2.28,7.1,1.1,5.6,1.14,1.04,11.0,1.06,9.6,2.82,2.11,3.99,2.52,2.46,3.55,1.21,1.4,1.66,1.25,1.47,1.5,1.42,2.72,1.95,1.83,2.92,1.37,5.74,1.11,1.27,3.64,1.5,2.5,2.23,1.62,3.94,1.25,4.64,1.17,3.47,1.29,1.92,1.84,2.14,2.01,1.71,1.8,7.28,9.13,8.69,16.7,15.47,24.44,61.0,56.0,8.68,6.37,13.2,53.03,9.12,14.45,10.51,31.28,23.41,30.71,100.0,100.0,3.41,15.0,41.0,5.17,4.81,7.32,30.0,15.0,5.55,289,128,346,276,1580317000.0,501526800.0,19,1594228000.0,315075600.0,19,11,31,23.5,6,61000.0,190000000.0,39,25.9,10,92000.0,360000000.0,2,10847,49,13,0,22.25,23.7,24.1,23.0,100000.0,700000.0,105000000.0,50000.0,25000.0,70000.0,105000.0,7000.0,1,27.0,25.64,26.67,25.1,125000.0,170000000.0,975000.0,800000.0,42000.0,121000.0,81000.0,80000.0,1,24.0,0.0,200000.0,20.6,0.0,0.0,23.4,0.0,950000.0,22.0,0.0,120000000.0,23.7,0.0,300000.0,25.3,0.0,148000000.0,24.9,0.0,975000.0,23.9,0.0,193000000.0,2020-11-14,2020,1,11.018646,19.062535,11.429555,19.701615,11.512935,13.458837,18.469471,10.819798,10.126671,11.156265,11.561725,8.853808,11.736077,18.951309,13.790194,13.592368,10.645449,11.703554,11.302217,11.289794,12.206078,0.0,13.764218,18.603002,12.611541,18.812723,13.790194,19.078201,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7009,1605312000,16,2,4,0,1,3,38,52,2.24,3.35,3.28,1.34,1.67,1.33,1.07,7.03,1.29,3.36,1.48,2.62,1.99,1.86,2.71,1.46,3.36,1.3,1.76,2.02,1.61,2.41,4.21,1.21,3.42,1.32,1.08,6.73,1.11,5.88,1.66,2.22,9.5,1.06,7.7,1.09,1.02,14.0,1.03,12.0,1.66,2.21,7.5,1.09,5.9,1.13,1.04,11.0,1.06,9.4,2.84,2.14,3.88,2.58,2.49,3.48,1.22,1.39,1.65,1.27,1.46,1.5,1.4,2.86,1.9,1.9,2.87,1.4,4.79,1.12,1.24,3.83,1.48,2.65,2.2,1.67,3.84,1.26,4.36,1.19,3.29,1.32,1.94,1.86,2.09,2.0,1.74,1.81,7.72,9.72,8.7,17.24,15.41,23.35,63.0,54.0,8.85,6.34,12.1,46.01,9.23,13.94,10.45,29.75,21.82,28.13,100.0,100.0,3.48,14.0,38.0,5.41,4.83,7.33,30.0,14.0,5.33,712,1,326,279,1543770000.0,371581200.0,12,1605200000.0,-165740400.0,14,11,41,24.9,16,268000.0,1100000000.0,43,24.8,17,172000.0,738000000.0,44,10361,25,35,0,22.4,26.08,23.92,25.75,400000.0,458000000.0,423000000.0,180000000.0,80000.0,381000.0,352000.0,150000.0,1,24.57,25.44,24.27,24.33,130000000.0,378000000.0,115000000.0,115000000.0,186000.0,236000.0,105000.0,128000.0,1,24.1,0.0,450000000.0,23.4,0.0,210000000.0,25.4,0.0,180000000.0,24.0,0.0,430000000.0,22.3,0.0,250000000.0,22.9,0.0,310000000.0,23.5,0.0,248000000.0,22.9,0.0,195000000.0,2020-11-14,2020,1,12.498746,20.818576,12.055256,20.419454,12.899222,19.94238,19.862883,19.008467,11.289794,12.850557,12.771389,11.918397,18.683045,19.750405,18.560443,18.560443,12.133507,12.371591,11.561725,11.759793,19.924758,19.162618,19.008467,19.879296,19.336971,19.552083,19.328939,19.08851,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7010,1605312000,16,2,4,0,2,4,44,88,2.83,3.35,2.43,1.53,1.41,1.31,1.06,7.46,1.26,3.51,1.42,2.76,1.87,1.95,2.49,1.51,3.07,1.34,1.69,2.09,2.09,1.8,5.41,1.13,4.7,1.17,1.17,4.58,1.25,3.78,2.03,1.79,11.5,1.03,9.7,1.05,1.05,9.7,1.08,7.9,2.04,1.78,9.2,1.06,7.7,1.09,1.09,7.6,1.13,6.1,3.45,2.16,3.05,3.11,2.53,2.71,1.34,1.27,1.64,1.42,1.32,1.48,1.38,2.88,1.83,1.95,2.73,1.41,4.81,1.12,1.21,3.87,1.45,2.77,2.07,1.72,3.52,1.3,4.18,1.21,3.14,1.34,1.92,1.83,2.12,2.0,1.72,1.81,9.11,12.55,9.64,24.46,18.45,23.94,100.0,78.0,9.44,6.38,11.45,39.47,8.57,10.68,8.95,19.66,15.66,22.34,75.0,58.0,4.61,14.0,30.0,6.71,5.03,5.88,34.0,14.0,3.82,284,896,234,133,1580749000.0,-175503600.0,22,1527872000.0,381171600.0,19,11,37,24.8,10,67000.0,248000000.0,37,23.4,5,68000.0,253000000.0,87,25136,38,34,0,26.67,23.58,22.64,27.73,300000.0,800000.0,525000.0,850000.0,100000.0,67000.0,48000.0,77000.0,0,21.5,23.14,24.8,23.22,100000.0,800000.0,600000.0,103000000.0,25000.0,57000.0,60000.0,114000.0,1,24.2,0.0,400000.0,24.4,0.0,375000.0,24.8,0.0,170000000.0,22.0,0.0,625000.0,23.5,0.0,400000.0,20.8,0.0,400000.0,25.4,0.0,650000.0,21.6,0.0,500000.0,2020-11-14,2020,1,11.112463,19.328939,11.127278,19.3489,12.611541,13.592368,13.171155,13.652993,11.512935,11.112463,10.778977,11.251574,11.512935,13.592368,13.304687,18.45024,10.126671,10.950824,11.002117,11.643962,12.899222,12.834684,18.951309,13.345509,12.899222,12.899222,13.384729,13.122365,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7011,1605484800,33,1,4,0,1,3,55,79,3.25,3.64,2.12,1.73,1.35,1.29,1.03,11.02,1.17,4.54,1.27,3.7,1.59,2.34,1.99,1.81,2.5,1.51,1.53,2.44,2.38,1.62,6.31,1.09,5.5,1.13,1.23,3.98,1.36,3.09,2.22,1.66,12.5,1.03,10.5,1.04,1.07,8.5,1.11,6.65,2.19,1.68,9.7,1.05,8.2,1.08,1.1,6.65,1.16,5.25,3.66,2.36,2.64,3.35,2.74,2.43,1.44,1.26,1.53,1.48,1.3,1.43,1.28,3.4,1.56,2.38,2.37,1.57,4.06,1.21,1.16,5.04,1.3,3.52,1.84,1.97,2.75,1.45,3.43,1.3,2.66,1.45,1.92,1.89,1.97,1.95,1.84,1.85,11.75,16.08,10.33,30.18,19.02,21.28,100.0,80.0,12.66,7.04,10.72,29.33,9.21,10.07,8.3,15.8,12.65,16.93,44.0,33.0,5.34,14.0,24.0,8.13,5.99,5.42,34.0,14.0,3.04,504,1263,330,226,1561914000.0,274899600.0,19,1496336000.0,-252140400.0,12,11,38,23.8,10,110000.0,418000000.0,42,23.5,14,193000.0,810000000.0,30,19500,15,44,0,25.0,23.15,25.0,23.0,300000.0,170000000.0,148000000.0,700000.0,75000.0,131000.0,148000.0,64000.0,1,23.4,23.64,22.43,24.78,100000.0,320000000.0,300000000.0,180000000.0,20000.0,229000.0,214000.0,200000.0,1,24.3,0.0,103000000.0,21.0,0.0,825000.0,22.7,0.0,775000.0,23.2,0.0,218000000.0,22.7,0.0,250000.0,24.4,0.0,320000000.0,24.5,0.0,478000000.0,22.6,275000.0,215000000.0,2020-11-16,2020,1,11.608245,19.850992,12.170451,20.512545,12.611541,18.951309,18.812723,13.458837,11.225257,11.78296,11.904974,11.066654,11.512935,19.583832,19.519293,19.008467,9.903538,12.341482,12.273736,12.206078,18.45024,13.62314,13.56062,19.200006,12.42922,19.583832,19.985121,19.186149,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.52453
7012,1605571200,30,5,4,0,2,4,97,31,1.76,3.51,4.63,1.18,2.01,1.28,1.09,6.8,1.31,3.13,1.52,2.4,2.02,1.77,2.82,1.4,3.52,1.26,1.95,1.79,1.32,3.36,3.15,1.36,2.4,1.56,1.02,10.5,1.04,9.42,1.4,2.95,7.1,1.1,5.45,1.15,0.0,18.0,1.01,17.0,1.39,3.0,5.45,1.15,4.04,1.24,1.01,15.0,1.02,14.0,2.39,2.13,5.17,2.13,2.5,4.58,1.13,1.53,1.66,1.16,1.64,1.49,1.41,2.7,1.95,1.81,2.96,1.35,6.21,1.1,1.25,3.59,1.51,2.48,2.23,1.62,3.94,1.25,4.89,1.16,3.67,1.26,1.92,1.83,2.15,2.03,1.7,1.79,6.47,7.59,8.31,11.92,13.17,24.53,35.0,39.0,8.66,6.64,14.09,57.08,10.46,20.12,13.65,54.03,35.34,40.94,100.0,100.0,2.63,15.0,56.0,4.56,4.95,10.0,28.0,16.0,8.13,652,102,280,111,1549213000.0,-162802800.0,19,1596733000.0,55702800.0,19,11,35,26.0,4,42000.0,148000000.0,41,23.4,6,57000.0,235000000.0,93,19052,69,78,0,26.0,25.18,27.22,25.9,400000.0,250000.0,275000.0,550000.0,100000.0,23000.0,28000.0,55000.0,1,24.5,23.23,22.56,23.67,350000.0,125000000.0,250000.0,500000.0,88000.0,96000.0,28000.0,33000.0,1,21.0,0.0,0.0,17.9,0.0,100000.0,25.0,0.0,50000.0,25.7,0.0,450000.0,23.2,0.0,100000.0,24.3,0.0,350000.0,26.6,0.0,235000000.0,21.7,0.0,800000.0,2020-11-17,2020,0,10.645449,18.812723,10.950824,19.275096,12.899222,12.42922,12.52453,13.217675,11.512935,10.043293,10.239996,10.915107,12.765691,18.643824,12.42922,13.122365,11.385103,11.472114,10.239996,10.404293,0.0,11.512935,10.819798,13.017005,11.512935,12.765691,19.275096,13.592368,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
model_params = {'n_estimators':1000,
                'learning_rate':0.01,
                'loss_function':'MultiClass',
                'task_type':'GPU',
                'verbose':250
                }

model = CatBoostClassifier(**model_params)

In [23]:
# model_params = {'n_estimators':1000,
#                 'learning_rate':0.01,
#                 'loss_function':'MultiClass',
#                 'task_type':'GPU',
#                 'verbose':250
#                 }

# model = BoostingModel(params=model_params, data=model_data)

# model.fit()
# preds_class = model.predict(X_test)
# preds_proba = model.predict_proba(X_test)

0:	learn: 1.0970040	test: 1.0979961	best: 1.0979961 (0)	total: 17.3ms	remaining: 17.3s
250:	learn: 0.9990590	test: 1.0795494	best: 1.0722525 (86)	total: 3.83s	remaining: 11.4s
500:	learn: 0.9761152	test: 1.0892192	best: 1.0722525 (86)	total: 7.87s	remaining: 7.83s
750:	learn: 0.9545336	test: 1.0903403	best: 1.0722525 (86)	total: 11.6s	remaining: 3.85s
999:	learn: 0.9351588	test: 1.0947804	best: 1.0722525 (86)	total: 15.4s	remaining: 0us
bestTest = 1.072252502
bestIteration = 86
Shrink model to first 87 iterations.


# Оценка качества

In [24]:
print(f'Validation ACCURACY SCORE: {accuracy_score(y_test, preds_class)}')

Validation ACCURACY SCORE: 0.55


In [25]:
print(f'Validation ROC AUC SCORE: {roc_auc_score(y_test, preds_proba, multi_class="ovr")}')

Validation ROC AUC SCORE: 0.4869378306878307


In [26]:
train_preds_class = model.model.predict(train.drop(columns=['target']))

In [27]:
train_preds_df = pd.DataFrame()
train_preds_df['pred_class'] = train_preds_class.ravel()

In [28]:
print('True target rate: ')
print(f"Class_0 rate {train.target.value_counts()[0] / train.shape[0]}")
print(f"Class_1 rate {train.target.value_counts()[1] / train.shape[0]}")
print(f"Class_3 rate {train.target.value_counts()[3] / train.shape[0]}")

True target rate: 
Class_0 rate 0.30745957224830467
Class_1 rate 0.26113719353155973
Class_3 rate 0.43140323422013566


In [30]:
train_preds_df.pred_class.value_counts()

3    6675
0    2910
Name: pred_class, dtype: int64

In [31]:
print('Predict target rate:')
print(f"Class_0 rate {train_preds_df.pred_class.value_counts()[0] / train_preds_df.shape[0]}")
# print(f"Class_1 rate {train_preds_df.pred_class.value_counts()[1] / train_preds_df.shape[0]}")
print(f"Class_3 rate {train_preds_df.pred_class.value_counts()[3] / train_preds_df.shape[0]}")

Predict target rate:
Class_0 rate 0.30359937402190923
Class_3 rate 0.6964006259780907


In [32]:
results = pd.DataFrame()

results['true'] = y_test
results['pred_class'] = preds_class
results['score_0'] = preds_proba[:,0]
results['score_1'] = preds_proba[:,1]
results['score_3'] = preds_proba[:,2]

results

Unnamed: 0,true,pred_class,score_0,score_1,score_3
6891,3,0,0.398686,0.302882,0.298433
9333,3,3,0.228844,0.258639,0.512517
6766,0,0,0.432595,0.287196,0.280209
9510,3,3,0.298932,0.310747,0.39032
9408,0,0,0.399739,0.291138,0.309124
7018,0,3,0.21748,0.237801,0.544719
6964,3,3,0.303694,0.312797,0.38351
6837,0,3,0.343315,0.31249,0.344195
9604,1,3,0.276102,0.307684,0.416214
9669,1,0,0.488301,0.26334,0.248359


# Влияние фичей на качество модели

In [38]:
feature_importance = model.get_feature_importances()
feature_importance_df = pd.DataFrame()

feature_importance_df['feature'] = X_test.columns
feature_importance_df['importance'] = feature_importance

good_features = feature_importance_df[np.abs(feature_importance_df.importance) > 0].sort_values(by='importance',ascending=False).head(30)
bad_features = feature_importance_df.sort_values(by='importance',ascending=True).head(30)

print(f'Number of features {X_test.shape[1]}')

Number of features 240


In [39]:
good_features

Unnamed: 0,feature,importance
9,home_win_rate,7.668776
10,draw_rate,6.231026
11,away_win_rate,5.006873
64,away_second_half_win_rate,4.338611
14,no_draw_rate,4.21873
36,away_handicap_plus_15,3.831842
62,home_second_half_win_rate,3.671404
39,home_first_half_handicap_0,3.444516
29,home_handicap_0,2.902997
13,away_double_chance_rate,2.707952


In [40]:
bad_features

Unnamed: 0,feature,importance
119,home_away,0.0
169,away_goalkeepers_total_market_value,0.0
166,away_defenders_average_age,0.0
164,home_is_manager_and_league_same_country,0.0
162,home_midfields_e_market_value,0.0
158,home_midfields_total_market_value,0.0
151,is_derby,0.0
150,away_city,0.0
149,home_city,0.0
147,home_stadium,0.0


In [42]:
test_predictions = pd.DataFrame()
test_predictions['home_team'] = X_test.home_team.map(decode_labels['team_names'])
test_predictions['away_team'] = X_test.away_team.map(decode_labels['team_names'])
test_predictions['home_win_proba'] = preds_proba[:,0]
test_predictions['draw_proba'] = preds_proba[:,1]
test_predictions['away_win_proba'] = preds_proba[:,2]
test_predictions['home_win_rate'] = X_test.home_win_rate
test_predictions['draw_rate'] = X_test.draw_rate
test_predictions['away_win_rate'] = X_test.away_win_rate
test_predictions['result'] = y_test
test_predictions['predict'] = preds_class

In [43]:
test_predictions

Unnamed: 0,home_team,away_team,home_win_proba,draw_proba,away_win_proba,home_win_rate,draw_rate,away_win_rate,result,predict
6891,Crewe Alexandra,AFC Wimbledon,0.398686,0.302882,0.298433,3.51,3.28,2.18,3,0
9333,Exeter City,Colchester United,0.228844,0.258639,0.512517,1.5,4.12,6.49,3,3
6766,Fleetwood Town,Oxford United,0.432595,0.287196,0.280209,4.38,3.77,1.79,0,0
9510,Bristol Rovers,Salford City,0.298932,0.310747,0.39032,2.33,3.23,3.15,3,3
9408,Oldham Athletic,Northampton Town,0.399739,0.291138,0.309124,3.46,3.17,2.21,0,0
7018,Wigan Athletic,Cambridge United,0.21748,0.237801,0.544719,1.33,5.05,9.31,0,3
6964,Rotherham United,Ipswich Town,0.303694,0.312797,0.38351,2.22,3.25,3.43,3,3
6837,Milton Keynes Dons,Sheffield Wednesday,0.343315,0.31249,0.344195,2.63,3.2,2.8,0,3
9604,Salford City,Barrow AFC,0.276102,0.307684,0.416214,1.92,3.3,4.29,1,3
9669,Scunthorpe United,Stevenage FC,0.488301,0.26334,0.248359,7.6,4.32,1.44,1,0


# Считаем ROI - "коэффициент выгодности ставки"

In [44]:
class ROIChecker():
    def __init__(self, predictions):
        self.predictions = predictions
    
    def get_roi(self):
        
        self.predictions['home_win_ROI'] = self.predictions.home_win_rate * self.predictions.home_win_proba - 1
        self.predictions['away_win_ROI'] = self.predictions.away_win_rate * self.predictions.away_win_proba - 1
        self.predictions['draw_ROI'] = self.predictions.draw_rate * self.predictions.draw_proba - 1
        
        return self.predictions

In [45]:
checker = ROIChecker(test_predictions)
roi_info = checker.get_roi()

In [46]:
roi_info.head()

Unnamed: 0,home_team,away_team,home_win_proba,draw_proba,away_win_proba,home_win_rate,draw_rate,away_win_rate,result,predict,home_win_ROI,away_win_ROI,draw_ROI
6891,Crewe Alexandra,AFC Wimbledon,0.398686,0.302882,0.298433,3.51,3.28,2.18,3,0,0.399387,-0.349417,-0.006548
9333,Exeter City,Colchester United,0.228844,0.258639,0.512517,1.5,4.12,6.49,3,3,-0.656734,2.326237,0.065591
6766,Fleetwood Town,Oxford United,0.432595,0.287196,0.280209,4.38,3.77,1.79,0,0,0.894766,-0.498426,0.082729
9510,Bristol Rovers,Salford City,0.298932,0.310747,0.39032,2.33,3.23,3.15,3,3,-0.303487,0.229508,0.003714
9408,Oldham Athletic,Northampton Town,0.399739,0.291138,0.309124,3.46,3.17,2.21,0,0,0.383096,-0.316837,-0.077094


In [103]:
def explain_roi_info(roi_info):
    roi_cols = ['home_win_ROI', 'away_win_ROI', 'draw_ROI']
    best_roi_df = pd.DataFrame()
    for index, row in roi_info.iterrows():
        print(f"Match #{index+1}: {row.home_team} vs {row.away_team}")
        
        max_roi = np.max(roi_info.loc[index, roi_cols])
        current_choice = 'home_win_ROI'
        
        for col in roi_cols:
            if row[col] == max_roi:
                current_choice = col
        current_choice = ' '.join(current_choice.split('_')[:1])
        
        if max_roi > 0:
            print(f"Maximal ROI = {np.round(max_roi*100,2)}% on {current_choice}")
        
        else:
            print("Нет положительного ROI")
            
        print('_______________________________ \n')
        
        best_roi_df.loc[index, 'home_team'] = row.home_team
        best_roi_df.loc[index, 'away_team'] = row.away_team
        best_roi_df.loc[index, 'best_ROI'] = max_roi
        best_roi_df.loc[index, 'choice'] = current_choice
        best_roi_df.loc[index, 'home_win_rate'] = row.home_win_rate
        best_roi_df.loc[index, 'draw_rate'] = row.draw_rate
        best_roi_df.loc[index, 'away_win_rate'] = row.away_win_rate
        best_roi_df.loc[index, 'result'] = row.result
        best_roi_df.loc[index, 'predict'] = row.predict

        
    return best_roi_df

In [104]:
best_roi_df = explain_roi_info(roi_info)

Match #6892: Crewe Alexandra vs AFC Wimbledon
Maximal ROI = 39.94% on home
_______________________________ 

Match #9334: Exeter City vs Colchester United
Maximal ROI = 232.62% on away
_______________________________ 

Match #6767: Fleetwood Town vs Oxford United
Maximal ROI = 89.48% on home
_______________________________ 

Match #9511: Bristol Rovers vs Salford City
Maximal ROI = 22.95% on away
_______________________________ 

Match #9409: Oldham Athletic vs Northampton Town
Maximal ROI = 38.31% on home
_______________________________ 

Match #7019: Wigan Athletic vs Cambridge United
Maximal ROI = 407.13% on away
_______________________________ 

Match #6965: Rotherham United vs Ipswich Town
Maximal ROI = 31.54% on away
_______________________________ 

Match #6838: Milton Keynes Dons vs Sheffield Wednesday
Нет положительного ROI
_______________________________ 

Match #9605: Salford City vs Barrow AFC
Maximal ROI = 78.56% on away
_______________________________ 

Match #9670: Scunt

In [105]:
best_roi_df

Unnamed: 0,home_team,away_team,best_ROI,choice,home_win_rate,draw_rate,away_win_rate,result,predict
6891,Crewe Alexandra,AFC Wimbledon,0.399387,home,3.51,3.28,2.18,3.0,0.0
9333,Exeter City,Colchester United,2.326237,away,1.5,4.12,6.49,3.0,3.0
6766,Fleetwood Town,Oxford United,0.894766,home,4.38,3.77,1.79,0.0,0.0
9510,Bristol Rovers,Salford City,0.229508,away,2.33,3.23,3.15,3.0,3.0
9408,Oldham Athletic,Northampton Town,0.383096,home,3.46,3.17,2.21,0.0,0.0
7018,Wigan Athletic,Cambridge United,4.071338,away,1.33,5.05,9.31,0.0,3.0
6964,Rotherham United,Ipswich Town,0.315438,away,2.22,3.25,3.43,3.0,3.0
6837,Milton Keynes Dons,Sheffield Wednesday,-3.2e-05,draw,2.63,3.2,2.8,0.0,3.0
9604,Salford City,Barrow AFC,0.785557,away,1.92,3.3,4.29,1.0,3.0
9669,Scunthorpe United,Stevenage FC,2.711086,home,7.6,4.32,1.44,1.0,0.0


In [193]:
def money_score(best_roi_df, bet=100):
    results = {'home':3, 'draw':1, 'away':0}
    cols = {'home':'home_win_rate', 'draw':'draw_rate', 'away':'away_win_rate'}
    profit = 0
    skipped_bets = 0
    accepted_bets = 0
    total_coef = 0
    win_coef = 0
    lose_coef = 0
    win_bets = 0
    lose_bets = 0
    total_bank = 0
    win_bank = 0
    lose_bank = 0
    
    for index, row in best_roi_df.iterrows():
        if row.best_ROI < 1:
                skipped_bets += 1
        else:
            accepted_bets += 1

            if results[row.choice] == row.result:

                current_profit = bet * (row[cols[row.choice]] - 1)
                profit += current_profit
                win_coef += row[cols[row.choice]]

                print(f'Match {row.home_team} vs {row.away_team}')
                print(row.choice, row[cols[row.choice]], current_profit)
                print(f'Match score ')
                print('_____________________________________________________\n')
                win_bank += current_profit
                win_bets += 1
                
            else:
                profit -= bet
                lose_bank -= bet
                lose_bets += 1
                lose_coef += row[cols[row.choice]]
                
            total_coef += row[cols[row.choice]]
            total_bank += bet

    average_win_coef = win_coef / win_bets

    average_lose_coef = lose_coef / lose_bets
    average_coef = total_coef / accepted_bets
    percent_profit = profit / total_bank * 100
          
    result = ((skipped_bets, accepted_bets),
            (profit, percent_profit),
            (total_bank, win_bank, lose_bank),
            (win_bets, lose_bets),
            (average_coef, average_win_coef, average_lose_coef))
    
    return result
    

In [194]:
score = money_score(best_roi_df, 100)

Match Wigan Athletic vs Cambridge United
away 9.31 831.0
Match score 
_____________________________________________________

Match Carlisle United vs Mansfield Town
home 4.7 370.0
Match score 
_____________________________________________________



In [195]:
print(f'Skipped bets: {score[0][0]}\t Accepted bets: {score[0][1]}')
print(f"Win bets: {score[3][0]}\t Lose bets: {score[3][1]}")
print(f"Win bank: {score[2][1]}$\t Lose bank: {score[2][2]}$")

print(f"Average coefficient: {np.round(score[4][0],2)}")
print(f"Avg win coef: {np.round(score[4][1],2)}\t Avg lose coef: {np.round(score[4][2],2)}")
print(f"Initial bank was {score[2][0]}$ \t Finish bank is {score[2][0]+np.round(score[1][0],2)}$\n")
print('___________RESULT__________')
print(f" \tWe get + {np.round(score[1][1],2)}% profit")
print(f" \tProfit: {score[1][0]}$")

Skipped bets: 14	 Accepted bets: 6
Win bets: 2	 Lose bets: 4
Win bank: 1201.0$	 Lose bank: -400$
Average coefficient: 6.96
Avg win coef: 7.01	 Avg lose coef: 6.94
Initial bank was 600$ 	 Finish bank is 1401.0$

___________RESULT__________
 	We get + 133.5% profit
 	Profit: 801.0$


# [НАВЕРХ](#top)