In [562]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [563]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import random

from market_value_predictor.preproc import reduce_number_of_classes

In [564]:
random.seed(42)

# Import data

In [565]:
df_train = pd.read_csv("../../raw_data/master_data_train.csv").drop(columns="Unnamed: 0")
df_test = pd.read_csv("../../raw_data/master_data_test.csv").drop(columns="Unnamed: 0")

In [566]:
df_train.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3132 entries, 0 to 3131
Data columns (total 101 columns):
 #    Column                         Non-Null Count  Dtype  
---   ------                         --------------  -----  
 0    fee_cleaned                    3132 non-null   float64
 1    age                            3132 non-null   int64  
 2    height_cm                      3132 non-null   int64  
 3    weight_kg                      3132 non-null   int64  
 4    nationality                    3132 non-null   object 
 5    club_name                      3113 non-null   object 
 6    league_name                    3113 non-null   object 
 7    league_rank                    3113 non-null   float64
 8    overall                        3132 non-null   int64  
 9    potential                      3132 non-null   int64  
 10   wage_eur                       3132 non-null   int64  
 11   player_positions               3132 non-null   object 
 12   preferred_foot                 3

In [567]:
numericals = ["int64", "float64"]

In [568]:
X = df_train.select_dtypes(numericals).drop(columns="fee_cleaned")

In [569]:
y = df_train.fee_cleaned

# Feature selection

In [570]:
correlations = df_train.select_dtypes(numericals).corr().fee_cleaned

In [571]:
correlations.sort_values(ascending=False)[1:11]

release_clause_eur          0.540972
potential                   0.459752
wage_eur                    0.445108
gk_handling                 0.386970
overall                     0.366983
international_reputation    0.331902
gk_positioning              0.310531
movement_reactions          0.308766
gk_kicking                  0.300016
gk_reflexes                 0.294949
Name: fee_cleaned, dtype: float64

In [572]:
top_5_corr_num_features = list(correlations.sort_values(ascending=False)[1:6].index)

In [573]:
X_selection = X[top_5_corr_num_features]

In [574]:
X_selection

Unnamed: 0,release_clause_eur,potential,wage_eur,gk_handling,overall
0,1500000.0,77,2000,,63
1,3600000.0,83,6000,,67
2,625000.0,78,650,,58
3,27600000.0,81,60000,,80
4,14800000.0,79,30000,,74
...,...,...,...,...,...
3127,,80,30000,,76
3128,,72,40000,,70
3129,24200000.0,84,9000,,75
3130,4900000.0,86,15000,,68


# Train model 1: top 5 correlated numerical features

In [632]:
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, RobustScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn import set_config; set_config(display='diagram')
from sklearn.linear_model import LinearRegression, Ridge, Lasso

In [576]:
pipe = Pipeline([
    ("imputer", SimpleImputer(strategy="constant", fill_value=0)),
    ("scaler", MinMaxScaler()),
    ("estimator", LinearRegression())
])
pipe

In [577]:
from sklearn.model_selection import cross_val_score

cross_val_score(pipe, X_selection, y, cv=5, scoring='r2').mean()

0.28870471180551427

# Feature engineering

In [578]:
from market_value_predictor.preproc import manual_encoding

In [579]:
df_train = manual_encoding(df_train, "player_tags")

In [580]:
df_train = manual_encoding(df_train, "player_positions")

In [581]:
df_train = manual_encoding(df_train, "player_traits")

In [582]:
df_train.select_dtypes(object)

Unnamed: 0,nationality,club_name,league_name,preferred_foot,work_rate,body_type,real_face,team_position
0,Italy,FC Sion,Swiss Super League,Left,High/Medium,Normal,No,LCM
1,France,Paris Saint-Germain,French Ligue 1,Right,Medium/Medium,Normal,No,RES
2,Ecuador,Club Atlético Talleres,Argentina Primera División,Left,Medium/Medium,Lean,No,SUB
3,Russia,FC Krasnodar,Russian Premier League,Right,High/Medium,Lean,No,ST
4,Serbia,Olympique de Marseille,French Ligue 1,Right,High/Low,Lean,Yes,SUB
...,...,...,...,...,...,...,...,...
3127,France,OGC Nice,French Ligue 1,Right,Medium/Low,Normal,No,SUB
3128,Norway,Rosenborg BK,Norwegian Eliteserien,Right,Medium/High,Normal,No,ST
3129,Brazil,Portimonense SC,Portuguese Liga ZON SAGRES,Left,Medium/Low,Normal,No,RW
3130,France,Olympique Lyonnais,French Ligue 1,Right,Medium/Medium,Normal,No,SUB


## Reduce number of classes on 'nationality'

In [583]:
nationality_df = pd.DataFrame(
    df_train.nationality.value_counts()).reset_index().rename(
        columns={
            "index": "nationality",
            "nationality": "count"
        })

In [584]:
temp_list = []
for i, elem in enumerate(list(nationality_df["count"])):
    if elem > 50:
        temp_list.append(list(nationality_df.nationality)[i])
    else:
        temp_list.append("other")

In [585]:
nationality_df["nationality_cleaned"] = temp_list

In [586]:
nationality_df = nationality_df.drop(columns="count")

In [587]:
df_train = df_train.merge(nationality_df, on="nationality", how="left").drop(columns="nationality")

## Reduce number of classes on 'league_name'

In [588]:
league_name_df = pd.DataFrame(
    df_train.league_name.value_counts()).reset_index().rename(
        columns={
            "index": "league_name",
            "league_name": "count"
        })

In [589]:
temp_list = []
for i, elem in enumerate(list(league_name_df["count"])):
    if elem > 100:
        temp_list.append(list(league_name_df.league_name)[i])
    else:
        temp_list.append("other")

In [590]:
league_name_df["league_name_cleaned"] = temp_list

In [591]:
league_name_df = league_name_df.drop(columns="count")

In [592]:
df_train = df_train.merge(league_name_df, on="league_name", how="left").drop(columns="league_name")

## Club_name

In [593]:
ohe = OneHotEncoder(sparse=False)

In [594]:
club_name_encoded = pd.DataFrame(ohe.fit_transform(df_train[["club_name"]]))

In [595]:
club_name_encoded.columns = ohe.get_feature_names_out()

In [596]:
club_name_encoded["fee_cleaned"] = df_train.fee_cleaned

In [597]:
club_name_encoded.corr().fee_cleaned.sort_values(ascending=False)[:20]

fee_cleaned                          1.000000
club_name_Athletic Club de Bilbao    0.131095
club_name_Borussia Dortmund          0.106269
club_name_Real Madrid                0.103005
club_name_FC Barcelona               0.091204
club_name_Juventus                   0.087745
club_name_Paris Saint-Germain        0.079938
club_name_Liverpool                  0.077838
club_name_SL Benfica                 0.076187
club_name_Chelsea                    0.074385
club_name_Manchester United          0.072174
club_name_Ajax                       0.068007
club_name_Leicester City             0.062929
club_name_Atlético Madrid            0.062028
club_name_FC Porto                   0.061783
club_name_Valencia CF                0.059865
club_name_Southampton                0.059597
club_name_Roma                       0.059025
club_name_Inter                      0.052526
club_name_Everton                    0.051069
Name: fee_cleaned, dtype: float64

In [598]:
### Drop club_name

In [599]:
df_train.drop(columns="club_name", inplace=True)

## Reduce number of classes on 'team_position'

In [600]:
attack = ["ST", "LS", "LW", "RS", "RW", "RF", "LF", "CF"]
mid = ["LCM", "RM", "CB", "CAM", "LM", "CM", "CDM", "RCM", "LCM", "RDM", "LDM", "RAM", "LAM"]
defense = ["RCB", "LCB", "CB", "RB", "LB", "RWB", "LWB"]
goal = ["GK"]
sub = ["SUB", "RES"]

In [601]:
df_train["position_cluster"] = df_train.team_position.map(lambda x: "attack" if x in attack else "mid"
                           if x in mid else "defense" if x in defense else "goal"
                           if x in goal else "sub" if x in sub else "nan")

In [602]:
np.unique(temp, return_counts=True)

(array(['attack', 'defense', 'goal', 'mid', 'nan', 'sub'], dtype=object),
 array([ 393,  504,  124,  666,   19, 1426]))

In [603]:
np.unique(list(df_train.team_position), return_counts=True)

(array(['CAM', 'CB', 'CDM', 'CF', 'CM', 'GK', 'LAM', 'LB', 'LCB', 'LCM',
        'LDM', 'LF', 'LM', 'LS', 'LW', 'LWB', 'RAM', 'RB', 'RCB', 'RCM',
        'RDM', 'RES', 'RF', 'RM', 'RS', 'RW', 'RWB', 'ST', 'SUB', 'nan'],
       dtype='<U3'),
 array([  92,   12,   35,    4,   21,  124,    6,  110,  141,   98,   45,
           6,  102,   62,   52,    6,    8,  105,  132,   95,   59,  230,
           6,   93,   65,   49,   10,  149, 1196,   19]))

In [604]:
df_train.drop(columns="team_position", inplace=True)

# Drop nas from object columns

In [605]:
df_train = df_train.dropna(subset=list(df_train.select_dtypes(object).columns))

In [606]:
df_train.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3113 entries, 0 to 3131
Data columns (total 165 columns):
 #    Column                                  Non-Null Count  Dtype  
---   ------                                  --------------  -----  
 0    fee_cleaned                             3113 non-null   float64
 1    age                                     3113 non-null   int64  
 2    height_cm                               3113 non-null   int64  
 3    weight_kg                               3113 non-null   int64  
 4    league_rank                             3113 non-null   float64
 5    overall                                 3113 non-null   int64  
 6    potential                               3113 non-null   int64  
 7    wage_eur                                3113 non-null   int64  
 8    preferred_foot                          3113 non-null   object 
 9    international_reputation                3113 non-null   int64  
 10   weak_foot                               3113 n

In [607]:
list(df_train.columns)

['fee_cleaned',
 'age',
 'height_cm',
 'weight_kg',
 'league_rank',
 'overall',
 'potential',
 'wage_eur',
 'preferred_foot',
 'international_reputation',
 'weak_foot',
 'skill_moves',
 'work_rate',
 'body_type',
 'real_face',
 'release_clause_eur',
 'team_jersey_number',
 'pace',
 'shooting',
 'passing',
 'dribbling',
 'defending',
 'physic',
 'gk_diving',
 'gk_handling',
 'gk_kicking',
 'gk_reflexes',
 'gk_speed',
 'gk_positioning',
 'attacking_crossing',
 'attacking_finishing',
 'attacking_heading_accuracy',
 'attacking_short_passing',
 'attacking_volleys',
 'skill_dribbling',
 'skill_curve',
 'skill_fk_accuracy',
 'skill_long_passing',
 'skill_ball_control',
 'movement_acceleration',
 'movement_sprint_speed',
 'movement_agility',
 'movement_reactions',
 'movement_balance',
 'power_shot_power',
 'power_jumping',
 'power_stamina',
 'power_strength',
 'power_long_shots',
 'mentality_aggression',
 'mentality_interceptions',
 'mentality_positioning',
 'mentality_vision',
 'mentality_pen

# Train model 2: After feature engineering

In [608]:
all_cats = list(df_train.select_dtypes(object).columns)

In [609]:
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']

all_numerics = list(df_train.select_dtypes(include=numerics).columns)

all_numerics.remove("fee_cleaned")

In [610]:
encoded_columns = [elem for elem in all_numerics if "player_tags_" in elem] + [
    elem for elem in all_numerics if "player_positions_" in elem
] + [elem for elem in all_numerics if "player_traits_" in elem]

In [611]:
all_numerics_wo_encoded = []
for elem in all_numerics:
    if elem not in encoded_columns:
        all_numerics_wo_encoded.append(elem)

In [612]:
numericals_zero_impute = [
    "gk_diving", "gk_handling", "gk_kicking", "gk_reflexes", "gk_speed",
    "gk_positioning", "release_clause_eur"
]

In [613]:
numericals_mean_impute = []

for elem in all_numerics_wo_encoded:
    if elem not in numericals_zero_impute:
        numericals_mean_impute.append(elem)

In [614]:
print(X_2.shape[1])
print(len(all_numerics))
print(len(all_cats))
print(len(encoded_columns))
print(len(all_numerics_wo_encoded))
print(len(numericals_mean_impute))
print(len(numericals_zero_impute))

223
157
7
68
89
82
7


In [615]:
num_zero_tr = Pipeline([
    ("imputer", SimpleImputer(strategy="constant", fill_value=0)),
    ("scaler", MinMaxScaler())
])

num_mean_tr = Pipeline([
    ("imputer", SimpleImputer(strategy="mean")),
    ("scaler", MinMaxScaler())
])

cat_tr = OneHotEncoder(handle_unknown='ignore', )

In [639]:
preprocessor = ColumnTransformer(
    [("numerics_zero_imputing", num_zero_tr, numericals_zero_impute),
     ("numerics_mean_imputing", num_mean_tr, numericals_mean_impute),
     ("cat_tr", cat_tr, all_cats)],
remainder="passthrough")

pipe_2 = Pipeline([
    ("preprocessing", preprocessor), 
    ("regressor", Lasso())])

pipe_2

In [635]:
X_2 = df_train.drop(columns="fee_cleaned")
y_2 = df_train[["fee_cleaned"]]

In [636]:
cross_val_score(pipe_2, X_2, y_2, cv=5, scoring="r2").mean()

0.006457387480565413

In [640]:
pipe_2.get_params()

{'memory': None,
 'steps': [('preprocessing',
   ColumnTransformer(remainder='passthrough',
                     transformers=[('numerics_zero_imputing',
                                    Pipeline(steps=[('imputer',
                                                     SimpleImputer(fill_value=0,
                                                                   strategy='constant')),
                                                    ('scaler', MinMaxScaler())]),
                                    ['gk_diving', 'gk_handling', 'gk_kicking',
                                     'gk_reflexes', 'gk_speed', 'gk_positioning',
                                     'release_clause_eur']),
                                   ('numerics_mean_imputing',
                                    Pipeline(steps=[('imputer', Simple...
                                     'attacking_volleys', 'skill_dribbling',
                                     'skill_curve', 'skill_fk_accuracy',
                     

## Grid search

In [644]:
from xgboost.sklearn import XGBRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from catboost import CatBoostRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import BayesianRidge
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR

In [647]:
from sklearn.model_selection import GridSearchCV

In [645]:
param_grid={
#     'columntransformer__num_tr__scaler': [RobustScaler()],
    'regressor': [
        Ridge(), 
        #LinearRegression(), 
        XGBRegressor(), 
        CatBoostRegressor(), 
        SGDRegressor(), 
        #KernelRidge(), 
        #ElasticNet(),
        #BayesianRidge(), 
        #GradientBoostingRegressor(),
        #SVR()
    ]}

In [648]:
grid_search = GridSearchCV(
    pipe_2, 
    param_grid=param_grid,
    cv=5,
    scoring="r2",
    n_jobs=-1)

grid_search.fit(X_2, y_2)
print(grid_search.best_params_)
print(grid_search.best_score_)

Learning rate set to 0.047291
0:	learn: 11.1012040	total: 329ms	remaining: 5m 28s
1:	learn: 10.9410770	total: 423ms	remaining: 3m 30s
2:	learn: 10.7947478	total: 606ms	remaining: 3m 21s
3:	learn: 10.6542670	total: 686ms	remaining: 2m 50s
4:	learn: 10.5179182	total: 900ms	remaining: 2m 59s
5:	learn: 10.3766421	total: 1.08s	remaining: 2m 59s
6:	learn: 10.2529840	total: 1.14s	remaining: 2m 42s
7:	learn: 10.1228661	total: 1.29s	remaining: 2m 39s
8:	learn: 10.0150990	total: 1.36s	remaining: 2m 30s
9:	learn: 9.9077950	total: 1.44s	remaining: 2m 22s
10:	learn: 9.7953198	total: 1.52s	remaining: 2m 17s
11:	learn: 9.7098350	total: 1.59s	remaining: 2m 10s
12:	learn: 9.6241493	total: 1.7s	remaining: 2m 8s
13:	learn: 9.5376101	total: 1.77s	remaining: 2m 5s
14:	learn: 9.4544618	total: 1.85s	remaining: 2m 1s
15:	learn: 9.4274859	total: 1.97s	remaining: 2m 1s
16:	learn: 9.3960651	total: 2.07s	remaining: 1m 59s
17:	learn: 9.3367823	total: 2.12s	remaining: 1m 55s
18:	learn: 9.2587035	total: 2.24s	remain

Learning rate set to 0.047291
0:	learn: 10.9483260	total: 249ms	remaining: 4m 8s
1:	learn: 10.8163032	total: 349ms	remaining: 2m 54s
2:	learn: 10.6739505	total: 481ms	remaining: 2m 39s
3:	learn: 10.5352768	total: 600ms	remaining: 2m 29s
4:	learn: 10.4015343	total: 700ms	remaining: 2m 19s
5:	learn: 10.2803938	total: 779ms	remaining: 2m 9s
6:	learn: 10.1555518	total: 883ms	remaining: 2m 5s
7:	learn: 10.0530319	total: 1.15s	remaining: 2m 22s
8:	learn: 9.9374007	total: 1.35s	remaining: 2m 28s
9:	learn: 9.8384752	total: 1.41s	remaining: 2m 19s
10:	learn: 9.7352872	total: 1.47s	remaining: 2m 12s
11:	learn: 9.6436710	total: 1.73s	remaining: 2m 22s
12:	learn: 9.5513131	total: 1.85s	remaining: 2m 20s
13:	learn: 9.4612458	total: 1.91s	remaining: 2m 14s
14:	learn: 9.3688949	total: 1.96s	remaining: 2m 8s
15:	learn: 9.2833407	total: 2.08s	remaining: 2m 7s
16:	learn: 9.2027505	total: 2.21s	remaining: 2m 7s
17:	learn: 9.1383080	total: 2.31s	remaining: 2m 5s
18:	learn: 9.0790668	total: 2.4s	remaining:

157:	learn: 6.5271467	total: 18.4s	remaining: 1m 38s
158:	learn: 6.5195498	total: 18.6s	remaining: 1m 38s
159:	learn: 6.5041395	total: 18.6s	remaining: 1m 37s
160:	learn: 6.4962734	total: 18.7s	remaining: 1m 37s
161:	learn: 6.4889154	total: 18.8s	remaining: 1m 37s
162:	learn: 6.4852552	total: 18.9s	remaining: 1m 37s
163:	learn: 6.4807291	total: 18.9s	remaining: 1m 36s
164:	learn: 6.4766382	total: 19s	remaining: 1m 36s
165:	learn: 6.4657047	total: 19.1s	remaining: 1m 35s
166:	learn: 6.4569421	total: 19.2s	remaining: 1m 35s
167:	learn: 6.4446299	total: 19.2s	remaining: 1m 35s
168:	learn: 6.4418525	total: 19.4s	remaining: 1m 35s
169:	learn: 6.4346420	total: 19.5s	remaining: 1m 35s
170:	learn: 6.4314310	total: 19.6s	remaining: 1m 34s
171:	learn: 6.4156711	total: 19.6s	remaining: 1m 34s
172:	learn: 6.4080908	total: 19.7s	remaining: 1m 34s
173:	learn: 6.4063494	total: 19.8s	remaining: 1m 34s
174:	learn: 6.4033159	total: 19.9s	remaining: 1m 33s
175:	learn: 6.3973706	total: 20s	remaining: 1m 3

312:	learn: 5.0770102	total: 30.9s	remaining: 1m 7s
313:	learn: 5.0672326	total: 31.2s	remaining: 1m 8s
314:	learn: 5.0624307	total: 31.3s	remaining: 1m 8s
315:	learn: 5.0599435	total: 31.4s	remaining: 1m 7s
316:	learn: 5.0531613	total: 31.5s	remaining: 1m 7s
317:	learn: 5.0473840	total: 31.6s	remaining: 1m 7s
318:	learn: 5.0406240	total: 31.7s	remaining: 1m 7s
319:	learn: 5.0363398	total: 31.7s	remaining: 1m 7s
320:	learn: 5.0222877	total: 31.8s	remaining: 1m 7s
321:	learn: 5.0136471	total: 31.9s	remaining: 1m 7s
322:	learn: 5.0076048	total: 32s	remaining: 1m 7s
323:	learn: 5.0036713	total: 32.1s	remaining: 1m 6s
324:	learn: 4.9896573	total: 32.1s	remaining: 1m 6s
325:	learn: 4.9782725	total: 32.3s	remaining: 1m 6s
326:	learn: 4.9686492	total: 32.4s	remaining: 1m 6s
327:	learn: 4.9625808	total: 32.4s	remaining: 1m 6s
328:	learn: 4.9519820	total: 32.5s	remaining: 1m 6s
329:	learn: 4.9460191	total: 32.5s	remaining: 1m 6s
330:	learn: 4.9409080	total: 32.6s	remaining: 1m 5s
331:	learn: 4.

312:	learn: 5.4352877	total: 32.9s	remaining: 1m 12s
313:	learn: 5.4238317	total: 32.9s	remaining: 1m 11s
314:	learn: 5.4204039	total: 33s	remaining: 1m 11s
315:	learn: 5.4186918	total: 33.1s	remaining: 1m 11s
316:	learn: 5.4040255	total: 33.2s	remaining: 1m 11s
317:	learn: 5.3951101	total: 33.3s	remaining: 1m 11s
318:	learn: 5.3850471	total: 33.4s	remaining: 1m 11s
319:	learn: 5.3745685	total: 33.5s	remaining: 1m 11s
320:	learn: 5.3728999	total: 33.5s	remaining: 1m 10s
321:	learn: 5.3719046	total: 33.6s	remaining: 1m 10s
322:	learn: 5.3600169	total: 33.7s	remaining: 1m 10s
323:	learn: 5.3460056	total: 33.7s	remaining: 1m 10s
324:	learn: 5.3376115	total: 33.8s	remaining: 1m 10s
325:	learn: 5.3286898	total: 33.8s	remaining: 1m 9s
326:	learn: 5.3222406	total: 33.9s	remaining: 1m 9s
327:	learn: 5.3093842	total: 34s	remaining: 1m 9s
328:	learn: 5.3031665	total: 34s	remaining: 1m 9s
329:	learn: 5.2925363	total: 34.1s	remaining: 1m 9s
330:	learn: 5.2916367	total: 34.1s	remaining: 1m 8s
331:	

470:	learn: 4.3379483	total: 46.6s	remaining: 52.4s
471:	learn: 4.3322244	total: 46.7s	remaining: 52.3s
472:	learn: 4.3273517	total: 46.8s	remaining: 52.1s
473:	learn: 4.3231710	total: 46.9s	remaining: 52s
474:	learn: 4.3192292	total: 47s	remaining: 51.9s
475:	learn: 4.3115129	total: 47s	remaining: 51.8s
476:	learn: 4.3064748	total: 47s	remaining: 51.6s
477:	learn: 4.2992568	total: 47.1s	remaining: 51.5s
478:	learn: 4.2974877	total: 47.2s	remaining: 51.3s
479:	learn: 4.2944072	total: 47.2s	remaining: 51.2s
480:	learn: 4.2865484	total: 47.3s	remaining: 51.1s
481:	learn: 4.2787508	total: 47.4s	remaining: 51s
482:	learn: 4.2752953	total: 47.5s	remaining: 50.8s
483:	learn: 4.2731103	total: 47.5s	remaining: 50.6s
484:	learn: 4.2675708	total: 47.7s	remaining: 50.6s
485:	learn: 4.2616480	total: 47.7s	remaining: 50.5s
486:	learn: 4.2591308	total: 47.8s	remaining: 50.3s
487:	learn: 4.2518665	total: 47.9s	remaining: 50.3s
488:	learn: 4.2464153	total: 47.9s	remaining: 50.1s
489:	learn: 4.2441050	

628:	learn: 3.5300704	total: 59.7s	remaining: 35.2s
629:	learn: 3.5299480	total: 1m	remaining: 35.3s
630:	learn: 3.5239450	total: 1m	remaining: 35.2s
631:	learn: 3.5178742	total: 1m	remaining: 35.1s
632:	learn: 3.5133534	total: 1m	remaining: 35s
633:	learn: 3.5111403	total: 1m	remaining: 34.9s
634:	learn: 3.5077770	total: 1m	remaining: 34.8s
635:	learn: 3.4989294	total: 1m	remaining: 34.7s
636:	learn: 3.4975401	total: 1m	remaining: 34.5s
637:	learn: 3.4923237	total: 1m	remaining: 34.4s
638:	learn: 3.4882964	total: 1m	remaining: 34.3s
639:	learn: 3.4848258	total: 1m	remaining: 34.2s
640:	learn: 3.4824596	total: 1m	remaining: 34.1s
641:	learn: 3.4766463	total: 1m	remaining: 33.9s
642:	learn: 3.4712320	total: 1m 1s	remaining: 33.9s
643:	learn: 3.4711034	total: 1m 1s	remaining: 33.8s
644:	learn: 3.4668567	total: 1m 1s	remaining: 33.7s
645:	learn: 3.4667412	total: 1m 1s	remaining: 33.6s
646:	learn: 3.4649963	total: 1m 1s	remaining: 33.5s
647:	learn: 3.4637155	total: 1m 1s	remaining: 33.4s
6

629:	learn: 3.7027451	total: 1m 2s	remaining: 36.9s
630:	learn: 3.6963123	total: 1m 2s	remaining: 36.8s
631:	learn: 3.6912467	total: 1m 3s	remaining: 36.8s
632:	learn: 3.6910107	total: 1m 3s	remaining: 36.6s
633:	learn: 3.6903220	total: 1m 3s	remaining: 36.6s
634:	learn: 3.6858045	total: 1m 3s	remaining: 36.5s
635:	learn: 3.6815159	total: 1m 3s	remaining: 36.5s
636:	learn: 3.6771264	total: 1m 3s	remaining: 36.4s
637:	learn: 3.6714363	total: 1m 3s	remaining: 36.3s
638:	learn: 3.6660269	total: 1m 4s	remaining: 36.2s
639:	learn: 3.6621731	total: 1m 4s	remaining: 36.1s
640:	learn: 3.6610498	total: 1m 4s	remaining: 36s
641:	learn: 3.6547793	total: 1m 4s	remaining: 35.9s
642:	learn: 3.6497801	total: 1m 4s	remaining: 35.8s
643:	learn: 3.6488700	total: 1m 4s	remaining: 35.6s
644:	learn: 3.6465239	total: 1m 4s	remaining: 35.5s
645:	learn: 3.6455161	total: 1m 4s	remaining: 35.4s
646:	learn: 3.6413370	total: 1m 4s	remaining: 35.3s
647:	learn: 3.6356237	total: 1m 4s	remaining: 35.1s
648:	learn: 3.

785:	learn: 3.1052626	total: 1m 18s	remaining: 21.5s
786:	learn: 3.1040391	total: 1m 18s	remaining: 21.4s
787:	learn: 3.0995687	total: 1m 19s	remaining: 21.3s
788:	learn: 3.0981586	total: 1m 19s	remaining: 21.2s
789:	learn: 3.0948930	total: 1m 19s	remaining: 21.1s
790:	learn: 3.0901627	total: 1m 19s	remaining: 20.9s
791:	learn: 3.0874661	total: 1m 19s	remaining: 20.8s
792:	learn: 3.0843940	total: 1m 19s	remaining: 20.7s
793:	learn: 3.0824616	total: 1m 19s	remaining: 20.6s
794:	learn: 3.0782630	total: 1m 19s	remaining: 20.5s
795:	learn: 3.0754648	total: 1m 19s	remaining: 20.4s
796:	learn: 3.0723120	total: 1m 19s	remaining: 20.3s
797:	learn: 3.0721526	total: 1m 19s	remaining: 20.2s
798:	learn: 3.0712706	total: 1m 19s	remaining: 20.1s
799:	learn: 3.0690689	total: 1m 19s	remaining: 20s
800:	learn: 3.0660055	total: 1m 20s	remaining: 19.9s
801:	learn: 3.0636582	total: 1m 20s	remaining: 19.8s
802:	learn: 3.0585861	total: 1m 20s	remaining: 19.7s
803:	learn: 3.0539633	total: 1m 20s	remaining: 1

940:	learn: 2.6064865	total: 1m 29s	remaining: 5.62s
941:	learn: 2.6050568	total: 1m 29s	remaining: 5.53s
942:	learn: 2.6025636	total: 1m 29s	remaining: 5.43s
943:	learn: 2.6001619	total: 1m 30s	remaining: 5.34s
944:	learn: 2.5986933	total: 1m 30s	remaining: 5.24s
945:	learn: 2.5980480	total: 1m 30s	remaining: 5.15s
946:	learn: 2.5959585	total: 1m 30s	remaining: 5.05s
947:	learn: 2.5934561	total: 1m 30s	remaining: 4.96s
948:	learn: 2.5910927	total: 1m 30s	remaining: 4.86s
949:	learn: 2.5883603	total: 1m 30s	remaining: 4.76s
950:	learn: 2.5861159	total: 1m 30s	remaining: 4.67s
951:	learn: 2.5843045	total: 1m 30s	remaining: 4.57s
952:	learn: 2.5808962	total: 1m 30s	remaining: 4.47s
953:	learn: 2.5781713	total: 1m 30s	remaining: 4.37s
954:	learn: 2.5742582	total: 1m 30s	remaining: 4.28s
955:	learn: 2.5715515	total: 1m 30s	remaining: 4.18s
956:	learn: 2.5685305	total: 1m 31s	remaining: 4.09s
957:	learn: 2.5665279	total: 1m 31s	remaining: 3.99s
958:	learn: 2.5637710	total: 1m 31s	remaining:

98:	learn: 6.6508417	total: 6.87s	remaining: 1m 2s
99:	learn: 6.6390682	total: 6.94s	remaining: 1m 2s
100:	learn: 6.6303135	total: 7.02s	remaining: 1m 2s
101:	learn: 6.6231733	total: 7.07s	remaining: 1m 2s
102:	learn: 6.6111642	total: 7.15s	remaining: 1m 2s
103:	learn: 6.6042877	total: 7.22s	remaining: 1m 2s
104:	learn: 6.5992668	total: 7.31s	remaining: 1m 2s
105:	learn: 6.5963256	total: 7.38s	remaining: 1m 2s
106:	learn: 6.5899508	total: 7.45s	remaining: 1m 2s
107:	learn: 6.5751738	total: 7.5s	remaining: 1m 1s
108:	learn: 6.5624931	total: 7.57s	remaining: 1m 1s
109:	learn: 6.5444177	total: 7.66s	remaining: 1m 1s
110:	learn: 6.5420005	total: 7.7s	remaining: 1m 1s
111:	learn: 6.5358821	total: 7.75s	remaining: 1m 1s
112:	learn: 6.5314480	total: 7.87s	remaining: 1m 1s
113:	learn: 6.5295788	total: 7.97s	remaining: 1m 1s
114:	learn: 6.5271424	total: 8.02s	remaining: 1m 1s
115:	learn: 6.5249627	total: 8.08s	remaining: 1m 1s
116:	learn: 6.5215536	total: 8.18s	remaining: 1m 1s
117:	learn: 6.50

255:	learn: 6.0654208	total: 17s	remaining: 49.3s
256:	learn: 6.0470122	total: 17s	remaining: 49.1s
257:	learn: 6.0415524	total: 17s	remaining: 49s
258:	learn: 6.0405331	total: 17.1s	remaining: 48.9s
259:	learn: 6.0284744	total: 17.1s	remaining: 48.8s
260:	learn: 6.0249350	total: 17.2s	remaining: 48.7s
261:	learn: 6.0175480	total: 17.2s	remaining: 48.5s
262:	learn: 6.0159764	total: 17.3s	remaining: 48.4s
263:	learn: 6.0125659	total: 17.3s	remaining: 48.3s
264:	learn: 5.9967429	total: 17.4s	remaining: 48.2s
265:	learn: 5.9819728	total: 17.4s	remaining: 48.1s
266:	learn: 5.9702398	total: 17.5s	remaining: 48s
267:	learn: 5.9643772	total: 17.5s	remaining: 47.9s
268:	learn: 5.9621870	total: 17.6s	remaining: 47.9s
269:	learn: 5.9550008	total: 17.6s	remaining: 47.7s
270:	learn: 5.9466161	total: 17.7s	remaining: 47.7s
271:	learn: 5.9353012	total: 17.8s	remaining: 47.6s
272:	learn: 5.9219687	total: 17.8s	remaining: 47.4s
273:	learn: 5.9106513	total: 17.9s	remaining: 47.3s
274:	learn: 5.9048839	

413:	learn: 4.8930955	total: 27.1s	remaining: 38.4s
414:	learn: 4.8876189	total: 27.1s	remaining: 38.3s
415:	learn: 4.8814996	total: 27.2s	remaining: 38.2s
416:	learn: 4.8755593	total: 27.3s	remaining: 38.1s
417:	learn: 4.8685483	total: 27.3s	remaining: 38s
418:	learn: 4.8643228	total: 27.4s	remaining: 38s
419:	learn: 4.8619889	total: 27.4s	remaining: 37.9s
420:	learn: 4.8585572	total: 27.5s	remaining: 37.8s
421:	learn: 4.8503834	total: 27.6s	remaining: 37.8s
422:	learn: 4.8407834	total: 27.6s	remaining: 37.7s
423:	learn: 4.8338409	total: 27.7s	remaining: 37.6s
424:	learn: 4.8248101	total: 27.7s	remaining: 37.5s
425:	learn: 4.8200817	total: 27.9s	remaining: 37.6s
426:	learn: 4.8104411	total: 27.9s	remaining: 37.5s
427:	learn: 4.8014804	total: 28s	remaining: 37.4s
428:	learn: 4.7978456	total: 28.1s	remaining: 37.5s
429:	learn: 4.7877221	total: 28.2s	remaining: 37.4s
430:	learn: 4.7874061	total: 28.3s	remaining: 37.3s
431:	learn: 4.7832140	total: 28.3s	remaining: 37.2s
432:	learn: 4.7770

571:	learn: 3.9891831	total: 37.7s	remaining: 28.2s
572:	learn: 3.9844064	total: 37.8s	remaining: 28.1s
573:	learn: 3.9790791	total: 37.9s	remaining: 28.1s
574:	learn: 3.9776719	total: 37.9s	remaining: 28s
575:	learn: 3.9717069	total: 38s	remaining: 28s
576:	learn: 3.9714732	total: 38s	remaining: 27.9s
577:	learn: 3.9682819	total: 38.1s	remaining: 27.8s
578:	learn: 3.9594723	total: 38.2s	remaining: 27.8s
579:	learn: 3.9581478	total: 38.2s	remaining: 27.7s
580:	learn: 3.9507078	total: 38.4s	remaining: 27.7s
581:	learn: 3.9500413	total: 38.5s	remaining: 27.6s
582:	learn: 3.9467376	total: 38.6s	remaining: 27.6s
583:	learn: 3.9400081	total: 38.7s	remaining: 27.5s
584:	learn: 3.9338849	total: 38.7s	remaining: 27.5s
585:	learn: 3.9298403	total: 38.8s	remaining: 27.4s
586:	learn: 3.9281614	total: 38.8s	remaining: 27.3s
587:	learn: 3.9206055	total: 38.9s	remaining: 27.3s
588:	learn: 3.9148471	total: 39s	remaining: 27.2s
589:	learn: 3.9094202	total: 39s	remaining: 27.1s
590:	learn: 3.9065104	to

729:	learn: 3.3860080	total: 48.2s	remaining: 17.8s
730:	learn: 3.3817145	total: 48.2s	remaining: 17.7s
731:	learn: 3.3779971	total: 48.3s	remaining: 17.7s
732:	learn: 3.3770522	total: 48.4s	remaining: 17.6s
733:	learn: 3.3760439	total: 48.5s	remaining: 17.6s
734:	learn: 3.3742119	total: 48.6s	remaining: 17.5s
735:	learn: 3.3718815	total: 48.6s	remaining: 17.4s
736:	learn: 3.3717336	total: 48.8s	remaining: 17.4s
737:	learn: 3.3676856	total: 48.9s	remaining: 17.4s
738:	learn: 3.3654750	total: 49s	remaining: 17.3s
739:	learn: 3.3616325	total: 49s	remaining: 17.2s
740:	learn: 3.3585475	total: 49.1s	remaining: 17.2s
741:	learn: 3.3533776	total: 49.2s	remaining: 17.1s
742:	learn: 3.3511967	total: 49.3s	remaining: 17s
743:	learn: 3.3509542	total: 49.3s	remaining: 17s
744:	learn: 3.3480381	total: 49.4s	remaining: 16.9s
745:	learn: 3.3436145	total: 49.5s	remaining: 16.9s
746:	learn: 3.3392325	total: 49.5s	remaining: 16.8s
747:	learn: 3.3354656	total: 49.7s	remaining: 16.8s
748:	learn: 3.331710

Learning rate set to 0.048989
0:	learn: 11.1622187	total: 56.1ms	remaining: 56.1s
1:	learn: 10.9791512	total: 60.7ms	remaining: 30.3s
2:	learn: 10.8278192	total: 65ms	remaining: 21.6s
3:	learn: 10.6852435	total: 68.9ms	remaining: 17.2s
4:	learn: 10.5535493	total: 72.9ms	remaining: 14.5s
5:	learn: 10.4313505	total: 77.2ms	remaining: 12.8s
6:	learn: 10.2934368	total: 81.4ms	remaining: 11.5s
7:	learn: 10.1700676	total: 85.7ms	remaining: 10.6s
8:	learn: 10.0534184	total: 90.4ms	remaining: 9.95s
9:	learn: 9.9452631	total: 94.4ms	remaining: 9.34s
10:	learn: 9.8380127	total: 98.4ms	remaining: 8.85s
11:	learn: 9.7304594	total: 104ms	remaining: 8.57s
12:	learn: 9.6279486	total: 111ms	remaining: 8.45s
13:	learn: 9.5436665	total: 117ms	remaining: 8.21s
14:	learn: 9.4588974	total: 126ms	remaining: 8.26s
15:	learn: 9.3695626	total: 131ms	remaining: 8.09s
16:	learn: 9.3162956	total: 136ms	remaining: 7.87s
17:	learn: 9.2497745	total: 142ms	remaining: 7.73s
18:	learn: 9.1757445	total: 147ms	remaining:

188:	learn: 6.4487353	total: 1.16s	remaining: 4.99s
189:	learn: 6.4446966	total: 1.17s	remaining: 5s
190:	learn: 6.4353551	total: 1.18s	remaining: 4.99s
191:	learn: 6.4207883	total: 1.18s	remaining: 4.97s
192:	learn: 6.4108418	total: 1.19s	remaining: 4.96s
193:	learn: 6.4030859	total: 1.19s	remaining: 4.95s
194:	learn: 6.4011726	total: 1.2s	remaining: 4.94s
195:	learn: 6.3985891	total: 1.2s	remaining: 4.93s
196:	learn: 6.3938127	total: 1.21s	remaining: 4.92s
197:	learn: 6.3920296	total: 1.21s	remaining: 4.91s
198:	learn: 6.3782448	total: 1.22s	remaining: 4.91s
199:	learn: 6.3594187	total: 1.22s	remaining: 4.9s
200:	learn: 6.3471567	total: 1.23s	remaining: 4.89s
201:	learn: 6.3452069	total: 1.23s	remaining: 4.88s
202:	learn: 6.3434654	total: 1.24s	remaining: 4.87s
203:	learn: 6.3357620	total: 1.24s	remaining: 4.85s
204:	learn: 6.3221399	total: 1.25s	remaining: 4.85s
205:	learn: 6.3204778	total: 1.25s	remaining: 4.84s
206:	learn: 6.3165220	total: 1.26s	remaining: 4.83s
207:	learn: 6.3126

352:	learn: 5.2519310	total: 2.14s	remaining: 3.92s
353:	learn: 5.2463154	total: 2.14s	remaining: 3.91s
354:	learn: 5.2410692	total: 2.15s	remaining: 3.9s
355:	learn: 5.2350716	total: 2.16s	remaining: 3.9s
356:	learn: 5.2300564	total: 2.16s	remaining: 3.89s
357:	learn: 5.2251745	total: 2.17s	remaining: 3.88s
358:	learn: 5.2166760	total: 2.17s	remaining: 3.88s
359:	learn: 5.2120732	total: 2.18s	remaining: 3.87s
360:	learn: 5.2098331	total: 2.19s	remaining: 3.87s
361:	learn: 5.2002098	total: 2.19s	remaining: 3.86s
362:	learn: 5.1946426	total: 2.2s	remaining: 3.85s
363:	learn: 5.1941736	total: 2.2s	remaining: 3.85s
364:	learn: 5.1879159	total: 2.21s	remaining: 3.84s
365:	learn: 5.1797369	total: 2.21s	remaining: 3.84s
366:	learn: 5.1736444	total: 2.22s	remaining: 3.83s
367:	learn: 5.1731090	total: 2.22s	remaining: 3.82s
368:	learn: 5.1596445	total: 2.23s	remaining: 3.81s
369:	learn: 5.1533361	total: 2.23s	remaining: 3.8s
370:	learn: 5.1442880	total: 2.24s	remaining: 3.8s
371:	learn: 5.1356

536:	learn: 4.3072794	total: 3.31s	remaining: 2.85s
537:	learn: 4.2981005	total: 3.31s	remaining: 2.85s
538:	learn: 4.2948074	total: 3.32s	remaining: 2.84s
539:	learn: 4.2944375	total: 3.33s	remaining: 2.83s
540:	learn: 4.2895859	total: 3.33s	remaining: 2.83s
541:	learn: 4.2841665	total: 3.34s	remaining: 2.82s
542:	learn: 4.2778864	total: 3.35s	remaining: 2.82s
543:	learn: 4.2740333	total: 3.36s	remaining: 2.82s
544:	learn: 4.2684472	total: 3.37s	remaining: 2.81s
545:	learn: 4.2629814	total: 3.37s	remaining: 2.8s
546:	learn: 4.2627468	total: 3.38s	remaining: 2.8s
547:	learn: 4.2583017	total: 3.38s	remaining: 2.79s
548:	learn: 4.2534701	total: 3.39s	remaining: 2.78s
549:	learn: 4.2471245	total: 3.4s	remaining: 2.78s
550:	learn: 4.2428893	total: 3.4s	remaining: 2.77s
551:	learn: 4.2380425	total: 3.41s	remaining: 2.77s
552:	learn: 4.2330835	total: 3.42s	remaining: 2.76s
553:	learn: 4.2299155	total: 3.42s	remaining: 2.76s
554:	learn: 4.2239199	total: 3.43s	remaining: 2.75s
555:	learn: 4.21

720:	learn: 3.6228402	total: 4.48s	remaining: 1.73s
721:	learn: 3.6220521	total: 4.48s	remaining: 1.73s
722:	learn: 3.6185192	total: 4.49s	remaining: 1.72s
723:	learn: 3.6182807	total: 4.49s	remaining: 1.71s
724:	learn: 3.6143002	total: 4.5s	remaining: 1.71s
725:	learn: 3.6124253	total: 4.5s	remaining: 1.7s
726:	learn: 3.6080044	total: 4.51s	remaining: 1.69s
727:	learn: 3.6045165	total: 4.51s	remaining: 1.69s
728:	learn: 3.5991659	total: 4.52s	remaining: 1.68s
729:	learn: 3.5951386	total: 4.53s	remaining: 1.68s
730:	learn: 3.5926862	total: 4.54s	remaining: 1.67s
731:	learn: 3.5900358	total: 4.54s	remaining: 1.66s
732:	learn: 3.5862087	total: 4.55s	remaining: 1.66s
733:	learn: 3.5817626	total: 4.56s	remaining: 1.65s
734:	learn: 3.5809345	total: 4.56s	remaining: 1.64s
735:	learn: 3.5784116	total: 4.57s	remaining: 1.64s
736:	learn: 3.5732682	total: 4.57s	remaining: 1.63s
737:	learn: 3.5678277	total: 4.58s	remaining: 1.62s
738:	learn: 3.5642177	total: 4.58s	remaining: 1.62s
739:	learn: 3.5

882:	learn: 3.1758646	total: 5.44s	remaining: 721ms
883:	learn: 3.1721104	total: 5.44s	remaining: 714ms
884:	learn: 3.1693813	total: 5.45s	remaining: 708ms
885:	learn: 3.1657962	total: 5.46s	remaining: 702ms
886:	learn: 3.1637444	total: 5.46s	remaining: 696ms
887:	learn: 3.1588064	total: 5.47s	remaining: 689ms
888:	learn: 3.1561524	total: 5.47s	remaining: 683ms
889:	learn: 3.1533377	total: 5.47s	remaining: 677ms
890:	learn: 3.1514124	total: 5.48s	remaining: 671ms
891:	learn: 3.1497604	total: 5.49s	remaining: 664ms
892:	learn: 3.1483761	total: 5.49s	remaining: 658ms
893:	learn: 3.1466604	total: 5.5s	remaining: 652ms
894:	learn: 3.1450055	total: 5.5s	remaining: 646ms
895:	learn: 3.1426207	total: 5.51s	remaining: 639ms
896:	learn: 3.1400275	total: 5.51s	remaining: 633ms
897:	learn: 3.1371581	total: 5.52s	remaining: 627ms
898:	learn: 3.1340332	total: 5.53s	remaining: 621ms
899:	learn: 3.1322877	total: 5.53s	remaining: 615ms
900:	learn: 3.1288119	total: 5.54s	remaining: 609ms
901:	learn: 3.

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


890:	learn: 2.8358527	total: 1m 1s	remaining: 7.47s
891:	learn: 2.8334169	total: 1m 1s	remaining: 7.4s
892:	learn: 2.8304420	total: 1m 1s	remaining: 7.34s
893:	learn: 2.8271004	total: 1m 1s	remaining: 7.26s
894:	learn: 2.8235591	total: 1m 1s	remaining: 7.2s
895:	learn: 2.8201179	total: 1m 1s	remaining: 7.13s
896:	learn: 2.8178994	total: 1m 1s	remaining: 7.07s
897:	learn: 2.8144318	total: 1m 1s	remaining: 7s
898:	learn: 2.8107130	total: 1m 1s	remaining: 6.94s
899:	learn: 2.8099169	total: 1m 1s	remaining: 6.87s
900:	learn: 2.8079969	total: 1m 1s	remaining: 6.8s
901:	learn: 2.8042558	total: 1m 1s	remaining: 6.73s
902:	learn: 2.8017856	total: 1m 2s	remaining: 6.66s
903:	learn: 2.7993298	total: 1m 2s	remaining: 6.59s
904:	learn: 2.7966056	total: 1m 2s	remaining: 6.52s
905:	learn: 2.7963059	total: 1m 2s	remaining: 6.45s
906:	learn: 2.7919503	total: 1m 2s	remaining: 6.38s
907:	learn: 2.7890431	total: 1m 2s	remaining: 6.31s
908:	learn: 2.7864428	total: 1m 2s	remaining: 6.25s
909:	learn: 2.7846