In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.inspection import permutation_importance

from sklearn.ensemble import RandomForestRegressor
from category_encoders.james_stein import JamesSteinEncoder

from sklearn.metrics import r2_score

##### В качестве датасета используется: https://www.kaggle.com/competitions/mercedes-benz-greener-manufacturing

In [2]:
df = pd.read_csv('data/train.csv')

In [4]:
print(df.shape)
df.head()

(4209, 378)


Unnamed: 0,ID,y,X0,X1,X2,X3,X4,X5,X6,X8,...,X375,X376,X377,X378,X379,X380,X382,X383,X384,X385
0,0,130.81,k,v,at,a,d,u,j,o,...,0,0,1,0,0,0,0,0,0,0
1,6,88.53,k,t,av,e,d,y,l,o,...,1,0,0,0,0,0,0,0,0,0
2,7,76.26,az,w,n,c,d,x,j,x,...,0,0,0,0,0,0,1,0,0,0
3,9,80.62,az,t,n,f,d,x,l,e,...,0,0,0,0,0,0,0,0,0,0
4,13,78.02,az,v,n,f,d,h,d,n,...,0,0,0,0,0,0,0,0,0,0


##### Бейзлайн 
В качестве бейзлайна будет использоваться отбор признаков и построение модели lightgbm с дефолтными параметрами.  
Метрика для оценки из соревнования R^2.

In [19]:
X_train, X_val, y_train, y_val = train_test_split(df.drop(['ID', 'y'], axis=1), df['y'], test_size=0.25, random_state=123)

In [20]:
cat_columns = X_train.select_dtypes(include=object).columns
num_columns = X_train.select_dtypes(exclude=object).columns

# По дефолту заполним пустые значения для непрерывных признаков медианой.
nan_encoding_num = {}
for feat in num_columns:
    nan_encoding_num[feat] = X_train[feat].median()
    X_train[feat].fillna(nan_encoding_num[feat], inplace=True)
    X_val[feat].fillna(nan_encoding_num[feat], inplace=True)

# Закодируем категориальные признаки преобразованием James-Stein
cat_encoder = JamesSteinEncoder(cols=cat_columns, return_df=True, handle_missing='value', handle_unknown='value')

X_train[cat_columns] = cat_encoder.fit_transform(X_train[cat_columns], y_train)
X_val[cat_columns] = cat_encoder.transform(X_val[cat_columns])

In [52]:
# Отберем признаки по значениям feature_importance для RandomForest

model = RandomForestRegressor(n_estimators=50, max_depth=10)
model.fit(X_train, y_train)

RandomForestRegressor(max_depth=10, n_estimators=50)

In [53]:
# Отбираем топ 75% признаков на данном этапе

df_feat_import = pd.DataFrame({'feature_importance': model.feature_importances_, 'feature_names': X_train.columns})
threshold = np.percentile(df_feat_import['feature_importance'], 25)
features_1 = df_feat_import[df_feat_import['feature_importance'] >= threshold]['feature_names'].values

In [45]:
# Отбираем признаки по permutation importance так же на основе RandomForest.

# Сделаем разделение train выборки для того чтобы иметь валидационную выборку для отбора признаков.
X_train_model, X_val_model, y_train_model, y_val_model = train_test_split(X_train, y_train, test_size=0.2, random_state=123)

model = RandomForestRegressor(n_estimators=50, max_depth=10)
model.fit(X_train_model[features_1], y_train_model)

r = permutation_importance(model, X_val_model[features_1], y_val_model,
                            n_repeats=30,
                            random_state=123)


In [54]:
# Отбираем ещё топ 60% признаков на данном этапе

df_feat_import = pd.DataFrame({'feature_importance': r['importances_mean'], 'feature_names': X_train[features_1].columns})
threshold = np.percentile(df_feat_import['feature_importance'], 40)
features_2 = df_feat_import[df_feat_import['feature_importance'] >= threshold]['feature_names'].values

In [4]:
from lightgbm import LGBMRegressor

In [58]:
# Обучим модель lightgbm на отобранных признаках, посчитаем метрику R^2 на валидационной выборке.

lgb = LGBMRegressor()
lgb.fit(X_train[features_2], y_train)

LGBMRegressor()

In [60]:
y_val_pred = lgb.predict(X_val[features_2])
score = r2_score(y_val, y_val_pred)
print(f'R^2 for validation = {round(score, 3)}')

R^2 for validation = 0.559


#### В качестве первого AutoML будем использовать LightAutoML (LAMA)
https://github.com/sb-ai-lab/LightAutoML   
в нем уже есть встроенный отбор признаков и обработка NaN, категорий   

In [None]:
from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task

In [15]:
X_train, X_val, y_train, y_val = train_test_split(df.drop(['ID'], axis=1), df['y'], test_size=0.25, random_state=123)

In [16]:
task = Task(name='reg',
            metric=lambda y_true, y_pred: r2_score(y_true, y_pred))

automl = TabularAutoML(task=task, timeout=600)

oof_pred = automl.fit_predict(
    X_train,
    roles = {'target': 'y'}
)



In [17]:
y_val_pred = automl.predict(X_val)
score = r2_score(y_val, y_val_pred.data)
print(f'R^2 for validation = {round(score, 3)}')

R^2 for validation = 0.582


#### В качестве второго примера AutoML возьмем Alex-autoML
https://github.com/Alex-Lekov/AutoML_Alex

In [None]:
!pip install automl-alex

In [7]:
X_train, X_val, y_train, y_val = train_test_split(df.drop(['ID', 'y'], axis=1), df['y'], test_size=0.25, random_state=123)

In [11]:
from automl_alex import AutoMLRegressor

model = AutoMLRegressor()
model.fit(X_train.reset_index(drop=True), y_train.reset_index(drop=True), timeout=600)

[32m18:53:46[0m | [1m> Start Fit Base Model[0m
[32m18:54:37[0m | [1m##################################################[0m
[32m18:54:37[0m | [1m> Start Fit Models 2[0m
[32m18:54:37[0m | [1m##################################################[0m
[32m18:54:37[0m | [1m##################################################[0m
[32m18:54:38[0m | [1m> Step 1: calc parameters and pruned score: get test 10 trials[0m




[32m18:55:23[0m | [1m One iteration ~ 4.5 sec[0m
[32m18:55:23[0m | [1m Possible iters ~ 94.0[0m
[32m18:55:23[0m | [33m[1m! Not enough time to find the optimal parameters. 
                     Possible iters < 100. 
                     Please, Increase the 'timeout' parameter for normal optimization.[0m
[32m18:55:23[0m | [1m--------------------------------------------------[0m
[32m18:55:23[0m | [1m  Pruned Threshold Score: 72.0421[0m
[32m18:55:23[0m | [1m##################################################[0m
[32m18:55:23[0m | [1m> Step 2: Full opt with Threshold Score Pruner[0m
[32m18:55:23[0m | [1m##################################################[0m
[32m18:55:23[0m | [1m> Start optimization with the parameters:[0m
[32m18:55:23[0m | [1mCV_Folds = 7[0m
[32m18:55:23[0m | [1mScore_CV_Folds = 3[0m
[32m18:55:23[0m | [1mFeature_Selection = False[0m
[32m18:55:23[0m | [1mOpt_lvl = 1[0m
[32m18:55:23[0m | [1mCold_start = 15[0m
[32m18:55:



Optimize: : 1it [00:02,  2.25s/it, | Model: LightGBM | OptScore: 67.5542 | Best mean_squared_error: 66.5076 ]



Optimize: : 2it [00:06,  3.41s/it, | Model: LightGBM | OptScore: 63.9112 | Best mean_squared_error: 66.5076 ]



Optimize: : 3it [00:08,  2.86s/it, | Model: LightGBM | OptScore: 69.4148 | Best mean_squared_error: 63.9112 ]



Optimize: : 4it [00:10,  2.31s/it, | Model: LightGBM | OptScore: 65.3765 | Best mean_squared_error: 63.9112 ]



Optimize: : 5it [00:12,  2.28s/it, | Model: LightGBM | OptScore: 65.7121 | Best mean_squared_error: 63.9112 ]



Optimize: : 6it [00:13,  2.02s/it, | Model: LightGBM | OptScore: 64.1184 | Best mean_squared_error: 63.9112 ]



Optimize: : 7it [00:15,  1.85s/it, | Model: LightGBM | OptScore: 64.8884 | Best mean_squared_error: 63.9112 ]



Optimize: : 8it [00:17,  2.07s/it, | Model: LightGBM | OptScore: 64.8108 | Best mean_squared_error: 63.9112 ]



Optimize: : 9it [00:20,  2.09s/it, | Model: LightGBM | OptScore: 66.6023 | Best mean_squared_error: 63.9112 ]



Optimize: : 10it [00:21,  1.90s/it, | Model: LightGBM | OptScore: 63.9937 | Best mean_squared_error: 63.9112 ]



Optimize: : 11it [00:23,  1.78s/it, | Model: LightGBM | OptScore: 63.9561 | Best mean_squared_error: 63.9112 ]



Optimize: : 12it [00:24,  1.70s/it, | Model: LightGBM | OptScore: 65.602 | Best mean_squared_error: 63.9112 ] 



Optimize: : 13it [00:26,  1.62s/it, | Model: LightGBM | OptScore: 65.0214 | Best mean_squared_error: 63.9112 ]



Optimize: : 14it [00:27,  1.68s/it, | Model: LightGBM | OptScore: 64.8333 | Best mean_squared_error: 63.9112 ]



Optimize: : 15it [00:29,  1.65s/it, | Model: LightGBM | OptScore: 64.363 | Best mean_squared_error: 63.9112 ] 



Optimize: : 16it [00:32,  2.04s/it, | Model: LightGBM | OptScore: 64.8306 | Best mean_squared_error: 63.9112 ]



Optimize: : 17it [00:34,  2.06s/it, | Model: LightGBM | OptScore: 65.4155 | Best mean_squared_error: 63.9112 ]



Optimize: : 18it [00:36,  2.00s/it, | Model: LightGBM | OptScore: 64.7897 | Best mean_squared_error: 63.9112 ]



Optimize: : 19it [00:38,  2.00s/it, | Model: LightGBM | OptScore: 64.0598 | Best mean_squared_error: 63.9112 ]



Optimize: : 20it [00:40,  1.96s/it, | Model: LightGBM | OptScore: 65.4133 | Best mean_squared_error: 63.9112 ]



Optimize: : 21it [00:42,  1.92s/it, | Model: LightGBM | OptScore: 64.1559 | Best mean_squared_error: 63.9112 ]



Optimize: : 22it [00:43,  1.90s/it, | Model: LightGBM | OptScore: 65.0754 | Best mean_squared_error: 63.9112 ]



Optimize: : 23it [00:45,  1.86s/it, | Model: LightGBM | OptScore: 64.9728 | Best mean_squared_error: 63.9112 ]



Optimize: : 24it [00:48,  2.08s/it, | Model: LightGBM | OptScore: 73.5907 | Best mean_squared_error: 63.9112 ]



Optimize: : 25it [00:49,  1.92s/it, | Model: LightGBM | OptScore: 65.0374 | Best mean_squared_error: 63.9112 ]



Optimize: : 26it [00:51,  1.80s/it, | Model: LightGBM | OptScore: 64.1855 | Best mean_squared_error: 63.9112 ]



Optimize: : 27it [00:54,  2.20s/it, | Model: LightGBM | OptScore: 66.9336 | Best mean_squared_error: 63.9112 ]



Optimize: : 28it [00:56,  2.06s/it, | Model: LightGBM | OptScore: 68.5301 | Best mean_squared_error: 63.9112 ]



Optimize: : 29it [00:58,  2.10s/it, | Model: LightGBM | OptScore: 64.9742 | Best mean_squared_error: 63.9112 ]



Optimize: : 30it [01:00,  1.97s/it, | Model: LightGBM | OptScore: 64.2066 | Best mean_squared_error: 63.9112 ]



Optimize: : 31it [01:01,  1.81s/it, | Model: LightGBM | OptScore: 64.0475 | Best mean_squared_error: 63.9112 ]



Optimize: : 32it [01:03,  1.75s/it, | Model: LightGBM | OptScore: 64.7287 | Best mean_squared_error: 63.9112 ]



Optimize: : 33it [01:04,  1.66s/it, | Model: LightGBM | OptScore: 63.9372 | Best mean_squared_error: 63.9112 ]



Optimize: : 34it [01:06,  1.81s/it, | Model: LightGBM | OptScore: 64.1945 | Best mean_squared_error: 63.9112 ]



Optimize: : 35it [01:07,  1.61s/it, | Model: LightGBM | OptScore: 65.4826 | Best mean_squared_error: 63.9112 ]



Optimize: : 36it [01:10,  1.85s/it, | Model: LightGBM | OptScore: 74.3092 | Best mean_squared_error: 63.9112 ]



Optimize: : 37it [01:13,  2.20s/it, | Model: LightGBM | OptScore: 64.0227 | Best mean_squared_error: 63.9112 ]



Optimize: : 38it [01:15,  2.23s/it, | Model: LightGBM | OptScore: 64.9525 | Best mean_squared_error: 63.9112 ]



Optimize: : 39it [01:18,  2.56s/it, | Model: LightGBM | OptScore: 64.2309 | Best mean_squared_error: 63.9112 ]



Optimize: : 40it [01:21,  2.52s/it, | Model: LightGBM | OptScore: 64.0262 | Best mean_squared_error: 63.9112 ]



Optimize: : 41it [01:23,  2.48s/it, | Model: LightGBM | OptScore: 64.218 | Best mean_squared_error: 63.9112 ] 



Optimize: : 42it [01:26,  2.58s/it, | Model: LightGBM | OptScore: 64.0398 | Best mean_squared_error: 63.9112 ]



Optimize: : 43it [01:28,  2.52s/it, | Model: LightGBM | OptScore: 67.9901 | Best mean_squared_error: 63.9112 ]



Optimize: : 44it [01:31,  2.64s/it, | Model: LightGBM | OptScore: 65.0412 | Best mean_squared_error: 63.9112 ]



Optimize: : 45it [01:34,  2.59s/it, | Model: LightGBM | OptScore: 64.3322 | Best mean_squared_error: 63.9112 ]



Optimize: : 46it [01:36,  2.61s/it, | Model: LightGBM | OptScore: 63.9295 | Best mean_squared_error: 63.9112 ]



Optimize: : 47it [01:39,  2.67s/it, | Model: LightGBM | OptScore: 66.6496 | Best mean_squared_error: 63.9112 ]



Optimize: : 48it [01:42,  2.83s/it, | Model: LightGBM | OptScore: 65.9783 | Best mean_squared_error: 63.9112 ]



Optimize: : 49it [01:45,  2.87s/it, | Model: LightGBM | OptScore: 64.2083 | Best mean_squared_error: 63.9112 ]



Optimize: : 50it [01:47,  2.51s/it, | Model: LightGBM | OptScore: 65.2963 | Best mean_squared_error: 63.9112 ]



Optimize: : 51it [01:50,  2.50s/it, | Model: LightGBM | OptScore: 63.7252 | Best mean_squared_error: 63.9112 ]



Optimize: : 52it [01:52,  2.54s/it, | Model: LightGBM | OptScore: 63.8069 | Best mean_squared_error: 63.7252 ]



Optimize: : 53it [01:55,  2.49s/it, | Model: LightGBM | OptScore: 65.0197 | Best mean_squared_error: 63.7252 ]



Optimize: : 54it [01:57,  2.56s/it, | Model: LightGBM | OptScore: 63.8587 | Best mean_squared_error: 63.7252 ]



Optimize: : 55it [02:00,  2.57s/it, | Model: LightGBM | OptScore: 63.7862 | Best mean_squared_error: 63.7252 ]



Optimize: : 56it [02:03,  2.60s/it, | Model: LightGBM | OptScore: 63.8946 | Best mean_squared_error: 63.7252 ]



Optimize: : 57it [02:05,  2.64s/it, | Model: LightGBM | OptScore: 63.8326 | Best mean_squared_error: 63.7252 ]



Optimize: : 58it [02:08,  2.72s/it, | Model: LightGBM | OptScore: 63.7963 | Best mean_squared_error: 63.7252 ]



Optimize: : 59it [02:11,  2.75s/it, | Model: LightGBM | OptScore: 65.6951 | Best mean_squared_error: 63.7252 ]



Optimize: : 60it [02:13,  2.56s/it, | Model: LightGBM | OptScore: 64.1384 | Best mean_squared_error: 63.7252 ]



Optimize: : 61it [02:22,  4.44s/it, | Model: LightGBM | OptScore: 69.3084 | Best mean_squared_error: 63.7252 ]



Optimize: : 62it [02:26,  4.19s/it, | Model: LightGBM | OptScore: 64.0134 | Best mean_squared_error: 63.7252 ]



Optimize: : 63it [02:29,  3.92s/it, | Model: LightGBM | OptScore: 64.1692 | Best mean_squared_error: 63.7252 ]



Optimize: : 64it [02:32,  3.60s/it, | Model: LightGBM | OptScore: 63.9674 | Best mean_squared_error: 63.7252 ]



Optimize: : 65it [02:35,  3.48s/it, | Model: LightGBM | OptScore: 64.2899 | Best mean_squared_error: 63.7252 ]



Optimize: : 66it [02:38,  3.24s/it, | Model: LightGBM | OptScore: 63.8599 | Best mean_squared_error: 63.7252 ]



Optimize: : 67it [02:40,  3.11s/it, | Model: LightGBM | OptScore: 63.8978 | Best mean_squared_error: 63.7252 ]



Optimize: : 68it [02:43,  3.10s/it, | Model: LightGBM | OptScore: 64.875 | Best mean_squared_error: 63.7252 ] 



Optimize: : 69it [02:46,  3.06s/it, | Model: LightGBM | OptScore: 63.8568 | Best mean_squared_error: 63.7252 ]



Optimize: : 70it [02:49,  2.93s/it, | Model: LightGBM | OptScore: 71.4131 | Best mean_squared_error: 63.7252 ]



Optimize: : 71it [02:52,  2.84s/it, | Model: LightGBM | OptScore: 63.8209 | Best mean_squared_error: 63.7252 ]



Optimize: : 72it [02:55,  2.84s/it, | Model: LightGBM | OptScore: 63.962 | Best mean_squared_error: 63.7252 ] 



Optimize: : 73it [02:57,  2.80s/it, | Model: LightGBM | OptScore: 63.8931 | Best mean_squared_error: 63.7252 ]



Optimize: : 74it [03:00,  2.71s/it, | Model: LightGBM | OptScore: 64.3404 | Best mean_squared_error: 63.7252 ]



Optimize: : 75it [03:03,  2.81s/it, | Model: LightGBM | OptScore: 64.1094 | Best mean_squared_error: 63.7252 ]



Optimize: : 76it [03:06,  2.91s/it, | Model: LightGBM | OptScore: 63.9408 | Best mean_squared_error: 63.7252 ]



Optimize: : 77it [03:09,  2.95s/it, | Model: LightGBM | OptScore: 63.851 | Best mean_squared_error: 63.7252 ] 



Optimize: : 78it [03:12,  2.90s/it, | Model: LightGBM | OptScore: 63.7961 | Best mean_squared_error: 63.7252 ]



Optimize: : 79it [03:14,  2.76s/it, | Model: LightGBM | OptScore: 63.7288 | Best mean_squared_error: 63.7252 ]



Optimize: : 80it [03:17,  2.78s/it, | Model: LightGBM | OptScore: 63.9582 | Best mean_squared_error: 63.7252 ]



Optimize: : 81it [03:19,  2.59s/it, | Model: LightGBM | OptScore: 64.2002 | Best mean_squared_error: 63.7252 ]



Optimize: : 82it [03:21,  2.50s/it, | Model: LightGBM | OptScore: 64.0282 | Best mean_squared_error: 63.7252 ]



Optimize: : 83it [03:23,  2.36s/it, | Model: LightGBM | OptScore: 63.916 | Best mean_squared_error: 63.7252 ] 



Optimize: : 84it [03:26,  2.28s/it, | Model: LightGBM | OptScore: 63.8938 | Best mean_squared_error: 63.7252 ]



Optimize: : 85it [03:28,  2.45s/it, | Model: LightGBM | OptScore: 64.5487 | Best mean_squared_error: 63.7252 ]



Optimize: : 86it [03:31,  2.51s/it, | Model: LightGBM | OptScore: 63.9938 | Best mean_squared_error: 63.7252 ]



Optimize: : 87it [03:34,  2.63s/it, | Model: LightGBM | OptScore: 64.4873 | Best mean_squared_error: 63.7252 ]



Optimize: : 88it [03:36,  2.39s/it, | Model: LightGBM | OptScore: 64.0331 | Best mean_squared_error: 63.7252 ]



Optimize: : 89it [03:39,  2.66s/it, | Model: LightGBM | OptScore: 64.0714 | Best mean_squared_error: 63.7252 ]



Optimize: : 90it [03:42,  2.65s/it, | Model: LightGBM | OptScore: 64.0632 | Best mean_squared_error: 63.7252 ]



Optimize: : 91it [03:44,  2.58s/it, | Model: LightGBM | OptScore: 63.8393 | Best mean_squared_error: 63.7252 ]



Optimize: : 92it [03:47,  2.55s/it, | Model: LightGBM | OptScore: 63.9047 | Best mean_squared_error: 63.7252 ]



Optimize: : 93it [03:49,  2.63s/it, | Model: LightGBM | OptScore: 64.0762 | Best mean_squared_error: 63.7252 ]



Optimize: : 94it [03:52,  2.65s/it, | Model: LightGBM | OptScore: 63.9293 | Best mean_squared_error: 63.7252 ]



Optimize: : 95it [03:55,  2.65s/it, | Model: LightGBM | OptScore: 63.7165 | Best mean_squared_error: 63.7252 ]



Optimize: : 96it [03:57,  2.61s/it, | Model: LightGBM | OptScore: 63.7693 | Best mean_squared_error: 63.7165 ]



Optimize: : 97it [04:00,  2.55s/it, | Model: LightGBM | OptScore: 64.1446 | Best mean_squared_error: 63.7165 ]



Optimize: : 98it [04:05,  3.29s/it, | Model: LightGBM | OptScore: 67.608 | Best mean_squared_error: 63.7165 ] 



Optimize: : 99it [04:08,  3.22s/it, | Model: LightGBM | OptScore: 63.9263 | Best mean_squared_error: 63.7165 ]



Optimize: : 100it [04:10,  2.80s/it, | Model: LightGBM | OptScore: 64.3342 | Best mean_squared_error: 63.7165 ]



Optimize: : 101it [04:12,  2.75s/it, | Model: LightGBM | OptScore: 63.8272 | Best mean_squared_error: 63.7165 ]



Optimize: : 102it [04:15,  2.65s/it, | Model: LightGBM | OptScore: 64.7356 | Best mean_squared_error: 63.7165 ]



Optimize: : 103it [04:17,  2.62s/it, | Model: LightGBM | OptScore: 63.8678 | Best mean_squared_error: 63.7165 ]



Optimize: : 104it [04:20,  2.55s/it, | Model: LightGBM | OptScore: 63.8673 | Best mean_squared_error: 63.7165 ]



Optimize: : 105it [04:22,  2.63s/it, | Model: LightGBM | OptScore: 63.9873 | Best mean_squared_error: 63.7165 ]



Optimize: : 106it [04:25,  2.62s/it, | Model: LightGBM | OptScore: 63.9505 | Best mean_squared_error: 63.7165 ]



Optimize: : 107it [04:28,  2.77s/it, | Model: LightGBM | OptScore: 63.923 | Best mean_squared_error: 63.7165 ] 



Optimize: : 108it [04:31,  2.75s/it, | Model: LightGBM | OptScore: 63.7414 | Best mean_squared_error: 63.7165 ]



Optimize: : 109it [04:33,  2.71s/it, | Model: LightGBM | OptScore: 63.8737 | Best mean_squared_error: 63.7165 ]



Optimize: : 110it [04:36,  2.65s/it, | Model: LightGBM | OptScore: 63.8641 | Best mean_squared_error: 63.7165 ]



Optimize: : 111it [04:39,  2.63s/it, | Model: LightGBM | OptScore: 63.7974 | Best mean_squared_error: 63.7165 ]



Optimize: : 112it [04:41,  2.58s/it, | Model: LightGBM | OptScore: 63.935 | Best mean_squared_error: 63.7165 ] 



Optimize: : 113it [04:44,  2.59s/it, | Model: LightGBM | OptScore: 63.7846 | Best mean_squared_error: 63.7165 ]



Optimize: : 114it [04:46,  2.62s/it, | Model: LightGBM | OptScore: 63.8957 | Best mean_squared_error: 63.7165 ]



Optimize: : 115it [04:49,  2.69s/it, | Model: LightGBM | OptScore: 63.9383 | Best mean_squared_error: 63.7165 ]



Optimize: : 116it [04:52,  2.71s/it, | Model: LightGBM | OptScore: 64.0633 | Best mean_squared_error: 63.7165 ]



Optimize: : 117it [04:55,  2.80s/it, | Model: LightGBM | OptScore: 64.397 | Best mean_squared_error: 63.7165 ] 



Optimize: : 118it [04:57,  2.71s/it, | Model: LightGBM | OptScore: 67.394 | Best mean_squared_error: 63.7165 ]



Optimize: : 119it [05:01,  2.84s/it, | Model: LightGBM | OptScore: 65.8238 | Best mean_squared_error: 63.7165 ]



Optimize: : 120it [05:03,  2.63s/it, | Model: LightGBM | OptScore: 63.9124 | Best mean_squared_error: 63.7165 ]



Optimize: : 121it [05:05,  2.58s/it, | Model: LightGBM | OptScore: 63.7963 | Best mean_squared_error: 63.7165 ]



Optimize: : 122it [05:08,  2.61s/it, | Model: LightGBM | OptScore: 63.8609 | Best mean_squared_error: 63.7165 ]



Optimize: : 123it [05:11,  2.70s/it, | Model: LightGBM | OptScore: 64.1363 | Best mean_squared_error: 63.7165 ]



Optimize: : 124it [05:13,  2.64s/it, | Model: LightGBM | OptScore: 64.0674 | Best mean_squared_error: 63.7165 ]



Optimize: : 125it [05:16,  2.65s/it, | Model: LightGBM | OptScore: 63.9145 | Best mean_squared_error: 63.7165 ]



Optimize: : 126it [05:18,  2.61s/it, | Model: LightGBM | OptScore: 64.6037 | Best mean_squared_error: 63.7165 ]



Optimize: : 127it [05:22,  2.75s/it, | Model: LightGBM | OptScore: 64.3487 | Best mean_squared_error: 63.7165 ]



Optimize: : 128it [05:24,  2.64s/it, | Model: LightGBM | OptScore: 64.8202 | Best mean_squared_error: 63.7165 ]



Optimize: : 129it [05:27,  2.64s/it, | Model: LightGBM | OptScore: 63.7409 | Best mean_squared_error: 63.7165 ]



Optimize: : 130it [05:29,  2.62s/it, | Model: LightGBM | OptScore: 63.8437 | Best mean_squared_error: 63.7165 ]



Optimize: : 131it [05:32,  2.62s/it, | Model: LightGBM | OptScore: 63.8635 | Best mean_squared_error: 63.7165 ]



Optimize: : 132it [05:34,  2.64s/it, | Model: LightGBM | OptScore: 63.7538 | Best mean_squared_error: 63.7165 ]



Optimize: : 133it [05:37,  2.64s/it, | Model: LightGBM | OptScore: 63.958 | Best mean_squared_error: 63.7165 ] 



Optimize: : 134it [05:40,  2.68s/it, | Model: LightGBM | OptScore: 63.8341 | Best mean_squared_error: 63.7165 ]



Optimize: : 135it [05:43,  2.71s/it, | Model: LightGBM | OptScore: 64.2436 | Best mean_squared_error: 63.7165 ]



Optimize: : 136it [05:45,  2.47s/it, | Model: LightGBM | OptScore: 63.9227 | Best mean_squared_error: 63.7165 ]



Optimize: : 137it [05:47,  2.54s/it, | Model: LightGBM | OptScore: 64.0985 | Best mean_squared_error: 63.7165 ]



Optimize: : 138it [05:51,  2.55s/it, | Model: LightGBM | OptScore: 63.8578 | Best mean_squared_error: 63.7165 ]
[32m19:01:15[0m | [1m> Finish Opt![0m
[32m19:01:15[0m | [1mBest Score: 63.7165 mean_squared_error[0m
[32m19:01:15[0m | [1m##################################################[0m
[32m19:01:15[0m | [1m> Fit Best Models[0m
[32m19:01:15[0m | [1m##################################################[0m
[32m19:01:21[0m | [1mSave DataPrepare[0m
[32m19:01:26[0m | [1mSave DataPrepare[0m
[32m19:01:30[0m | [1mSave DataPrepare[0m
[32m19:01:36[0m | [1mSave DataPrepare[0m
[32m19:01:42[0m | [1mSave DataPrepare[0m
[32m19:01:42[0m | [1m##################################################[0m
[32m19:01:42[0m | [1m> Finish![0m


<automl_alex.automl_alex.AutoMLRegressor at 0x18e663635c8>

In [12]:
y_val_pred = model.predict(X_val)
score = r2_score(y_val, y_val_pred)
print(f'R^2 for validation = {round(score, 3)}')

R^2 for validation = 0.587
