In [11]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import lightgbm as lgbm
import xgboost
import multiprocessing
import optuna

from sklearn.neighbors import KNeighborsRegressor

from lightgbm import LGBMRegressor
from xgboost import XGBRegressor

In [12]:
num_cores = multiprocessing.cpu_count()
num_cores

4

In [21]:
X_train = pd.read_csv("./Dataset/train_final.csv")
X_train.drop("Unnamed: 0", axis = 1, inplace=True)
X_train.head()

Unnamed: 0,Count,Open,Volume,Asset_ID,Target,Range_Close_Open,Range_High_Low
0,-0.420178,-0.231602,-0.163735,0,0.000148,0.009311,-0.106561
1,-0.406696,-0.231558,-0.163475,0,0.000393,0.009823,-0.106239
2,-0.41285,-0.231527,-0.163609,0,0.000549,0.009775,-0.106443
3,-0.411005,-0.231471,-0.163602,0,6e-06,0.010801,-0.105861
4,-0.389464,-0.230984,-0.162833,0,0.005618,0.011829,-0.102587


In [None]:
%%time

from sklearn.model_selection import cross_validate

def cross_validate_manual(X, y, model):
    cv = cross_validate(
        estimator = model,
        X = X,
        y = y,
        scoring = ["neg_mean_squared_error"],
        cv = 5,
    )
    
    return cv["test_neg_mean_squared_error"].mean()

In [22]:
model_lgbm = LGBMRegressor()
mean_lgbm = cross_validate_manual(X_train, y_train, model_lgbm)
print("LGBM: ", np.abs(10e6*mean_lgbm))

LGBM:  0.12017209237859969


In [23]:
model_lgbm.get_params()

{'boosting_type': 'gbdt',
 'class_weight': None,
 'colsample_bytree': 1.0,
 'importance_type': 'split',
 'learning_rate': 0.1,
 'max_depth': -1,
 'min_child_samples': 20,
 'min_child_weight': 0.001,
 'min_split_gain': 0.0,
 'n_estimators': 100,
 'n_jobs': -1,
 'num_leaves': 31,
 'objective': None,
 'random_state': None,
 'reg_alpha': 0.0,
 'reg_lambda': 0.0,
 'silent': 'warn',
 'subsample': 1.0,
 'subsample_for_bin': 200000,
 'subsample_freq': 0}

In [24]:
import optuna.integration.lightgbm as lgb
def objective(trial):
    param_grid = {
        'num_jobs': num_cores - 1,
        "n_estimators": trial.suggest_categorical("n_estimators", [10000]),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "num_leaves": trial.suggest_int("num_leaves", 20, 3000, step=20),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 200, 10000, step=100),
        "max_bin": trial.suggest_int("max_bin", 200, 300),
        "lambda_l1": trial.suggest_int("lambda_l1", 0, 100, step=5),
        "lambda_l2": trial.suggest_int("lambda_l2", 0, 100, step=5),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 0.3),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1),
        "bagging_fraction": trial.suggest_float(
            "bagging_fraction", 0.2, 0.95, step=0.1
        ),
        "bagging_freq": trial.suggest_categorical("bagging_freq", [1]),
        "feature_fraction": trial.suggest_float(
            "feature_fraction", 0.2, 0.95, step=0.1
        ),
    }
    
    model_lgbm = LGBMRegressor()
    mean_lgbm = cross_validate_manual(X_train, y_train, model_lgbm)
#     print("LGBM: ", np.abs(10e6*mean_lgbm))

    return np.abs(10e6*mean_lgbm)

In [25]:
study = optuna.create_study()
study.optimize(objective, n_trials=100)

[32m[I 2022-04-24 17:51:21,399][0m A new study created in memory with name: no-name-39582fc4-0af4-439a-9a7a-e487abfc2f03[0m
[32m[I 2022-04-24 17:51:22,248][0m Trial 0 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.10545152476290759, 'num_leaves': 1600, 'max_depth': 6, 'min_data_in_leaf': 7000, 'max_bin': 247, 'lambda_l1': 95, 'lambda_l2': 50, 'min_gain_to_split': 7.052216867935241, 'reg_alpha': 0.13302597513428918, 'reg_lambda': 0.5360701301180708, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:51:23,202][0m Trial 1 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.2903160499650307, 'num_leaves': 1080, 'max_depth': 5, 'min_data_in_leaf': 3800, 'max_bin': 268, 'lambda_l1': 70, 'lambda_l2': 45, 'min_gain_to_split': 14.755431968247809, 'reg_alpha': 0.14313911405448332, '

[32m[I 2022-04-24 17:51:32,777][0m Trial 11 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.15071668058293236, 'num_leaves': 1440, 'max_depth': 5, 'min_data_in_leaf': 9500, 'max_bin': 245, 'lambda_l1': 60, 'lambda_l2': 55, 'min_gain_to_split': 9.429349907765777, 'reg_alpha': 0.15368293289359183, 'reg_lambda': 0.13735815839953947, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:51:33,880][0m Trial 12 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.08178702806767418, 'num_leaves': 1720, 'max_depth': 5, 'min_data_in_leaf': 4600, 'max_bin': 233, 'lambda_l1': 40, 'lambda_l2': 20, 'min_gain_to_split': 5.356447507366459, 'reg_alpha': 0.15861629739547303, 'reg_lambda': 0.24157833565153286, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 0.12017

[32m[I 2022-04-24 17:51:43,714][0m Trial 22 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.06314372465973295, 'num_leaves': 1440, 'max_depth': 12, 'min_data_in_leaf': 7000, 'max_bin': 222, 'lambda_l1': 45, 'lambda_l2': 30, 'min_gain_to_split': 1.5934067988358445, 'reg_alpha': 0.18746142140915525, 'reg_lambda': 0.4077232188352091, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:51:44,559][0m Trial 23 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.11704275533917474, 'num_leaves': 2700, 'max_depth': 11, 'min_data_in_leaf': 5100, 'max_bin': 247, 'lambda_l1': 0, 'lambda_l2': 95, 'min_gain_to_split': 5.38129005470765, 'reg_alpha': 0.26449216517561985, 'reg_lambda': 0.5871842495551736, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with

[32m[I 2022-04-24 17:51:54,310][0m Trial 33 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.010036661366265912, 'num_leaves': 2280, 'max_depth': 9, 'min_data_in_leaf': 7900, 'max_bin': 241, 'lambda_l1': 0, 'lambda_l2': 50, 'min_gain_to_split': 6.673232131198498, 'reg_alpha': 0.11864114847851306, 'reg_lambda': 0.3213007565619734, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:51:55,327][0m Trial 34 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.035168077200632676, 'num_leaves': 20, 'max_depth': 9, 'min_data_in_leaf': 9100, 'max_bin': 228, 'lambda_l1': 10, 'lambda_l2': 50, 'min_gain_to_split': 3.98269369792534, 'reg_alpha': 0.22902922368030257, 'reg_lambda': 0.4976396866599445, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0 with val

[32m[I 2022-04-24 17:52:06,051][0m Trial 44 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.10630204557668119, 'num_leaves': 1960, 'max_depth': 6, 'min_data_in_leaf': 8600, 'max_bin': 200, 'lambda_l1': 5, 'lambda_l2': 5, 'min_gain_to_split': 4.37423942750213, 'reg_alpha': 0.041695729186274996, 'reg_lambda': 0.7456253145948615, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:52:07,058][0m Trial 45 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.026787854217166576, 'num_leaves': 2520, 'max_depth': 5, 'min_data_in_leaf': 7400, 'max_bin': 246, 'lambda_l1': 15, 'lambda_l2': 80, 'min_gain_to_split': 9.589704651991491, 'reg_alpha': 0.06153195316291475, 'reg_lambda': 0.7687032162721058, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is 

[32m[I 2022-04-24 17:52:17,385][0m Trial 55 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.2178862827891455, 'num_leaves': 2060, 'max_depth': 10, 'min_data_in_leaf': 5900, 'max_bin': 233, 'lambda_l1': 0, 'lambda_l2': 45, 'min_gain_to_split': 6.824825738416589, 'reg_alpha': 0.16494184331326708, 'reg_lambda': 0.3127493782628253, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:52:18,404][0m Trial 56 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.18924612850811345, 'num_leaves': 160, 'max_depth': 9, 'min_data_in_leaf': 9200, 'max_bin': 229, 'lambda_l1': 15, 'lambda_l2': 50, 'min_gain_to_split': 3.4569078460085616, 'reg_alpha': 0.22134331762300724, 'reg_lambda': 0.4831812359758556, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0 with va

[32m[I 2022-04-24 17:52:28,613][0m Trial 66 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.09513534571158207, 'num_leaves': 580, 'max_depth': 10, 'min_data_in_leaf': 2800, 'max_bin': 259, 'lambda_l1': 95, 'lambda_l2': 25, 'min_gain_to_split': 12.874541144655762, 'reg_alpha': 0.14774375744858434, 'reg_lambda': 0.6560384973434845, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:52:29,632][0m Trial 67 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.07587955185502979, 'num_leaves': 1460, 'max_depth': 11, 'min_data_in_leaf': 1000, 'max_bin': 267, 'lambda_l1': 45, 'lambda_l2': 30, 'min_gain_to_split': 14.152589380666072, 'reg_alpha': 0.13090557695225025, 'reg_lambda': 0.42262041521950433, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 w

[32m[I 2022-04-24 17:52:40,009][0m Trial 77 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.19735341925757252, 'num_leaves': 2300, 'max_depth': 8, 'min_data_in_leaf': 7200, 'max_bin': 204, 'lambda_l1': 100, 'lambda_l2': 45, 'min_gain_to_split': 7.181335829537346, 'reg_alpha': 0.17906725027420395, 'reg_lambda': 0.1893117668923291, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:52:41,224][0m Trial 78 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.22508265166282979, 'num_leaves': 2120, 'max_depth': 9, 'min_data_in_leaf': 7600, 'max_bin': 233, 'lambda_l1': 0, 'lambda_l2': 45, 'min_gain_to_split': 5.729578229455255, 'reg_alpha': 0.19410934366710345, 'reg_lambda': 0.11411316134564253, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.6000000000

[32m[I 2022-04-24 17:52:51,531][0m Trial 88 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.13317880389378273, 'num_leaves': 1360, 'max_depth': 3, 'min_data_in_leaf': 2400, 'max_bin': 274, 'lambda_l1': 60, 'lambda_l2': 60, 'min_gain_to_split': 12.270760823915248, 'reg_alpha': 0.0851409703098693, 'reg_lambda': 0.6256201802989783, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:52:52,570][0m Trial 89 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.14264403918821975, 'num_leaves': 1260, 'max_depth': 3, 'min_data_in_leaf': 1500, 'max_bin': 284, 'lambda_l1': 55, 'lambda_l2': 60, 'min_gain_to_split': 14.988786921070119, 'reg_alpha': 0.05659800478561991, 'reg_lambda': 0.524066999173023, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 0 with value: 0.1201720

[32m[I 2022-04-24 17:53:03,605][0m Trial 99 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.28789999973303637, 'num_leaves': 640, 'max_depth': 10, 'min_data_in_leaf': 4300, 'max_bin': 253, 'lambda_l1': 40, 'lambda_l2': 15, 'min_gain_to_split': 0.16557104492410946, 'reg_alpha': 0.1014250811874546, 'reg_lambda': 0.7020725482278862, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 0.12017209237859969.[0m


In [26]:
study.best_params

{'n_estimators': 10000,
 'learning_rate': 0.10545152476290759,
 'num_leaves': 1600,
 'max_depth': 6,
 'min_data_in_leaf': 7000,
 'max_bin': 247,
 'lambda_l1': 95,
 'lambda_l2': 50,
 'min_gain_to_split': 7.052216867935241,
 'reg_alpha': 0.13302597513428918,
 'reg_lambda': 0.5360701301180708,
 'bagging_fraction': 0.6000000000000001,
 'bagging_freq': 1,
 'feature_fraction': 0.2}

In [27]:
model_lgbm = LGBMRegressor(n_estimators=10000, learning_rate=0.11524, num_leaves=2380, max_depth=5, min_data_in_leaf= 600, lambda_l1=40, lambda_l2=20, min_gain_to_split=11.932, reg_alpha=0.2351, reg_lambda=0.2, baging_fraction= 0.7, bagging_freq=1, feature_fraction=0.6)
mean_lgbm = cross_validate_manual(X_train, y_train, model_lgbm)
print("LGBM: ", np.abs(10e6*mean_lgbm))

LGBM:  2.2315629253347473


In [28]:
def objective_xgb(trial):
    param_grid = {
        'n_jobs': num_cores - 1,
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 1, 500),
        'max_leaves': trial.suggest_int('max_leaves', 1, 5),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 0.3),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1),
    }

    model_xgb = XGBRegressor()
    mean_xgb = cross_validate_manual(X_train, y_train, model_xgb)
    
    return np.abs(10e6*mean_xgb)

In [29]:
study_xgb = optuna.create_study()
study_xgb.optimize(objective, n_trials=100)

[32m[I 2022-04-24 17:53:48,577][0m A new study created in memory with name: no-name-2356631c-bb2b-4438-999a-0f1fea4938f4[0m
[32m[I 2022-04-24 17:53:49,412][0m Trial 0 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.1361709887234184, 'num_leaves': 2960, 'max_depth': 8, 'min_data_in_leaf': 5400, 'max_bin': 222, 'lambda_l1': 30, 'lambda_l2': 80, 'min_gain_to_split': 8.034321470919213, 'reg_alpha': 0.28743895460283664, 'reg_lambda': 0.15386204858977315, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:53:50,450][0m Trial 1 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.026577082654462632, 'num_leaves': 100, 'max_depth': 3, 'min_data_in_leaf': 2000, 'max_bin': 218, 'lambda_l1': 45, 'lambda_l2': 100, 'min_gain_to_split': 8.298618205224198, 'reg_alpha': 0.018175470514705538, 'reg_lambda': 

[32m[I 2022-04-24 17:53:58,738][0m Trial 11 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.010573301755708031, 'num_leaves': 2340, 'max_depth': 6, 'min_data_in_leaf': 2700, 'max_bin': 214, 'lambda_l1': 65, 'lambda_l2': 70, 'min_gain_to_split': 6.4254569637088945, 'reg_alpha': 0.1365408635241615, 'reg_lambda': 0.33192883494045883, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:53:59,753][0m Trial 12 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.08200437160944574, 'num_leaves': 2180, 'max_depth': 6, 'min_data_in_leaf': 2300, 'max_bin': 223, 'lambda_l1': 0, 'lambda_l2': 100, 'min_gain_to_split': 7.473125073973886, 'reg_alpha': 0.12455873337511869, 'reg_lambda': 0.9920816154761494, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 wit

[32m[I 2022-04-24 17:54:08,895][0m Trial 22 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.23121552863058253, 'num_leaves': 2700, 'max_depth': 9, 'min_data_in_leaf': 3800, 'max_bin': 232, 'lambda_l1': 90, 'lambda_l2': 65, 'min_gain_to_split': 12.194127930761793, 'reg_alpha': 0.29763824386014603, 'reg_lambda': 0.3693163325597154, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:54:09,765][0m Trial 23 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.1186393226120017, 'num_leaves': 1360, 'max_depth': 10, 'min_data_in_leaf': 3300, 'max_bin': 254, 'lambda_l1': 100, 'lambda_l2': 35, 'min_gain_to_split': 3.426920872235651, 'reg_alpha': 0.25429839882822847, 'reg_lambda': 0.12524569358483417, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Bes

[32m[I 2022-04-24 17:54:20,906][0m Trial 33 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.2923015693251561, 'num_leaves': 980, 'max_depth': 12, 'min_data_in_leaf': 4900, 'max_bin': 210, 'lambda_l1': 0, 'lambda_l2': 5, 'min_gain_to_split': 0.10125523548691728, 'reg_alpha': 0.2395223176875355, 'reg_lambda': 0.0880886434090961, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:54:22,266][0m Trial 34 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.2571908264923516, 'num_leaves': 1080, 'max_depth': 11, 'min_data_in_leaf': 4300, 'max_bin': 271, 'lambda_l1': 10, 'lambda_l2': 20, 'min_gain_to_split': 1.673576418792517, 'reg_alpha': 0.2766660772188379, 'reg_lambda': 0.08325360536493112, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 0.120172092

[32m[I 2022-04-24 17:54:33,897][0m Trial 44 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.1768727108101985, 'num_leaves': 2980, 'max_depth': 8, 'min_data_in_leaf': 6000, 'max_bin': 284, 'lambda_l1': 80, 'lambda_l2': 30, 'min_gain_to_split': 2.43388842440266, 'reg_alpha': 0.20452833792235803, 'reg_lambda': 0.2998139508352617, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:54:34,919][0m Trial 45 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.22132103502844092, 'num_leaves': 2500, 'max_depth': 12, 'min_data_in_leaf': 4700, 'max_bin': 266, 'lambda_l1': 75, 'lambda_l2': 0, 'min_gain_to_split': 0.7540338450434836, 'reg_alpha': 0.28104491398315445, 'reg_lambda': 0.2520978852529553, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is 

[32m[I 2022-04-24 17:54:48,534][0m Trial 55 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.2920679619130091, 'num_leaves': 1060, 'max_depth': 12, 'min_data_in_leaf': 2700, 'max_bin': 220, 'lambda_l1': 5, 'lambda_l2': 5, 'min_gain_to_split': 0.5554918655194543, 'reg_alpha': 0.2430495575369908, 'reg_lambda': 0.0755242307729194, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:54:49,600][0m Trial 56 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.2622896940994152, 'num_leaves': 980, 'max_depth': 11, 'min_data_in_leaf': 4400, 'max_bin': 273, 'lambda_l1': 5, 'lambda_l2': 10, 'min_gain_to_split': 1.7200610936694256, 'reg_alpha': 0.17906782124512122, 'reg_lambda': 0.061614637057281665, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 0.12017209

[32m[I 2022-04-24 17:55:00,068][0m Trial 66 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.15684458575083943, 'num_leaves': 2880, 'max_depth': 6, 'min_data_in_leaf': 3900, 'max_bin': 244, 'lambda_l1': 100, 'lambda_l2': 60, 'min_gain_to_split': 14.042744168431406, 'reg_alpha': 0.08042993963830848, 'reg_lambda': 0.44741566341849814, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:55:01,104][0m Trial 67 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.1880667545268977, 'num_leaves': 2760, 'max_depth': 8, 'min_data_in_leaf': 8500, 'max_bin': 226, 'lambda_l1': 85, 'lambda_l2': 50, 'min_gain_to_split': 10.371293324544268, 'reg_alpha': 0.11635529751156348, 'reg_lambda': 0.5400206085055413, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best

[32m[I 2022-04-24 17:55:13,281][0m Trial 77 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.2701529290278014, 'num_leaves': 720, 'max_depth': 3, 'min_data_in_leaf': 900, 'max_bin': 213, 'lambda_l1': 10, 'lambda_l2': 10, 'min_gain_to_split': 1.1279644148912469, 'reg_alpha': 0.25128528425293317, 'reg_lambda': 0.12072316781098495, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:55:14,365][0m Trial 78 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.29139008772096, 'num_leaves': 1220, 'max_depth': 12, 'min_data_in_leaf': 2600, 'max_bin': 216, 'lambda_l1': 5, 'lambda_l2': 5, 'min_gain_to_split': 1.5491292700409698, 'reg_alpha': 0.24264341962749927, 'reg_lambda': 0.8960456391618874, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 0.12017209237

[32m[I 2022-04-24 17:55:25,755][0m Trial 88 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.040467225503266216, 'num_leaves': 600, 'max_depth': 6, 'min_data_in_leaf': 8000, 'max_bin': 240, 'lambda_l1': 65, 'lambda_l2': 90, 'min_gain_to_split': 11.658825008114786, 'reg_alpha': 0.05270007388682546, 'reg_lambda': 0.5831427433424453, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 0.12017209237859969.[0m
[32m[I 2022-04-24 17:55:26,836][0m Trial 89 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.20852254712086077, 'num_leaves': 540, 'max_depth': 6, 'min_data_in_leaf': 9400, 'max_bin': 240, 'lambda_l1': 60, 'lambda_l2': 85, 'min_gain_to_split': 8.840612688738318, 'reg_alpha': 0.034573770153827, 'reg_lambda': 0.41915817149917123, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with v

[32m[I 2022-04-24 17:55:37,738][0m Trial 99 finished with value: 0.12017209237859969 and parameters: {'n_estimators': 10000, 'learning_rate': 0.18135938578864313, 'num_leaves': 2580, 'max_depth': 10, 'min_data_in_leaf': 6900, 'max_bin': 226, 'lambda_l1': 95, 'lambda_l2': 80, 'min_gain_to_split': 11.205060089233278, 'reg_alpha': 0.284362771460267, 'reg_lambda': 0.3060260823068432, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 0.12017209237859969.[0m


In [30]:
study_xgb.best_params

{'n_estimators': 10000,
 'learning_rate': 0.1361709887234184,
 'num_leaves': 2960,
 'max_depth': 8,
 'min_data_in_leaf': 5400,
 'max_bin': 222,
 'lambda_l1': 30,
 'lambda_l2': 80,
 'min_gain_to_split': 8.034321470919213,
 'reg_alpha': 0.28743895460283664,
 'reg_lambda': 0.15386204858977315,
 'bagging_fraction': 0.5,
 'bagging_freq': 1,
 'feature_fraction': 0.7}

In [32]:
model_xgb = XGBRegressor(n_estimators=10000, learning_rate=0.06891, max_depth=9, max_bin=245, reg_alpha=0.09297, reg_lambda = 0.14678)
mean_xgb = cross_validate_manual(X_train, y_train, model_xgb)
np.abs(10e6*mean_xgb)

0.1343926745283607