In [15]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import lightgbm as lgbm
import xgboost
import multiprocessing
import optuna

from sklearn.neighbors import KNeighborsRegressor

from lightgbm import LGBMRegressor
from xgboost import XGBRegressor

In [11]:
num_cores = multiprocessing.cpu_count()
num_cores

4

In [3]:
X_train = pd.read_csv("./Dataset/CompressedData.csv")
X_train.head()

Unnamed: 0,Count,Open,High,Low,Close,Volume,VWAP,new_date,Asset_ID,Target
0,-0.420178,-0.231602,-0.231818,-0.231463,-0.2316,-0.163735,-0.231576,2018-01-01,0,0.000148
1,-0.406696,-0.231558,-0.231774,-0.23142,-0.231556,-0.163475,-0.231532,2018-01-02,0,0.000393
2,-0.41285,-0.231527,-0.231744,-0.231389,-0.231525,-0.163609,-0.231501,2018-01-03,0,0.000549
3,-0.411005,-0.231471,-0.231686,-0.231333,-0.231469,-0.163602,-0.231445,2018-01-04,0,6e-06
4,-0.389464,-0.230984,-0.231194,-0.230852,-0.230981,-0.162833,-0.230958,2018-01-05,0,0.005618


In [4]:
%%time

from sklearn.model_selection import cross_validate

def cross_validate_manual(X, y, model):
    cv = cross_validate(
        estimator = model,
        X = X,
        y = y,
        scoring = ["neg_mean_squared_error"],
        cv = 5,
    )
    
    return cv["test_neg_mean_squared_error"].mean()

CPU times: user 12 µs, sys: 2 µs, total: 14 µs
Wall time: 17.6 µs


In [5]:
y_train = X_train['Target']
X_train.drop(["Target", "new_date", "High", "Low", "Close", "Volume"], axis = 1, inplace=True)
X_train.head()

Unnamed: 0,Count,Open,VWAP,Asset_ID
0,-0.420178,-0.231602,-0.231576,0
1,-0.406696,-0.231558,-0.231532,0
2,-0.41285,-0.231527,-0.231501,0
3,-0.411005,-0.231471,-0.231445,0
4,-0.389464,-0.230984,-0.230958,0


In [6]:
model_lgbm = LGBMRegressor()
mean_lgbm = cross_validate_manual(X_train, y_train, model_lgbm)
print("LGBM: ", np.abs(10e6*mean_lgbm))

LGBM:  2.7157851030033764


In [7]:
model_lgbm.get_params()

{'boosting_type': 'gbdt',
 'class_weight': None,
 'colsample_bytree': 1.0,
 'importance_type': 'split',
 'learning_rate': 0.1,
 'max_depth': -1,
 'min_child_samples': 20,
 'min_child_weight': 0.001,
 'min_split_gain': 0.0,
 'n_estimators': 100,
 'n_jobs': -1,
 'num_leaves': 31,
 'objective': None,
 'random_state': None,
 'reg_alpha': 0.0,
 'reg_lambda': 0.0,
 'silent': 'warn',
 'subsample': 1.0,
 'subsample_for_bin': 200000,
 'subsample_freq': 0}

In [26]:
import optuna.integration.lightgbm as lgb
def objective(trial):
    param_grid = {
        'num_jobs': num_cores - 1,
        "n_estimators": trial.suggest_categorical("n_estimators", [10000]),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "num_leaves": trial.suggest_int("num_leaves", 20, 3000, step=20),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 200, 10000, step=100),
        "max_bin": trial.suggest_int("max_bin", 200, 300),
        "lambda_l1": trial.suggest_int("lambda_l1", 0, 100, step=5),
        "lambda_l2": trial.suggest_int("lambda_l2", 0, 100, step=5),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 0.3),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1),
        "bagging_fraction": trial.suggest_float(
            "bagging_fraction", 0.2, 0.95, step=0.1
        ),
        "bagging_freq": trial.suggest_categorical("bagging_freq", [1]),
        "feature_fraction": trial.suggest_float(
            "feature_fraction", 0.2, 0.95, step=0.1
        ),
    }
    
    model_lgbm = LGBMRegressor()
    mean_lgbm = cross_validate_manual(X_train, y_train, model_lgbm)
#     print("LGBM: ", np.abs(10e6*mean_lgbm))

    return np.abs(10e6*mean_lgbm)

In [27]:
study = optuna.create_study()
study.optimize(objective, n_trials=100)

[32m[I 2022-04-22 13:56:52,328][0m A new study created in memory with name: no-name-1fc41e7c-e1bf-42e0-bfb0-43463204a6d2[0m
[32m[I 2022-04-22 13:56:52,875][0m Trial 0 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.11523742756258078, 'num_leaves': 2380, 'max_depth': 5, 'min_data_in_leaf': 600, 'max_bin': 257, 'lambda_l1': 40, 'lambda_l2': 20, 'min_gain_to_split': 11.931847034546676, 'reg_alpha': 0.23510557858561112, 'reg_lambda': 0.2006361194792157, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 13:56:53,440][0m Trial 1 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.1582791034739922, 'num_leaves': 340, 'max_depth': 10, 'min_data_in_leaf': 3300, 'max_bin': 239, 'lambda_l1': 85, 'lambda_l2': 30, 'min_gain_to_split': 13.071172301689028, 'reg_alpha': 0.12623112574326617, 'reg

[32m[I 2022-04-22 13:56:58,901][0m Trial 11 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.16875762648274256, 'num_leaves': 60, 'max_depth': 7, 'min_data_in_leaf': 200, 'max_bin': 245, 'lambda_l1': 80, 'lambda_l2': 35, 'min_gain_to_split': 14.65553184181674, 'reg_alpha': 0.04881115664099206, 'reg_lambda': 0.4792245772506679, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 13:56:59,447][0m Trial 12 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.10138624268486103, 'num_leaves': 480, 'max_depth': 5, 'min_data_in_leaf': 3100, 'max_bin': 254, 'lambda_l1': 100, 'lambda_l2': 50, 'min_gain_to_split': 12.897235735100999, 'reg_alpha': 0.200655231600244, 'reg_lambda': 0.957790913220317, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0

[32m[I 2022-04-22 13:57:05,304][0m Trial 22 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.09661034954607822, 'num_leaves': 2680, 'max_depth': 5, 'min_data_in_leaf': 6200, 'max_bin': 277, 'lambda_l1': 5, 'lambda_l2': 0, 'min_gain_to_split': 7.833107531157884, 'reg_alpha': 0.25896062756926386, 'reg_lambda': 0.8090782123404157, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 13:57:06,116][0m Trial 23 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.04855284174502488, 'num_leaves': 1720, 'max_depth': 6, 'min_data_in_leaf': 10000, 'max_bin': 286, 'lambda_l1': 0, 'lambda_l2': 10, 'min_gain_to_split': 6.12895139987354, 'reg_alpha': 0.2725317185128859, 'reg_lambda': 0.15816096499633872, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial

[32m[I 2022-04-22 13:57:12,500][0m Trial 33 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.22010160365145975, 'num_leaves': 2500, 'max_depth': 3, 'min_data_in_leaf': 4200, 'max_bin': 271, 'lambda_l1': 30, 'lambda_l2': 25, 'min_gain_to_split': 14.109421538419294, 'reg_alpha': 0.2764987456902346, 'reg_lambda': 0.2819967899137693, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 13:57:13,125][0m Trial 34 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.15050808064226082, 'num_leaves': 2980, 'max_depth': 3, 'min_data_in_leaf': 2300, 'max_bin': 232, 'lambda_l1': 25, 'lambda_l2': 30, 'min_gain_to_split': 13.874167834373754, 'reg_alpha': 0.19342678128096935, 'reg_lambda': 0.06952892369942021, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 2.71578510

[32m[I 2022-04-22 13:57:19,667][0m Trial 44 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.010772609966282671, 'num_leaves': 2000, 'max_depth': 6, 'min_data_in_leaf': 4400, 'max_bin': 292, 'lambda_l1': 10, 'lambda_l2': 85, 'min_gain_to_split': 0.4596278833532481, 'reg_alpha': 0.22308243878413256, 'reg_lambda': 0.9428229737805262, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 13:57:20,239][0m Trial 45 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.03324113398751506, 'num_leaves': 2600, 'max_depth': 8, 'min_data_in_leaf': 800, 'max_bin': 294, 'lambda_l1': 20, 'lambda_l2': 85, 'min_gain_to_split': 0.12459217968499724, 'reg_alpha': 0.20703206387788167, 'reg_lambda': 0.2681607331107288, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with v

[32m[I 2022-04-22 13:57:27,507][0m Trial 55 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.235417588364498, 'num_leaves': 2500, 'max_depth': 3, 'min_data_in_leaf': 2800, 'max_bin': 250, 'lambda_l1': 35, 'lambda_l2': 20, 'min_gain_to_split': 13.690479448789594, 'reg_alpha': 0.18386506205145534, 'reg_lambda': 0.3817361275661758, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 13:57:28,221][0m Trial 56 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.22751258009254371, 'num_leaves': 2140, 'max_depth': 3, 'min_data_in_leaf': 2400, 'max_bin': 215, 'lambda_l1': 40, 'lambda_l2': 30, 'min_gain_to_split': 13.42672417570504, 'reg_alpha': 0.1945834549720358, 'reg_lambda': 0.08566625602759739, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 2.71578510300

[32m[I 2022-04-22 13:57:35,152][0m Trial 66 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.06197462773500835, 'num_leaves': 1560, 'max_depth': 10, 'min_data_in_leaf': 8700, 'max_bin': 261, 'lambda_l1': 55, 'lambda_l2': 100, 'min_gain_to_split': 8.651191770233131, 'reg_alpha': 0.25542090202868034, 'reg_lambda': 0.5763416459126465, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 13:57:35,854][0m Trial 67 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.10531051440191784, 'num_leaves': 1840, 'max_depth': 12, 'min_data_in_leaf': 6500, 'max_bin': 276, 'lambda_l1': 70, 'lambda_l2': 65, 'min_gain_to_split': 2.173861939399915, 'reg_alpha': 0.24826570398615394, 'reg_lambda': 0.6341408928692531, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 0 with 

[32m[I 2022-04-22 13:57:43,217][0m Trial 77 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.16649403613624367, 'num_leaves': 2380, 'max_depth': 3, 'min_data_in_leaf': 1300, 'max_bin': 252, 'lambda_l1': 35, 'lambda_l2': 35, 'min_gain_to_split': 10.720025974142297, 'reg_alpha': 0.29848666763537196, 'reg_lambda': 0.5130925549894338, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 13:57:43,903][0m Trial 78 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.2516500058439589, 'num_leaves': 2500, 'max_depth': 3, 'min_data_in_leaf': 2800, 'max_bin': 242, 'lambda_l1': 35, 'lambda_l2': 25, 'min_gain_to_split': 13.56895732068101, 'reg_alpha': 0.17782394625825337, 'reg_lambda': 0.21046714309772654, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is t

[32m[I 2022-04-22 13:57:50,695][0m Trial 88 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.11526731561644449, 'num_leaves': 980, 'max_depth': 7, 'min_data_in_leaf': 5300, 'max_bin': 224, 'lambda_l1': 80, 'lambda_l2': 45, 'min_gain_to_split': 1.1774694912633323, 'reg_alpha': 0.07408946502729402, 'reg_lambda': 0.7563221214159499, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 13:57:51,368][0m Trial 89 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.14477274121633413, 'num_leaves': 1060, 'max_depth': 9, 'min_data_in_leaf': 9600, 'max_bin': 297, 'lambda_l1': 60, 'lambda_l2': 50, 'min_gain_to_split': 1.666077128732684, 'reg_alpha': 0.09533238741705982, 'reg_lambda': 0.8135969354122522, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 2.7157851030

[32m[I 2022-04-22 13:57:58,149][0m Trial 99 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.06322112730549995, 'num_leaves': 1920, 'max_depth': 10, 'min_data_in_leaf': 8300, 'max_bin': 275, 'lambda_l1': 60, 'lambda_l2': 80, 'min_gain_to_split': 7.286592696553477, 'reg_alpha': 0.27889193871183077, 'reg_lambda': 0.9114397839122704, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 0 with value: 2.7157851030033764.[0m


In [28]:
study.best_params

{'n_estimators': 10000,
 'learning_rate': 0.11523742756258078,
 'num_leaves': 2380,
 'max_depth': 5,
 'min_data_in_leaf': 600,
 'max_bin': 257,
 'lambda_l1': 40,
 'lambda_l2': 20,
 'min_gain_to_split': 11.931847034546676,
 'reg_alpha': 0.23510557858561112,
 'reg_lambda': 0.2006361194792157,
 'bagging_fraction': 0.7,
 'bagging_freq': 1,
 'feature_fraction': 0.6000000000000001}

In [32]:
model_lgbm = LGBMRegressor(n_estimators=10000, learning_rate=0.11524, num_leaves=2380, max_depth=5, min_data_in_leaf= 600, lambda_l1=40, lambda_l2=20, min_gain_to_split=11.932, reg_alpha=0.2351, reg_lambda=0.2, baging_fraction= 0.7, bagging_freq=1, feature_fraction=0.6)
mean_lgbm = cross_validate_manual(X_train, y_train, model_lgbm)
print("LGBM: ", np.abs(10e6*mean_lgbm))

LGBM:  2.2315629253347473


In [29]:
def objective_xgb(trial):
    param_grid = {
        'n_jobs': num_cores - 1,
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 1, 500),
        'max_leaves': trial.suggest_int('max_leaves', 1, 5),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 0.3),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1),
    }

    model_xgb = XGBRegressor()
    mean_xgb = cross_validate_manual(X_train, y_train, model_xgb)
    
    return np.abs(10e6*mean_xgb)

In [30]:
study_xgb = optuna.create_study()
study_xgb.optimize(objective, n_trials=100)

[32m[I 2022-04-22 14:00:20,247][0m A new study created in memory with name: no-name-2941788f-5ec0-4b2c-abe6-3aa40438cfd6[0m
[32m[I 2022-04-22 14:00:20,889][0m Trial 0 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.06891312860856474, 'num_leaves': 220, 'max_depth': 9, 'min_data_in_leaf': 3900, 'max_bin': 245, 'lambda_l1': 10, 'lambda_l2': 80, 'min_gain_to_split': 10.991425222013609, 'reg_alpha': 0.09297560453945597, 'reg_lambda': 0.14678277523208347, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 14:00:21,560][0m Trial 1 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.19141602078388417, 'num_leaves': 1340, 'max_depth': 12, 'min_data_in_leaf': 300, 'max_bin': 202, 'lambda_l1': 95, 'lambda_l2': 75, 'min_gain_to_split': 1.257546996148589, 'reg_alpha': 0.2647815533130549, 'reg_lambda': 0.921

[32m[I 2022-04-22 14:00:28,179][0m Trial 11 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.17376992286646084, 'num_leaves': 3000, 'max_depth': 12, 'min_data_in_leaf': 5400, 'max_bin': 240, 'lambda_l1': 90, 'lambda_l2': 80, 'min_gain_to_split': 1.5391915590323764, 'reg_alpha': 0.2917477972948885, 'reg_lambda': 0.08310902786898194, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 14:00:28,812][0m Trial 12 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.11198941050401057, 'num_leaves': 660, 'max_depth': 10, 'min_data_in_leaf': 700, 'max_bin': 243, 'lambda_l1': 70, 'lambda_l2': 75, 'min_gain_to_split': 0.9879086049303829, 'reg_alpha': 0.2167585808653071, 'reg_lambda': 0.25918393958448416, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 2.71578510

[32m[I 2022-04-22 14:00:35,222][0m Trial 22 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.07736949654122423, 'num_leaves': 1000, 'max_depth': 10, 'min_data_in_leaf': 6200, 'max_bin': 290, 'lambda_l1': 10, 'lambda_l2': 30, 'min_gain_to_split': 7.377686398663708, 'reg_alpha': 0.09526949805881293, 'reg_lambda': 0.3824225490869233, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 14:00:35,909][0m Trial 23 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.07647324645022584, 'num_leaves': 80, 'max_depth': 4, 'min_data_in_leaf': 6300, 'max_bin': 245, 'lambda_l1': 55, 'lambda_l2': 50, 'min_gain_to_split': 7.509595941745482, 'reg_alpha': 0.17217574159190877, 'reg_lambda': 0.011882363537954743, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is

[32m[I 2022-04-22 14:00:42,591][0m Trial 33 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.1263676127792152, 'num_leaves': 280, 'max_depth': 6, 'min_data_in_leaf': 9500, 'max_bin': 257, 'lambda_l1': 25, 'lambda_l2': 75, 'min_gain_to_split': 9.60577987518885, 'reg_alpha': 0.08185641497106774, 'reg_lambda': 0.5461285843214451, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 14:00:43,395][0m Trial 34 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.1390008844365128, 'num_leaves': 240, 'max_depth': 4, 'min_data_in_leaf': 8500, 'max_bin': 275, 'lambda_l1': 30, 'lambda_l2': 50, 'min_gain_to_split': 8.38087965162335, 'reg_alpha': 0.08591362076242318, 'reg_lambda': 0.23378662223308466, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 2

[32m[I 2022-04-22 14:00:49,413][0m Trial 44 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.16636283024158777, 'num_leaves': 180, 'max_depth': 5, 'min_data_in_leaf': 3200, 'max_bin': 267, 'lambda_l1': 45, 'lambda_l2': 70, 'min_gain_to_split': 12.140566382647128, 'reg_alpha': 0.028382628408170553, 'reg_lambda': 0.16747906603591625, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 14:00:50,006][0m Trial 45 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.21167864629989872, 'num_leaves': 680, 'max_depth': 5, 'min_data_in_leaf': 2900, 'max_bin': 265, 'lambda_l1': 55, 'lambda_l2': 65, 'min_gain_to_split': 10.65146796682708, 'reg_alpha': 0.045014923642182514, 'reg_lambda': 0.14811383428474476, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best 

[32m[I 2022-04-22 14:00:56,389][0m Trial 55 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.17784238423396295, 'num_leaves': 240, 'max_depth': 6, 'min_data_in_leaf': 9500, 'max_bin': 242, 'lambda_l1': 25, 'lambda_l2': 75, 'min_gain_to_split': 10.904619883611065, 'reg_alpha': 0.08500810020697631, 'reg_lambda': 0.5935490898665359, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 14:00:57,046][0m Trial 56 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.16177082933687587, 'num_leaves': 200, 'max_depth': 3, 'min_data_in_leaf': 8900, 'max_bin': 274, 'lambda_l1': 30, 'lambda_l2': 80, 'min_gain_to_split': 9.787751769273655, 'reg_alpha': 0.07970188422107578, 'reg_lambda': 0.6641828317441361, 'bagging_fraction': 0.30000000000000004, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with valu

[32m[I 2022-04-22 14:01:03,696][0m Trial 66 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.03380699559766717, 'num_leaves': 1920, 'max_depth': 10, 'min_data_in_leaf': 7300, 'max_bin': 293, 'lambda_l1': 10, 'lambda_l2': 15, 'min_gain_to_split': 5.59309522038764, 'reg_alpha': 0.11614463179916086, 'reg_lambda': 0.1231590638713461, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 14:01:04,341][0m Trial 67 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.270594062756319, 'num_leaves': 660, 'max_depth': 9, 'min_data_in_leaf': 6700, 'max_bin': 247, 'lambda_l1': 65, 'lambda_l2': 10, 'min_gain_to_split': 1.9762767180428522, 'reg_alpha': 0.14376280826896531, 'reg_lambda': 0.4118957415773386, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is tria

[32m[I 2022-04-22 14:01:11,010][0m Trial 77 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.12823815431330488, 'num_leaves': 2560, 'max_depth': 7, 'min_data_in_leaf': 9700, 'max_bin': 254, 'lambda_l1': 85, 'lambda_l2': 80, 'min_gain_to_split': 8.306135010699354, 'reg_alpha': 0.16248639986326197, 'reg_lambda': 0.5107560384727599, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 14:01:11,712][0m Trial 78 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.17219000569891751, 'num_leaves': 2760, 'max_depth': 6, 'min_data_in_leaf': 9900, 'max_bin': 242, 'lambda_l1': 80, 'lambda_l2': 75, 'min_gain_to_split': 11.088692965167974, 'reg_alpha': 0.07262209828508945, 'reg_lambda': 0.5786009260961155, 'bagging_fraction': 0.2, 'bagging_freq': 1, 'feature_fraction': 0.2}. Best is trial 0 with value: 2.715785103

[32m[I 2022-04-22 14:01:18,337][0m Trial 88 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.01278608863953925, 'num_leaves': 1620, 'max_depth': 11, 'min_data_in_leaf': 4500, 'max_bin': 296, 'lambda_l1': 5, 'lambda_l2': 0, 'min_gain_to_split': 14.062780415737874, 'reg_alpha': 0.22151600835285112, 'reg_lambda': 0.169164257130392, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.7}. Best is trial 0 with value: 2.7157851030033764.[0m
[32m[I 2022-04-22 14:01:18,981][0m Trial 89 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.0582103983269385, 'num_leaves': 1280, 'max_depth': 10, 'min_data_in_leaf': 4400, 'max_bin': 229, 'lambda_l1': 0, 'lambda_l2': 95, 'min_gain_to_split': 13.51162229864891, 'reg_alpha': 0.2281544042288761, 'reg_lambda': 0.9785886812589706, 'bagging_fraction': 0.4, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 

[32m[I 2022-04-22 14:01:25,638][0m Trial 99 finished with value: 2.7157851030033764 and parameters: {'n_estimators': 10000, 'learning_rate': 0.22269556023120501, 'num_leaves': 840, 'max_depth': 6, 'min_data_in_leaf': 7300, 'max_bin': 223, 'lambda_l1': 5, 'lambda_l2': 20, 'min_gain_to_split': 1.810213239571889, 'reg_alpha': 0.10633947614922007, 'reg_lambda': 0.3434551534423648, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.5}. Best is trial 0 with value: 2.7157851030033764.[0m


In [33]:
study_xgb.best_params

{'n_estimators': 10000,
 'learning_rate': 0.06891312860856474,
 'num_leaves': 220,
 'max_depth': 9,
 'min_data_in_leaf': 3900,
 'max_bin': 245,
 'lambda_l1': 10,
 'lambda_l2': 80,
 'min_gain_to_split': 10.991425222013609,
 'reg_alpha': 0.09297560453945597,
 'reg_lambda': 0.14678277523208347,
 'bagging_fraction': 0.5,
 'bagging_freq': 1,
 'feature_fraction': 0.4}

In [36]:
model_xgb = XGBRegressor(n_estimators=10000, learning_rate=0.06891, max_depth=9, max_bin=245, reg_alpha=0.09297, reg_lambda = 0.14678)
mean_xgb = cross_validate_manual(X_train, y_train, model_xgb)
np.abs(10e6*mean_xgb)

2.234547725894764