In [1]:
import warnings
warnings.filterwarnings(action="ignore")
import datetime
import time
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import HyperbandPruner

In [2]:
RANDOM_SEED = 99

In [3]:
train_df = pd.read_parquet('./data/train_v2_3_nn.parquet')
train_null_df = pd.read_parquet('./data/train_v2_3_n.parquet').drop(columns=["BN"])
test_df = pd.read_parquet('./data/test_v2_3.parquet')

In [4]:
train_x = train_df.drop(columns=["BN"])
train_y = train_df["BN"]

In [5]:
def create_model(param):
    return xgb.XGBRegressor(
        random_state=RANDOM_SEED,
        tree_method="gpu_hist",
        eval_metric="rmse",
        early_stopping_rounds=100,
        **param
    )

In [6]:
def train_model(param, X, y, X_test, trial=None, test=False):
  skf = KFold(n_splits=3, shuffle=True, random_state=RANDOM_SEED)

  val_scores = []
  y_tests = []
  models = []

  for idx, (train_idx, val_idx) in enumerate(skf.split(X, y)):

    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

    model = create_model(param)
    model.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)], verbose=1000)

    y_hat_val = model.predict(X_val)
    score = mean_squared_error(y_val.values, y_hat_val, squared=False)
    val_scores.append(score)
    print(f'Fold: {idx+1}/3 rmse score = {score:.5f}')

    if test:
      y_tests.append(model.predict(X_test))
      models.append(model)

    if trial:
      trial.report(score, idx)

      if trial.should_prune():
        raise optuna.TrialPruned()

  return val_scores, y_tests, models

In [7]:
def objective_xgb(trial):
  param = {
    "booster": trial.suggest_categorical("booster", ["gbtree", "dart"]),
    "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
    "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
    "subsample": trial.suggest_float("subsample", 0.1, 1.0),
    "colsample_bytree": trial.suggest_float("colsample_bytree", 0.1, 1.0),
    "n_estimators": trial.suggest_int("n_estimators", 100, 1000, step=100),
    "max_depth": trial.suggest_int("max_depth", 3, 9),
    "min_child_weight": trial.suggest_int("min_child_weight", 2, 10),
    "eta": trial.suggest_float("eta", 1e-8, 1.0, log=True),
    "gamma": trial.suggest_float("gamma", 1e-8, 1.0, log=True),
    "grow_policy": trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])
  }

  if param["booster"] == "dart":
    param['sample_type'] = trial.suggest_categorical('sample_type', ['uniform', 'weighted'])
    param['normalize_type'] = trial.suggest_categorical('normalize_type', ['tree', 'forest'])
    param['rate_drop'] = trial.suggest_float('rate_drop', 1e-8, 1.0, log=True)
    param['skip_drop'] = trial.suggest_float('skip_drop', 1e-8, 1.0, log=True)

  val_scores, y_tests, models = train_model(param, train_x, train_y, test_df, trial)

  return sum(val_scores) / len(val_scores)

In [8]:
study = optuna.create_study(
    sampler=TPESampler(seed=RANDOM_SEED),
    direction='minimize',
    study_name='xgb_tuning',
    pruner=HyperbandPruner(
      min_resource=1, max_resource=8, reduction_factor=3
    ),
)

study.optimize(objective_xgb, n_trials=50)

[I 2023-10-10 15:08:45,536] A new study created in memory with name: xgb_tuning


[0]	validation_0-rmse:2.60828	validation_1-rmse:2.60307
[299]	validation_0-rmse:2.60827	validation_1-rmse:2.60307
Fold: 1/3 rmse score = 2.60307
[0]	validation_0-rmse:2.60492	validation_1-rmse:2.60978
[299]	validation_0-rmse:2.60492	validation_1-rmse:2.60978
Fold: 2/3 rmse score = 2.60978
[0]	validation_0-rmse:2.60643	validation_1-rmse:2.60677
[299]	validation_0-rmse:2.60643	validation_1-rmse:2.60677


[I 2023-10-10 15:08:58,338] Trial 0 finished with value: 2.606540033674593 and parameters: {'booster': 'gbtree', 'lambda': 0.04017550942202233, 'alpha': 1.7847241527490656e-08, 'subsample': 0.8272449670283629, 'colsample_bytree': 0.6090556776494775, 'n_estimators': 300, 'max_depth': 3, 'min_child_weight': 10, 'eta': 1.1339812255777714e-08, 'gamma': 0.014399394343769443, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 2.606540033674593.


Fold: 3/3 rmse score = 2.60677
[0]	validation_0-rmse:2.60827	validation_1-rmse:2.60307
[899]	validation_0-rmse:2.60448	validation_1-rmse:2.59929
Fold: 1/3 rmse score = 2.59929
[0]	validation_0-rmse:2.60492	validation_1-rmse:2.60978
[899]	validation_0-rmse:2.60114	validation_1-rmse:2.60599
Fold: 2/3 rmse score = 2.60599
[0]	validation_0-rmse:2.60643	validation_1-rmse:2.60677
[899]	validation_0-rmse:2.60264	validation_1-rmse:2.60298


[I 2023-10-10 15:41:45,624] Trial 1 finished with value: 2.602753930514485 and parameters: {'booster': 'dart', 'lambda': 1.4575798463318516e-05, 'alpha': 0.6189425995687715, 'subsample': 0.5719732438377161, 'colsample_bytree': 0.1842517839679773, 'n_estimators': 900, 'max_depth': 4, 'min_child_weight': 6, 'eta': 2.178476742104051e-06, 'gamma': 0.033817296080697445, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'tree', 'rate_drop': 1.4193450623251104e-07, 'skip_drop': 4.983900949448835e-07}. Best is trial 1 with value: 2.602753930514485.


Fold: 3/3 rmse score = 2.60298
[0]	validation_0-rmse:2.60828	validation_1-rmse:2.60307
[199]	validation_0-rmse:2.60827	validation_1-rmse:2.60306
Fold: 1/3 rmse score = 2.60306
[0]	validation_0-rmse:2.60492	validation_1-rmse:2.60978
[199]	validation_0-rmse:2.60491	validation_1-rmse:2.60977
Fold: 2/3 rmse score = 2.60977
[0]	validation_0-rmse:2.60643	validation_1-rmse:2.60677
[199]	validation_0-rmse:2.60642	validation_1-rmse:2.60676


[I 2023-10-10 15:42:40,104] Trial 2 finished with value: 2.6065317024305865 and parameters: {'booster': 'gbtree', 'lambda': 7.591896035963056e-07, 'alpha': 1.128298527729365e-08, 'subsample': 0.9087797683651598, 'colsample_bytree': 0.597010984189897, 'n_estimators': 200, 'max_depth': 9, 'min_child_weight': 6, 'eta': 2.1580835301822618e-08, 'gamma': 0.00015813016339763232, 'grow_policy': 'lossguide'}. Best is trial 1 with value: 2.602753930514485.


Fold: 3/3 rmse score = 2.60676
[0]	validation_0-rmse:2.00867	validation_1-rmse:2.00695
[399]	validation_0-rmse:0.94155	validation_1-rmse:0.99940
Fold: 1/3 rmse score = 0.99924
[0]	validation_0-rmse:2.01206	validation_1-rmse:2.01475
[399]	validation_0-rmse:0.94620	validation_1-rmse:0.99360
Fold: 2/3 rmse score = 0.99360
[0]	validation_0-rmse:2.01266	validation_1-rmse:2.01195
[399]	validation_0-rmse:0.93896	validation_1-rmse:0.99221


[I 2023-10-10 15:42:54,937] Trial 3 finished with value: 0.9950165722027737 and parameters: {'booster': 'gbtree', 'lambda': 0.5574271302327638, 'alpha': 5.369144319173959e-05, 'subsample': 0.3376820528693848, 'colsample_bytree': 0.5944405760344648, 'n_estimators': 400, 'max_depth': 4, 'min_child_weight': 10, 'eta': 0.32615150214792754, 'gamma': 0.05011280824271637, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 0.9950165722027737.


Fold: 3/3 rmse score = 0.99221
[0]	validation_0-rmse:2.60828	validation_1-rmse:2.60307
[699]	validation_0-rmse:2.60800	validation_1-rmse:2.60280
Fold: 1/3 rmse score = 2.60280
[0]	validation_0-rmse:2.60492	validation_1-rmse:2.60978
[699]	validation_0-rmse:2.60465	validation_1-rmse:2.60951
Fold: 2/3 rmse score = 2.60951
[0]	validation_0-rmse:2.60643	validation_1-rmse:2.60677
[699]	validation_0-rmse:2.60616	validation_1-rmse:2.60650


[I 2023-10-10 15:43:19,092] Trial 4 finished with value: 2.6062711844270505 and parameters: {'booster': 'gbtree', 'lambda': 1.9040058970389597e-05, 'alpha': 0.0013569405041690126, 'subsample': 0.9788061899951741, 'colsample_bytree': 0.7347595233221714, 'n_estimators': 700, 'max_depth': 4, 'min_child_weight': 4, 'eta': 1.9058143481065904e-07, 'gamma': 2.700633373820546e-06, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 0.9950165722027737.


Fold: 3/3 rmse score = 2.60650
[0]	validation_0-rmse:2.60780	validation_1-rmse:2.60259
[999]	validation_0-rmse:2.54858	validation_1-rmse:2.54354
Fold: 1/3 rmse score = 2.54354
[0]	validation_0-rmse:2.60444	validation_1-rmse:2.60930
[999]	validation_0-rmse:2.54543	validation_1-rmse:2.55014


[I 2023-10-10 16:09:57,369] Trial 5 pruned. 


Fold: 2/3 rmse score = 2.55014
[0]	validation_0-rmse:2.60532	validation_1-rmse:2.60013
[599]	validation_0-rmse:1.53161	validation_1-rmse:1.53289
Fold: 1/3 rmse score = 1.53289
[0]	validation_0-rmse:2.60198	validation_1-rmse:2.60684
[599]	validation_0-rmse:1.53321	validation_1-rmse:1.53393
Fold: 2/3 rmse score = 1.53393

[I 2023-10-10 16:10:11,374] Trial 6 pruned. 



[0]	validation_0-rmse:2.60827	validation_1-rmse:2.60306
[899]	validation_0-rmse:2.59998	validation_1-rmse:2.59481
Fold: 1/3 rmse score = 2.59481
[0]	validation_0-rmse:2.60491	validation_1-rmse:2.60977
[899]	validation_0-rmse:2.59666	validation_1-rmse:2.60150


[I 2023-10-10 16:30:49,861] Trial 7 pruned. 


Fold: 2/3 rmse score = 2.60150
[0]	validation_0-rmse:2.60699	validation_1-rmse:2.60179
[799]	validation_0-rmse:1.85286	validation_1-rmse:1.85117
Fold: 1/3 rmse score = 1.85117
[0]	validation_0-rmse:2.60366	validation_1-rmse:2.60851
[799]	validation_0-rmse:1.85273	validation_1-rmse:1.85517
Fold: 2/3 rmse score = 1.85517
[0]	validation_0-rmse:2.60515	validation_1-rmse:2.60550
[799]	validation_0-rmse:1.85332	validation_1-rmse:1.85360


[I 2023-10-10 16:55:54,451] Trial 8 finished with value: 1.8533097593640064 and parameters: {'booster': 'dart', 'lambda': 0.016269894265187258, 'alpha': 0.00027473371985831937, 'subsample': 0.7205784430671397, 'colsample_bytree': 0.32422360794447835, 'n_estimators': 800, 'max_depth': 5, 'min_child_weight': 7, 'eta': 0.000640160651175959, 'gamma': 0.0007690362020385368, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 2.469722638494642e-07, 'skip_drop': 0.0028449094047407666}. Best is trial 3 with value: 0.9950165722027737.


Fold: 3/3 rmse score = 1.85360
[0]	validation_0-rmse:2.60828	validation_1-rmse:2.60307
[699]	validation_0-rmse:2.60809	validation_1-rmse:2.60289
Fold: 1/3 rmse score = 2.60289
[0]	validation_0-rmse:2.60492	validation_1-rmse:2.60978
[699]	validation_0-rmse:2.60474	validation_1-rmse:2.60959


[I 2023-10-10 17:09:16,785] Trial 9 pruned. 


Fold: 2/3 rmse score = 2.60959
[0]	validation_0-rmse:1.55449	validation_1-rmse:1.55640
[110]	validation_0-rmse:1.21028	validation_1-rmse:1.38793
Fold: 1/3 rmse score = 1.07107
[0]	validation_0-rmse:1.55871	validation_1-rmse:1.56236
[107]	validation_0-rmse:1.24566	validation_1-rmse:1.44625
Fold: 2/3 rmse score = 1.05824
[0]	validation_0-rmse:1.55051	validation_1-rmse:1.55331
[110]	validation_0-rmse:1.19915	validation_1-rmse:1.37317


[I 2023-10-10 17:09:29,861] Trial 10 finished with value: 1.0609680492402778 and parameters: {'booster': 'gbtree', 'lambda': 0.20136472246731266, 'alpha': 0.0037294959598213996, 'subsample': 0.1136943669843487, 'colsample_bytree': 0.9836482175958339, 'n_estimators': 400, 'max_depth': 8, 'min_child_weight': 2, 'eta': 0.5421827357095554, 'gamma': 0.6491750314063931, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 0.9950165722027737.


Fold: 3/3 rmse score = 1.05359
[0]	validation_0-rmse:1.15800	validation_1-rmse:1.17022
[102]	validation_0-rmse:182.49337	validation_1-rmse:199.77198
Fold: 1/3 rmse score = 1.13355
[0]	validation_0-rmse:1.17105	validation_1-rmse:1.17173
[103]	validation_0-rmse:1315.69334	validation_1-rmse:1441.34958
Fold: 2/3 rmse score = 1.13185
[0]	validation_0-rmse:1.15771	validation_1-rmse:1.16419
[102]	validation_0-rmse:481.15389	validation_1-rmse:523.07952


[I 2023-10-10 17:09:41,248] Trial 11 finished with value: 1.1272107024133406 and parameters: {'booster': 'gbtree', 'lambda': 0.6915180707346773, 'alpha': 0.007676817778163076, 'subsample': 0.10260407700657599, 'colsample_bytree': 0.9303022934183071, 'n_estimators': 400, 'max_depth': 8, 'min_child_weight': 2, 'eta': 0.989503289598702, 'gamma': 0.7293916524775244, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 0.9950165722027737.


Fold: 3/3 rmse score = 1.11623
[0]	validation_0-rmse:1.34537	validation_1-rmse:1.35134
[128]	validation_0-rmse:0.86189	validation_1-rmse:1.08609
Fold: 1/3 rmse score = 1.00927
[0]	validation_0-rmse:1.34736	validation_1-rmse:1.34826
[116]	validation_0-rmse:0.86671	validation_1-rmse:1.07515
Fold: 2/3 rmse score = 1.00774
[0]	validation_0-rmse:1.34464	validation_1-rmse:1.34750
[123]	validation_0-rmse:0.86448	validation_1-rmse:1.08192


[I 2023-10-10 17:09:54,293] Trial 12 finished with value: 1.0074602339266332 and parameters: {'booster': 'gbtree', 'lambda': 0.9390089759158639, 'alpha': 0.0085597513515605, 'subsample': 0.4052937943893192, 'colsample_bytree': 0.965774787473821, 'n_estimators': 400, 'max_depth': 7, 'min_child_weight': 2, 'eta': 0.6887165204615721, 'gamma': 0.998337799421535, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 0.9950165722027737.


Fold: 3/3 rmse score = 1.00537
[0]	validation_0-rmse:2.48430	validation_1-rmse:2.47957
[99]	validation_0-rmse:0.99449	validation_1-rmse:1.01846
Fold: 1/3 rmse score = 1.01846
[0]	validation_0-rmse:2.48085	validation_1-rmse:2.48537
[99]	validation_0-rmse:1.00127	validation_1-rmse:1.01129
Fold: 2/3 rmse score = 1.01129
[0]	validation_0-rmse:2.48261	validation_1-rmse:2.48326
[99]	validation_0-rmse:0.99730	validation_1-rmse:1.01361


[I 2023-10-10 17:10:05,920] Trial 13 finished with value: 1.0144539248021587 and parameters: {'booster': 'gbtree', 'lambda': 0.006004222319263677, 'alpha': 5.162400489170195e-05, 'subsample': 0.4220775521125347, 'colsample_bytree': 0.7575724832065093, 'n_estimators': 100, 'max_depth': 7, 'min_child_weight': 4, 'eta': 0.05933074855108428, 'gamma': 0.6683362526305419, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 0.9950165722027737.


Fold: 3/3 rmse score = 1.01361
[0]	validation_0-rmse:2.56798	validation_1-rmse:2.56296
[499]	validation_0-rmse:1.00603	validation_1-rmse:1.02680
Fold: 1/3 rmse score = 1.02680
[0]	validation_0-rmse:2.56483	validation_1-rmse:2.56958
[499]	validation_0-rmse:1.01173	validation_1-rmse:1.01879


[I 2023-10-10 17:10:24,034] Trial 14 pruned. 


Fold: 2/3 rmse score = 1.01879
[0]	validation_0-rmse:2.56749	validation_1-rmse:2.56249
[299]	validation_0-rmse:0.99097	validation_1-rmse:1.01530
Fold: 1/3 rmse score = 1.01530
[0]	validation_0-rmse:2.56413	validation_1-rmse:2.56887
[299]	validation_0-rmse:0.99767	validation_1-rmse:1.00807
Fold: 2/3 rmse score = 1.00807
[0]	validation_0-rmse:2.56580	validation_1-rmse:2.56613
[299]	validation_0-rmse:0.99281	validation_1-rmse:1.00936


[I 2023-10-10 17:10:50,803] Trial 15 finished with value: 1.0109124737251527 and parameters: {'booster': 'gbtree', 'lambda': 0.0016302588828141604, 'alpha': 0.00022220824010587233, 'subsample': 0.5039745036025409, 'colsample_bytree': 0.703970317433247, 'n_estimators': 300, 'max_depth': 7, 'min_child_weight': 3, 'eta': 0.01974121531316481, 'gamma': 0.0018815303958341514, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 0.9950165722027737.


Fold: 3/3 rmse score = 1.00936
[0]	validation_0-rmse:2.30562	validation_1-rmse:2.30171
[499]	validation_0-rmse:0.79194	validation_1-rmse:0.95388
Fold: 1/3 rmse score = 0.95376
[0]	validation_0-rmse:2.30391	validation_1-rmse:2.30792
[499]	validation_0-rmse:0.79690	validation_1-rmse:0.94890
Fold: 2/3 rmse score = 0.94797
[0]	validation_0-rmse:2.30462	validation_1-rmse:2.30568
[499]	validation_0-rmse:0.79178	validation_1-rmse:0.95194
Fold: 3/3 rmse score = 0.95160

[I 2023-10-10 17:11:27,456] Trial 16 finished with value: 0.9511110566496127 and parameters: {'booster': 'gbtree', 'lambda': 0.0011101304704990158, 'alpha': 0.018777324539088213, 'subsample': 0.36059458575414616, 'colsample_bytree': 0.8542635526574458, 'n_estimators': 500, 'max_depth': 7, 'min_child_weight': 8, 'eta': 0.14566841873916728, 'gamma': 0.10061518059491076, 'grow_policy': 'lossguide'}. Best is trial 16 with value: 0.9511110566496127.



[0]	validation_0-rmse:2.60149	validation_1-rmse:2.59631
[599]	validation_0-rmse:1.16493	validation_1-rmse:1.17109
Fold: 1/3 rmse score = 1.17109
[0]	validation_0-rmse:2.59816	validation_1-rmse:2.60300
[599]	validation_0-rmse:1.16902	validation_1-rmse:1.16661


[I 2023-10-10 17:11:44,988] Trial 17 pruned. 


Fold: 2/3 rmse score = 1.16661
[0]	validation_0-rmse:2.41270	validation_1-rmse:2.40848
[199]	validation_0-rmse:0.85277	validation_1-rmse:0.94584
Fold: 1/3 rmse score = 0.94584
[0]	validation_0-rmse:2.40974	validation_1-rmse:2.41425
[199]	validation_0-rmse:0.85825	validation_1-rmse:0.93839
Fold: 2/3 rmse score = 0.93839
[0]	validation_0-rmse:2.41249	validation_1-rmse:2.41300
[199]	validation_0-rmse:0.85096	validation_1-rmse:0.94045


[I 2023-10-10 17:11:58,369] Trial 18 finished with value: 0.9415584248785208 and parameters: {'booster': 'gbtree', 'lambda': 0.06870441050020963, 'alpha': 0.001008867356309089, 'subsample': 0.5677625149381036, 'colsample_bytree': 0.46267397765136364, 'n_estimators': 200, 'max_depth': 9, 'min_child_weight': 9, 'eta': 0.0988204152102819, 'gamma': 0.09273277305571015, 'grow_policy': 'depthwise'}. Best is trial 18 with value: 0.9415584248785208.


Fold: 3/3 rmse score = 0.94045
[0]	validation_0-rmse:2.59566	validation_1-rmse:2.59052
[199]	validation_0-rmse:1.26282	validation_1-rmse:1.27476
Fold: 1/3 rmse score = 1.27476
[0]	validation_0-rmse:2.59233	validation_1-rmse:2.59717
[199]	validation_0-rmse:1.26590	validation_1-rmse:1.27402


[I 2023-10-10 17:12:10,074] Trial 19 pruned. 


Fold: 2/3 rmse score = 1.27402
[0]	validation_0-rmse:2.48300	validation_1-rmse:2.47851
[99]	validation_0-rmse:0.97397	validation_1-rmse:1.00668
Fold: 1/3 rmse score = 1.00668
[0]	validation_0-rmse:2.48011	validation_1-rmse:2.48473
[99]	validation_0-rmse:0.97955	validation_1-rmse:0.99780
Fold: 2/3 rmse score = 0.99780
[0]	validation_0-rmse:2.48190	validation_1-rmse:2.48220
[99]	validation_0-rmse:0.97596	validation_1-rmse:1.00162


[I 2023-10-10 17:12:18,415] Trial 20 finished with value: 1.0020346126904993 and parameters: {'booster': 'gbtree', 'lambda': 0.042733330364855555, 'alpha': 0.08394543238747437, 'subsample': 0.6547691175699307, 'colsample_bytree': 0.4383448218989016, 'n_estimators': 100, 'max_depth': 8, 'min_child_weight': 9, 'eta': 0.06414846914903319, 'gamma': 0.00618944227597624, 'grow_policy': 'depthwise'}. Best is trial 18 with value: 0.9415584248785208.


Fold: 3/3 rmse score = 1.00162
[0]	validation_0-rmse:2.39119	validation_1-rmse:2.38703
[499]	validation_0-rmse:0.87374	validation_1-rmse:0.95604
Fold: 1/3 rmse score = 0.95604
[0]	validation_0-rmse:2.38982	validation_1-rmse:2.39393
[499]	validation_0-rmse:0.87580	validation_1-rmse:0.94561
Fold: 2/3 rmse score = 0.94558
[0]	validation_0-rmse:2.39116	validation_1-rmse:2.39136
[499]	validation_0-rmse:0.87442	validation_1-rmse:0.95092


[I 2023-10-10 17:12:37,268] Trial 21 finished with value: 0.9508439068956044 and parameters: {'booster': 'gbtree', 'lambda': 0.10838080669934495, 'alpha': 0.0003943658550958195, 'subsample': 0.5007114190709858, 'colsample_bytree': 0.6627878938943187, 'n_estimators': 500, 'max_depth': 6, 'min_child_weight': 9, 'eta': 0.11114210357154419, 'gamma': 0.10422669691874459, 'grow_policy': 'depthwise'}. Best is trial 18 with value: 0.9415584248785208.


Fold: 3/3 rmse score = 0.95090
[0]	validation_0-rmse:2.46988	validation_1-rmse:2.46542
[499]	validation_0-rmse:0.86630	validation_1-rmse:0.94814
Fold: 1/3 rmse score = 0.94814
[0]	validation_0-rmse:2.46779	validation_1-rmse:2.47228
[499]	validation_0-rmse:0.86874	validation_1-rmse:0.94031
Fold: 2/3 rmse score = 0.94029
[0]	validation_0-rmse:2.46923	validation_1-rmse:2.46957
[499]	validation_0-rmse:0.86797	validation_1-rmse:0.94497
Fold: 3/3 rmse score = 0.94497

[I 2023-10-10 17:12:58,243] Trial 22 finished with value: 0.9444681906014255 and parameters: {'booster': 'gbtree', 'lambda': 0.10036201771551814, 'alpha': 0.0015574736252593846, 'subsample': 0.5158664801781194, 'colsample_bytree': 0.6745341457678686, 'n_estimators': 500, 'max_depth': 7, 'min_child_weight': 9, 'eta': 0.0697017920303057, 'gamma': 0.14566804587799506, 'grow_policy': 'depthwise'}. Best is trial 18 with value: 0.9415584248785208.



[0]	validation_0-rmse:2.58893	validation_1-rmse:2.58382
[299]	validation_0-rmse:1.07659	validation_1-rmse:1.08785
Fold: 1/3 rmse score = 1.08785
[0]	validation_0-rmse:2.58574	validation_1-rmse:2.59053
[299]	validation_0-rmse:1.08054	validation_1-rmse:1.07994


[I 2023-10-10 17:13:07,306] Trial 23 pruned. 


Fold: 2/3 rmse score = 1.07994
[0]	validation_0-rmse:2.48614	validation_1-rmse:2.48159
[699]	validation_0-rmse:0.71222	validation_1-rmse:0.91440
Fold: 1/3 rmse score = 0.91439
[0]	validation_0-rmse:2.48319	validation_1-rmse:2.48796
[699]	validation_0-rmse:0.71775	validation_1-rmse:0.90632
Fold: 2/3 rmse score = 0.90632
[0]	validation_0-rmse:2.48563	validation_1-rmse:2.48609
[699]	validation_0-rmse:0.71609	validation_1-rmse:0.91165


[I 2023-10-10 17:13:44,961] Trial 24 finished with value: 0.9107875282440095 and parameters: {'booster': 'gbtree', 'lambda': 0.009717824636759463, 'alpha': 0.002497145601900229, 'subsample': 0.6033891668210435, 'colsample_bytree': 0.5030543378392416, 'n_estimators': 700, 'max_depth': 9, 'min_child_weight': 7, 'eta': 0.061275504884375, 'gamma': 0.008455063373887487, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 0.9107875282440095.


Fold: 3/3 rmse score = 0.91165
[0]	validation_0-rmse:2.56887	validation_1-rmse:2.56388
[699]	validation_0-rmse:0.87096	validation_1-rmse:0.94708
Fold: 1/3 rmse score = 0.94708
[0]	validation_0-rmse:2.56569	validation_1-rmse:2.57051
[699]	validation_0-rmse:0.87466	validation_1-rmse:0.93873
Fold: 2/3 rmse score = 0.93873
[0]	validation_0-rmse:2.56743	validation_1-rmse:2.56781
[699]	validation_0-rmse:0.87097	validation_1-rmse:0.94239


[I 2023-10-10 17:14:22,899] Trial 25 finished with value: 0.9427365060954968 and parameters: {'booster': 'gbtree', 'lambda': 0.006695658446135022, 'alpha': 0.0032751015727330137, 'subsample': 0.6221954289148531, 'colsample_bytree': 0.5003745172284199, 'n_estimators': 700, 'max_depth': 9, 'min_child_weight': 7, 'eta': 0.01968051258805344, 'gamma': 0.008154755350276618, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 0.9107875282440095.


Fold: 3/3 rmse score = 0.94239
[0]	validation_0-rmse:2.60544	validation_1-rmse:2.60025
[699]	validation_0-rmse:1.45380	validation_1-rmse:1.46055
Fold: 1/3 rmse score = 1.46055
[0]	validation_0-rmse:2.60211	validation_1-rmse:2.60696
[699]	validation_0-rmse:1.45563	validation_1-rmse:1.46216


[I 2023-10-10 17:28:58,506] Trial 26 pruned. 


Fold: 2/3 rmse score = 1.46216
[0]	validation_0-rmse:2.55471	validation_1-rmse:2.54982
[799]	validation_0-rmse:0.82034	validation_1-rmse:0.92926
Fold: 1/3 rmse score = 0.92926
[0]	validation_0-rmse:2.55160	validation_1-rmse:2.55639
[799]	validation_0-rmse:0.82421	validation_1-rmse:0.92272
Fold: 2/3 rmse score = 0.92272
[0]	validation_0-rmse:2.55339	validation_1-rmse:2.55382
[799]	validation_0-rmse:0.82259	validation_1-rmse:0.92605


[I 2023-10-10 17:29:40,826] Trial 27 finished with value: 0.9260104347123387 and parameters: {'booster': 'gbtree', 'lambda': 0.005178674071862367, 'alpha': 0.005200339640573238, 'subsample': 0.6399585674873495, 'colsample_bytree': 0.498131943861357, 'n_estimators': 800, 'max_depth': 9, 'min_child_weight': 7, 'eta': 0.026779731432385878, 'gamma': 0.0004514879688277118, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 0.9107875282440095.


Fold: 3/3 rmse score = 0.92605
[0]	validation_0-rmse:2.59923	validation_1-rmse:2.59408
[799]	validation_0-rmse:1.01931	validation_1-rmse:1.04528
Fold: 1/3 rmse score = 1.04528
[0]	validation_0-rmse:2.59593	validation_1-rmse:2.60078
[799]	validation_0-rmse:1.02389	validation_1-rmse:1.03723


[I 2023-10-10 17:30:08,007] Trial 28 pruned. 


Fold: 2/3 rmse score = 1.03723
[0]	validation_0-rmse:2.17263	validation_1-rmse:2.17084
[386]	validation_0-rmse:0.55344	validation_1-rmse:0.92228
Fold: 1/3 rmse score = 0.92160
[0]	validation_0-rmse:2.17152	validation_1-rmse:2.17627
[443]	validation_0-rmse:0.52239	validation_1-rmse:0.91808
Fold: 2/3 rmse score = 0.91700
[0]	validation_0-rmse:2.17461	validation_1-rmse:2.17488
[479]	validation_0-rmse:0.49082	validation_1-rmse:0.92178


[I 2023-10-10 17:30:34,141] Trial 29 finished with value: 0.9197426102379577 and parameters: {'booster': 'gbtree', 'lambda': 0.029436370091299232, 'alpha': 8.445010251536444e-06, 'subsample': 0.7917397947575985, 'colsample_bytree': 0.540009241942139, 'n_estimators': 800, 'max_depth': 9, 'min_child_weight': 7, 'eta': 0.22392923034711348, 'gamma': 0.016153126130448486, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 0.9107875282440095.


Fold: 3/3 rmse score = 0.92063
[0]	validation_0-rmse:2.11162	validation_1-rmse:2.11001
[433]	validation_0-rmse:0.59249	validation_1-rmse:0.93279
Fold: 1/3 rmse score = 0.93121
[0]	validation_0-rmse:2.11147	validation_1-rmse:2.11525
[443]	validation_0-rmse:0.59241	validation_1-rmse:0.92341
Fold: 2/3 rmse score = 0.92181
[0]	validation_0-rmse:2.11403	validation_1-rmse:2.11380
[427]	validation_0-rmse:0.60099	validation_1-rmse:0.92800
Fold: 3/3 rmse score = 0.92577

[I 2023-10-10 17:30:56,217] Trial 30 finished with value: 0.9262659550657962 and parameters: {'booster': 'gbtree', 'lambda': 0.018475142137358784, 'alpha': 1.4433417903557856e-06, 'subsample': 0.7938714105060062, 'colsample_bytree': 0.5376818678368872, 'n_estimators': 1000, 'max_depth': 8, 'min_child_weight': 7, 'eta': 0.2583106141827791, 'gamma': 0.0010448705122828755, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 0.9107875282440095.



[0]	validation_0-rmse:2.01790	validation_1-rmse:2.01713
[380]	validation_0-rmse:0.59262	validation_1-rmse:0.94377
Fold: 1/3 rmse score = 0.94153
[0]	validation_0-rmse:2.01802	validation_1-rmse:2.02182
[285]	validation_0-rmse:0.65924	validation_1-rmse:0.93336
Fold: 2/3 rmse score = 0.93221
[0]	validation_0-rmse:2.02256	validation_1-rmse:2.02245
[362]	validation_0-rmse:0.60705	validation_1-rmse:0.94140
Fold: 3/3 rmse score = 0.93833


[I 2023-10-10 17:31:14,582] Trial 31 finished with value: 0.9373560550560441 and parameters: {'booster': 'gbtree', 'lambda': 0.028402084240382652, 'alpha': 6.719185258957299e-07, 'subsample': 0.7771728934397648, 'colsample_bytree': 0.5319561455552326, 'n_estimators': 1000, 'max_depth': 8, 'min_child_weight': 7, 'eta': 0.3102445656143863, 'gamma': 3.009885375804869e-05, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 0.9107875282440095.


[0]	validation_0-rmse:2.12980	validation_1-rmse:2.12824
[371]	validation_0-rmse:0.50616	validation_1-rmse:0.92382
Fold: 1/3 rmse score = 0.92275
[0]	validation_0-rmse:2.12892	validation_1-rmse:2.13359
[385]	validation_0-rmse:0.51186	validation_1-rmse:0.91864
Fold: 2/3 rmse score = 0.91729
[0]	validation_0-rmse:2.13200	validation_1-rmse:2.13234
[382]	validation_0-rmse:0.50453	validation_1-rmse:0.92249
Fold: 3/3 rmse score = 0.92044

[I 2023-10-10 17:31:38,165] Trial 32 finished with value: 0.9201614553280829 and parameters: {'booster': 'gbtree', 'lambda': 0.014921138261907522, 'alpha': 2.3243963074039038e-07, 'subsample': 0.8226600369068187, 'colsample_bytree': 0.5347830112603178, 'n_estimators': 900, 'max_depth': 9, 'min_child_weight': 5, 'eta': 0.24693151778304392, 'gamma': 0.018605783491751692, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 0.9107875282440095.



[0]	validation_0-rmse:2.54246	validation_1-rmse:2.53765
[899]	validation_0-rmse:0.75669	validation_1-rmse:0.91100
Fold: 1/3 rmse score = 0.91100
[0]	validation_0-rmse:2.53990	validation_1-rmse:2.54470
[899]	validation_0-rmse:0.76185	validation_1-rmse:0.90385
Fold: 2/3 rmse score = 0.90385
[0]	validation_0-rmse:2.54127	validation_1-rmse:2.54154
[899]	validation_0-rmse:0.75544	validation_1-rmse:0.90473


[I 2023-10-10 17:32:26,360] Trial 33 finished with value: 0.9065227704823157 and parameters: {'booster': 'gbtree', 'lambda': 0.024084936368706265, 'alpha': 5.5738020952979477e-08, 'subsample': 0.8499605699530353, 'colsample_bytree': 0.5603763694096614, 'n_estimators': 900, 'max_depth': 9, 'min_child_weight': 5, 'eta': 0.0322238261730932, 'gamma': 0.014854732584483683, 'grow_policy': 'depthwise'}. Best is trial 33 with value: 0.9065227704823157.


Fold: 3/3 rmse score = 0.90473
[0]	validation_0-rmse:2.10787	validation_1-rmse:2.10630
[326]	validation_0-rmse:0.53283	validation_1-rmse:0.91951
Fold: 1/3 rmse score = 0.91769
[0]	validation_0-rmse:2.11075	validation_1-rmse:2.11536
[317]	validation_0-rmse:0.54599	validation_1-rmse:0.91209
Fold: 2/3 rmse score = 0.91044
[0]	validation_0-rmse:2.11121	validation_1-rmse:2.11123
[323]	validation_0-rmse:0.54053	validation_1-rmse:0.91986
Fold: 3/3 rmse score = 0.91870


[I 2023-10-10 17:32:47,241] Trial 34 finished with value: 0.9156099096275172 and parameters: {'booster': 'gbtree', 'lambda': 0.024862757263358178, 'alpha': 5.603224994177487e-08, 'subsample': 0.8594554336184166, 'colsample_bytree': 0.5792618785389534, 'n_estimators': 900, 'max_depth': 9, 'min_child_weight': 5, 'eta': 0.2526924966962271, 'gamma': 0.02039574389065319, 'grow_policy': 'depthwise'}. Best is trial 33 with value: 0.9065227704823157.


[0]	validation_0-rmse:1.35496	validation_1-rmse:1.36531
[119]	validation_0-rmse:0.57766	validation_1-rmse:1.06404
Fold: 1/3 rmse score = 0.99504
[0]	validation_0-rmse:1.36219	validation_1-rmse:1.36704
[123]	validation_0-rmse:0.59655	validation_1-rmse:1.05497
Fold: 2/3 rmse score = 0.99863
[0]	validation_0-rmse:1.35859	validation_1-rmse:1.35863
[123]	validation_0-rmse:0.58059	validation_1-rmse:1.05447


[I 2023-10-10 17:32:58,147] Trial 35 finished with value: 0.9960877958094851 and parameters: {'booster': 'gbtree', 'lambda': 0.23741932047277517, 'alpha': 5.315900586214124e-08, 'subsample': 0.8799237558091397, 'colsample_bytree': 0.6034942262269665, 'n_estimators': 900, 'max_depth': 9, 'min_child_weight': 4, 'eta': 0.7408765555729779, 'gamma': 0.01998835123977796, 'grow_policy': 'depthwise'}. Best is trial 33 with value: 0.9065227704823157.


Fold: 3/3 rmse score = 0.99459
[0]	validation_0-rmse:2.52310	validation_1-rmse:2.51828
[331]	validation_0-rmse:1.03592	validation_1-rmse:1.05630
Fold: 1/3 rmse score = 2.25535
[0]	validation_0-rmse:2.52056	validation_1-rmse:2.52532
[330]	validation_0-rmse:1.04192	validation_1-rmse:1.04763
Fold: 2/3 rmse score = 2.26155
[0]	validation_0-rmse:2.52188	validation_1-rmse:2.52240
[330]	validation_0-rmse:1.03757	validation_1-rmse:1.05107


[I 2023-10-10 17:37:30,427] Trial 36 finished with value: 2.258727196045251 and parameters: {'booster': 'dart', 'lambda': 0.03076499077295405, 'alpha': 1.50449415364338e-08, 'subsample': 0.891720735202244, 'colsample_bytree': 0.6168835457914971, 'n_estimators': 800, 'max_depth': 9, 'min_child_weight': 6, 'eta': 0.0391549881742069, 'gamma': 0.0036489538581650103, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'forest', 'rate_drop': 0.8902235018104074, 'skip_drop': 1.702501716320659e-08}. Best is trial 33 with value: 0.9065227704823157.


Fold: 3/3 rmse score = 2.25929
[0]	validation_0-rmse:2.19881	validation_1-rmse:2.19596
[767]	validation_0-rmse:0.48406	validation_1-rmse:0.91612
Fold: 1/3 rmse score = 0.91574
[0]	validation_0-rmse:2.19976	validation_1-rmse:2.20388
[798]	validation_0-rmse:0.47061	validation_1-rmse:0.91087
Fold: 2/3 rmse score = 0.91010
[0]	validation_0-rmse:2.20058	validation_1-rmse:2.20065
[539]	validation_0-rmse:0.57364	validation_1-rmse:0.91350


[I 2023-10-10 17:38:04,119] Trial 37 finished with value: 0.9129279879381276 and parameters: {'booster': 'gbtree', 'lambda': 0.00020338672332576306, 'alpha': 6.902960237920367e-08, 'subsample': 0.8537077748329617, 'colsample_bytree': 0.5864804260004555, 'n_estimators': 900, 'max_depth': 8, 'min_child_weight': 6, 'eta': 0.20751109511970053, 'gamma': 0.02210559157777112, 'grow_policy': 'depthwise'}. Best is trial 33 with value: 0.9065227704823157.


Fold: 3/3 rmse score = 0.91294
[0]	validation_0-rmse:2.58926	validation_1-rmse:2.58416
[999]	validation_0-rmse:0.92931	validation_1-rmse:0.97420
Fold: 1/3 rmse score = 0.97420
[0]	validation_0-rmse:2.58601	validation_1-rmse:2.59082
[999]	validation_0-rmse:0.93721	validation_1-rmse:0.96786


[I 2023-10-10 17:38:34,905] Trial 38 pruned. 


Fold: 2/3 rmse score = 0.96786
[0]	validation_0-rmse:2.60790	validation_1-rmse:2.60270
[899]	validation_0-rmse:2.30564	validation_1-rmse:2.30180
Fold: 1/3 rmse score = 2.30180
[0]	validation_0-rmse:2.60455	validation_1-rmse:2.60941
[899]	validation_0-rmse:2.30354	validation_1-rmse:2.30793


[I 2023-10-10 18:01:10,019] Trial 39 pruned. 


Fold: 2/3 rmse score = 2.30793
[0]	validation_0-rmse:2.60817	validation_1-rmse:2.60297
[899]	validation_0-rmse:2.50903	validation_1-rmse:2.50437
Fold: 1/3 rmse score = 2.50437
[0]	validation_0-rmse:2.60482	validation_1-rmse:2.60967
[899]	validation_0-rmse:2.50608	validation_1-rmse:2.51093


[I 2023-10-10 18:01:53,638] Trial 40 pruned. 


Fold: 2/3 rmse score = 2.51093
[0]	validation_0-rmse:2.23945	validation_1-rmse:2.23661
[480]	validation_0-rmse:0.52618	validation_1-rmse:0.91399
Fold: 1/3 rmse score = 0.91294
[0]	validation_0-rmse:2.23838	validation_1-rmse:2.24285
[437]	validation_0-rmse:0.55188	validation_1-rmse:0.91007
Fold: 2/3 rmse score = 0.90966
[0]	validation_0-rmse:2.23946	validation_1-rmse:2.23951
[480]	validation_0-rmse:0.52406	validation_1-rmse:0.91434


[I 2023-10-10 18:02:21,922] Trial 41 finished with value: 0.9118080495852642 and parameters: {'booster': 'gbtree', 'lambda': 0.03699747686862054, 'alpha': 1.8324104453305583e-08, 'subsample': 0.7693431588399194, 'colsample_bytree': 0.5683049547354748, 'n_estimators': 800, 'max_depth': 9, 'min_child_weight': 6, 'eta': 0.1852557189137754, 'gamma': 0.015884075423981237, 'grow_policy': 'depthwise'}. Best is trial 33 with value: 0.9065227704823157.


Fold: 3/3 rmse score = 0.91282
[0]	validation_0-rmse:2.24682	validation_1-rmse:2.24428
[694]	validation_0-rmse:0.41916	validation_1-rmse:0.90238
Fold: 1/3 rmse score = 0.90213
[0]	validation_0-rmse:2.24477	validation_1-rmse:2.24959
[701]	validation_0-rmse:0.42232	validation_1-rmse:0.89513
Fold: 2/3 rmse score = 0.89469
[0]	validation_0-rmse:2.24570	validation_1-rmse:2.24577
[543]	validation_0-rmse:0.49518	validation_1-rmse:0.90046


[I 2023-10-10 18:02:58,615] Trial 42 finished with value: 0.8989975631348216 and parameters: {'booster': 'gbtree', 'lambda': 0.0026449295029187334, 'alpha': 1.1698427232854759e-08, 'subsample': 0.9360287065839872, 'colsample_bytree': 0.5688259395217892, 'n_estimators': 800, 'max_depth': 9, 'min_child_weight': 6, 'eta': 0.1818399274788474, 'gamma': 0.0035005411330456602, 'grow_policy': 'depthwise'}. Best is trial 42 with value: 0.8989975631348216.


Fold: 3/3 rmse score = 0.90018
[0]	validation_0-rmse:2.40770	validation_1-rmse:2.40370
[799]	validation_0-rmse:0.64490	validation_1-rmse:0.89985
Fold: 1/3 rmse score = 0.89983
[0]	validation_0-rmse:2.40510	validation_1-rmse:2.40954
[799]	validation_0-rmse:0.64698	validation_1-rmse:0.88969
Fold: 2/3 rmse score = 0.88967
[0]	validation_0-rmse:2.40622	validation_1-rmse:2.40644
[799]	validation_0-rmse:0.64386	validation_1-rmse:0.89772


[I 2023-10-10 18:03:35,069] Trial 43 finished with value: 0.8957313382356813 and parameters: {'booster': 'gbtree', 'lambda': 0.0031212004487157427, 'alpha': 1.3125027149309643e-08, 'subsample': 0.9226240655739183, 'colsample_bytree': 0.5563709697372726, 'n_estimators': 800, 'max_depth': 8, 'min_child_weight': 6, 'eta': 0.10070284604196998, 'gamma': 0.0024013475278437646, 'grow_policy': 'depthwise'}. Best is trial 43 with value: 0.8957313382356813.


Fold: 3/3 rmse score = 0.89770
[0]	validation_0-rmse:2.51814	validation_1-rmse:2.51353
[699]	validation_0-rmse:0.75509	validation_1-rmse:0.90917
Fold: 1/3 rmse score = 0.90917
[0]	validation_0-rmse:2.51509	validation_1-rmse:2.51993
[699]	validation_0-rmse:0.76075	validation_1-rmse:0.90333
Fold: 2/3 rmse score = 0.90333
[0]	validation_0-rmse:2.51646	validation_1-rmse:2.51675
[699]	validation_0-rmse:0.75568	validation_1-rmse:0.90516


[I 2023-10-10 18:04:12,680] Trial 44 finished with value: 0.9058842404717952 and parameters: {'booster': 'gbtree', 'lambda': 0.002410584556525973, 'alpha': 1.3876744629693508e-08, 'subsample': 0.9345965957353602, 'colsample_bytree': 0.5684385125902744, 'n_estimators': 700, 'max_depth': 9, 'min_child_weight': 6, 'eta': 0.04452198854203528, 'gamma': 0.0018636639699541777, 'grow_policy': 'depthwise'}. Best is trial 43 with value: 0.8957313382356813.


Fold: 3/3 rmse score = 0.90516
[0]	validation_0-rmse:2.51467	validation_1-rmse:2.50985
[599]	validation_0-rmse:0.83586	validation_1-rmse:0.93320
Fold: 1/3 rmse score = 0.93320
[0]	validation_0-rmse:2.51214	validation_1-rmse:2.51684
[599]	validation_0-rmse:0.84332	validation_1-rmse:0.92674
Fold: 2/3 rmse score = 0.92674
[0]	validation_0-rmse:2.51336	validation_1-rmse:2.51383
[599]	validation_0-rmse:0.83959	validation_1-rmse:0.93000


[I 2023-10-10 18:19:35,730] Trial 45 finished with value: 0.9299787992240934 and parameters: {'booster': 'dart', 'lambda': 0.0020223742284974345, 'alpha': 1.2184314925204472e-08, 'subsample': 0.9301753128937164, 'colsample_bytree': 0.6375807221705884, 'n_estimators': 600, 'max_depth': 8, 'min_child_weight': 6, 'eta': 0.04347327122905812, 'gamma': 0.0012190193755130952, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 9.776329107921942e-05, 'skip_drop': 4.429240963896771e-06}. Best is trial 43 with value: 0.8957313382356813.


Fold: 3/3 rmse score = 0.93000
[0]	validation_0-rmse:2.57920	validation_1-rmse:2.57413
[699]	validation_0-rmse:1.10769	validation_1-rmse:1.11615
Fold: 1/3 rmse score = 1.11615
[0]	validation_0-rmse:2.57599	validation_1-rmse:2.58077
[699]	validation_0-rmse:1.11316	validation_1-rmse:1.10940


[I 2023-10-10 18:19:48,171] Trial 46 pruned. 


Fold: 2/3 rmse score = 1.10940
[0]	validation_0-rmse:2.53263	validation_1-rmse:2.52791
[699]	validation_0-rmse:0.77829	validation_1-rmse:0.91408
Fold: 1/3 rmse score = 0.91408
[0]	validation_0-rmse:2.52955	validation_1-rmse:2.53439
[699]	validation_0-rmse:0.78592	validation_1-rmse:0.90716
Fold: 2/3 rmse score = 0.90716
[0]	validation_0-rmse:2.53090	validation_1-rmse:2.53120
[699]	validation_0-rmse:0.77978	validation_1-rmse:0.90972


[I 2023-10-10 18:20:26,023] Trial 47 finished with value: 0.910319095938804 and parameters: {'booster': 'gbtree', 'lambda': 0.000733828764275135, 'alpha': 1.0380129688779815e-08, 'subsample': 0.9182558043052323, 'colsample_bytree': 0.5702923434084608, 'n_estimators': 700, 'max_depth': 9, 'min_child_weight': 6, 'eta': 0.03733224145607229, 'gamma': 0.00044979787287273854, 'grow_policy': 'depthwise'}. Best is trial 43 with value: 0.8957313382356813.


Fold: 3/3 rmse score = 0.90972
[0]	validation_0-rmse:1.17469	validation_1-rmse:1.18732
[120]	validation_0-rmse:0.64092	validation_1-rmse:1.07874
Fold: 1/3 rmse score = 1.00295
[0]	validation_0-rmse:1.17861	validation_1-rmse:1.18211
[130]	validation_0-rmse:0.63398	validation_1-rmse:1.06217
Fold: 2/3 rmse score = 0.99671
[0]	validation_0-rmse:1.17877	validation_1-rmse:1.17827
[130]	validation_0-rmse:0.62642	validation_1-rmse:1.08654


[I 2023-10-10 18:20:36,097] Trial 48 finished with value: 1.00173026520643 and parameters: {'booster': 'gbtree', 'lambda': 0.000709666261209232, 'alpha': 2.6975470721312634e-08, 'subsample': 0.9409657636496274, 'colsample_bytree': 0.7196911784368303, 'n_estimators': 700, 'max_depth': 8, 'min_child_weight': 3, 'eta': 0.9129775065590999, 'gamma': 0.0002837258089877021, 'grow_policy': 'depthwise'}. Best is trial 43 with value: 0.8957313382356813.


Fold: 3/3 rmse score = 1.00553
[0]	validation_0-rmse:2.60359	validation_1-rmse:2.59841
[799]	validation_0-rmse:1.08404	validation_1-rmse:1.10512
Fold: 1/3 rmse score = 1.10512
[0]	validation_0-rmse:2.60025	validation_1-rmse:2.60511
[799]	validation_0-rmse:1.08901	validation_1-rmse:1.10255


[I 2023-10-10 18:21:16,943] Trial 49 pruned. 


Fold: 2/3 rmse score = 1.10255


In [9]:
trial = study.best_trial

In [10]:
print(trial.params)

{'booster': 'gbtree', 'lambda': 0.0031212004487157427, 'alpha': 1.3125027149309643e-08, 'subsample': 0.9226240655739183, 'colsample_bytree': 0.5563709697372726, 'n_estimators': 800, 'max_depth': 8, 'min_child_weight': 6, 'eta': 0.10070284604196998, 'gamma': 0.0024013475278437646, 'grow_policy': 'depthwise'}


In [11]:
best_param = {
  'booster': 'gbtree',
  'lambda': 0.0031212004487157427,
  'alpha': 1.3125027149309643e-08,
  'subsample': 0.9226240655739183,
  'colsample_bytree': 0.5563709697372726,
  'n_estimators': 800,
  'max_depth': 8,
  'min_child_weight': 6,
  'eta': 0.10070284604196998,
  'gamma': 0.0024013475278437646,
  'grow_policy': 'depthwise'
}

val_scores, y_tests, models = train_model(best_param, train_x, train_y, train_null_df, test=True)

[0]	validation_0-rmse:2.40770	validation_1-rmse:2.40370
[799]	validation_0-rmse:0.64490	validation_1-rmse:0.89985
Fold: 1/3 rmse score = 0.89983
[0]	validation_0-rmse:2.40510	validation_1-rmse:2.40954
[799]	validation_0-rmse:0.64698	validation_1-rmse:0.88969
Fold: 2/3 rmse score = 0.88967
[0]	validation_0-rmse:2.40622	validation_1-rmse:2.40644
[799]	validation_0-rmse:0.64386	validation_1-rmse:0.89772
Fold: 3/3 rmse score = 0.89770


In [12]:
for idx, model in enumerate(models):
  file_name = f"./models/BN_model_{idx}.model"
  model.save_model(file_name)

In [13]:
train_null_df["BN"] = np.mean(y_tests, axis=0)

In [14]:
result = pd.concat([train_df, train_null_df], axis=0).sort_index()

In [15]:
result.head(10)

Unnamed: 0,ARI_CO_BR,ARI_CO_CA,ARI_CO_CL,ARI_CO_CN,ARI_CO_FI,ARI_CO_ID,ARI_CO_IN,ARI_CO_JP,ARI_CO_KR,ARI_CO_LV,...,SHIP_TYPE_CATEGORY_Container,SHIP_TYPE_CATEGORY_Tanker,WITlt0,WTI,day,hour,minute,month,weekday,year
0,0,0,0,1,0,0,0,0,0,0,...,0,0,0,-1.081668,15,4,3,10,3,2020
1,0,0,0,1,0,0,0,0,0,0,...,1,0,0,-0.188614,17,2,55,9,1,2019
2,0,0,0,1,0,0,0,0,0,0,...,1,0,0,-0.305226,23,6,43,2,5,2019
3,0,0,0,0,0,0,0,1,0,0,...,0,0,0,-1.07438,18,22,6,9,4,2020
4,0,0,0,0,0,0,0,0,0,0,...,1,0,0,1.209272,13,12,57,8,5,2022
5,0,0,0,1,0,0,0,0,0,0,...,1,0,0,-0.84067,8,14,24,9,1,2015
6,0,0,0,0,0,0,0,1,0,0,...,1,0,0,-0.50784,25,21,34,1,0,2021
7,0,0,0,0,0,0,0,1,0,0,...,0,0,0,-0.459737,18,3,48,6,1,2019
8,0,0,0,0,0,0,0,1,0,0,...,1,0,0,-0.488404,27,6,23,1,4,2017
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0.008168,3,7,8,2,5,2018


In [16]:
result.to_parquet("./data/train_v2_2bn.parquet")

In [20]:
test_df = pd.read_parquet('./data/test_v2_3.parquet')
bn_null_idx = test_df[test_df["BN"].isna()].index
test_null_df = test_df.loc[bn_null_idx]
bn_idx = test_df[test_df["BN"].notna()].index
test_df = test_df.loc[bn_idx]

In [22]:
print(len(test_df))
print(len(test_null_df))

143062
101927


In [23]:
test_null_df = test_null_df.drop(columns=["BN"])

In [24]:
test_winds = []
for model in models:
  test_winds.append(model.predict(test_null_df))

In [25]:
test_null_df["BN"] = np.mean(test_winds, axis=0)

In [26]:
assert sorted(train_df.columns) == sorted(test_df.columns)

In [27]:
test_null_df.head()

Unnamed: 0,ARI_CO_BR,ARI_CO_CA,ARI_CO_CL,ARI_CO_CN,ARI_CO_FI,ARI_CO_ID,ARI_CO_IN,ARI_CO_JP,ARI_CO_KR,ARI_CO_LV,...,SHIP_TYPE_CATEGORY_Tanker,WITlt0,WTI,day,hour,minute,month,weekday,year,BN
3,0,0,0,0,0,0,0,1,0,0,...,0,0,-1.152121,2,0,59,8,1,2016,0.606377
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0.269088,14,10,6,7,5,2018,2.777071
6,0,0,0,0,0,0,0,0,0,0,...,0,0,-0.524846,18,14,11,1,2,2017,0.690901
7,0,0,0,0,0,0,0,0,0,0,...,0,0,-0.613276,12,0,57,10,3,2017,1.149996
11,0,0,0,0,0,0,0,0,0,0,...,0,0,-1.337242,2,6,6,4,5,2016,2.351849


In [28]:
test_result = pd.concat([test_df, test_null_df], axis=0).sort_index()

In [30]:
test_result.head(10)

Unnamed: 0,ARI_CO_BR,ARI_CO_CA,ARI_CO_CL,ARI_CO_CN,ARI_CO_FI,ARI_CO_ID,ARI_CO_IN,ARI_CO_JP,ARI_CO_KR,ARI_CO_LV,...,SHIP_TYPE_CATEGORY_Container,SHIP_TYPE_CATEGORY_Tanker,WITlt0,WTI,day,hour,minute,month,weekday,year
0,0,0,0,0,0,0,0,1,0,0,...,0,1,0,1.279239,27,8,29,8,5,2022
1,0,0,0,0,0,0,0,0,0,0,...,1,0,0,1.81614,27,20,57,3,6,2022
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0.789955,18,2,6,1,2,2023
3,0,0,0,0,0,0,0,1,0,0,...,0,0,0,-1.152121,2,0,59,8,1,2016
4,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0.821537,24,0,27,1,1,2023
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0.269088,14,10,6,7,5,2018
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,-0.524846,18,14,11,1,2,2017
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,-0.613276,12,0,57,10,3,2017
8,0,0,0,0,0,0,0,1,0,0,...,0,0,0,-0.087551,13,0,35,3,5,2021
9,0,0,0,0,0,0,0,1,0,0,...,1,0,0,-0.985949,29,5,46,7,2,2020


In [31]:
test_df.to_parquet("./data/test_v2_3.parquet")