In [1]:
import warnings
warnings.filterwarnings(action="ignore")
import datetime
import time
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import HyperbandPruner

In [2]:
RANDOM_SEED = 99

In [3]:
train_df = pd.read_parquet('./data/train_v2_2_nn.parquet').drop(columns=["V_WIND"])
train_null_df = pd.read_parquet('./data/train_v2_2_n.parquet').drop(columns=["U_WIND", "V_WIND"])
test_df = pd.read_parquet('./data/test_v2_2.parquet').drop(columns=["U_WIND", "V_WIND"])

In [4]:
train_x = train_df.drop(columns=["U_WIND"])
train_y = train_df["U_WIND"]

In [4]:
def create_model(param):
    return xgb.XGBRegressor(
        random_state=RANDOM_SEED,
        tree_method="gpu_hist",
        eval_metric="rmse",
        early_stopping_rounds=100,
        **param
    )

In [14]:
def train_model(param, X, y, X_test, trial=None, test=False):
  skf = KFold(n_splits=3, shuffle=True, random_state=RANDOM_SEED)

  val_scores = []
  y_tests = []
  models = []

  for idx, (train_idx, val_idx) in enumerate(skf.split(X, y)):

    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

    model = create_model(param)
    model.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)], verbose=1000)

    y_hat_val = model.predict(X_val)
    score = mean_squared_error(y_val.values, y_hat_val)
    val_scores.append(score)
    print(f'Fold: {idx+1}/3 mse score = {score:.5f}')

    if test:
      y_tests.append(model.predict(X_test))
      models.append(model)

    if trial:
      trial.report(score, idx)

      if trial.should_prune():
        raise optuna.TrialPruned()

  return val_scores, y_tests, models

In [15]:
def objective_xgb(trial):
  param = {
    "booster": trial.suggest_categorical("booster", ["gbtree", "dart"]),
    "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
    "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
    "subsample": trial.suggest_float("subsample", 0.1, 1.0),
    "colsample_bytree": trial.suggest_float("colsample_bytree", 0.1, 1.0),
    "n_estimators": trial.suggest_int("n_estimators", 100, 1000, step=100),
    "max_depth": trial.suggest_int("max_depth", 3, 9),
    "min_child_weight": trial.suggest_int("min_child_weight", 2, 10),
    "eta": trial.suggest_float("eta", 1e-8, 1.0, log=True),
    "gamma": trial.suggest_float("gamma", 1e-8, 1.0, log=True),
    "grow_policy": trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])
  }

  if param["booster"] == "dart":
    param['sample_type'] = trial.suggest_categorical('sample_type', ['uniform', 'weighted'])
    param['normalize_type'] = trial.suggest_categorical('normalize_type', ['tree', 'forest'])
    param['rate_drop'] = trial.suggest_float('rate_drop', 1e-8, 1.0, log=True)
    param['skip_drop'] = trial.suggest_float('skip_drop', 1e-8, 1.0, log=True)

  val_scores, y_tests, models = train_model(param, train_x, train_y, test_df, trial)

  return sum(val_scores) / len(val_scores)

In [16]:
study = optuna.create_study(
    sampler=TPESampler(seed=RANDOM_SEED),
    direction='minimize',
    study_name='xgb_tuning',
    pruner=HyperbandPruner(
      min_resource=1, max_resource=8, reduction_factor=3
    ),
)

study.optimize(objective_xgb, n_trials=50)

[I 2023-10-09 19:14:37,275] A new study created in memory with name: xgb_tuning


[0]	validation_0-rmse:3.46023	validation_1-rmse:3.45690
[299]	validation_0-rmse:3.46023	validation_1-rmse:3.45690
Fold: 1/3 rmse score = 11.95018
[0]	validation_0-rmse:3.46244	validation_1-rmse:3.45248
[299]	validation_0-rmse:3.46243	validation_1-rmse:3.45248
Fold: 2/3 rmse score = 11.91960
[0]	validation_0-rmse:3.45469	validation_1-rmse:3.46796
[299]	validation_0-rmse:3.45469	validation_1-rmse:3.46796


[I 2023-10-09 19:14:48,603] Trial 0 finished with value: 11.965502264735647 and parameters: {'booster': 'gbtree', 'lambda': 0.04017550942202233, 'alpha': 1.7847241527490656e-08, 'subsample': 0.8272449670283629, 'colsample_bytree': 0.6090556776494775, 'n_estimators': 300, 'max_depth': 3, 'min_child_weight': 10, 'eta': 1.1339812255777714e-08, 'gamma': 0.014399394343769443, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 11.965502264735647.


Fold: 3/3 rmse score = 12.02673
[0]	validation_0-rmse:3.46023	validation_1-rmse:3.45690
[899]	validation_0-rmse:3.45960	validation_1-rmse:3.45630
Fold: 1/3 rmse score = 11.94600
[0]	validation_0-rmse:3.46243	validation_1-rmse:3.45248
[899]	validation_0-rmse:3.46182	validation_1-rmse:3.45186
Fold: 2/3 rmse score = 11.91535
[0]	validation_0-rmse:3.45469	validation_1-rmse:3.46796
[899]	validation_0-rmse:3.45408	validation_1-rmse:3.46734


[I 2023-10-09 19:47:16,063] Trial 1 finished with value: 11.961268186582119 and parameters: {'booster': 'dart', 'lambda': 1.4575798463318516e-05, 'alpha': 0.6189425995687715, 'subsample': 0.5719732438377161, 'colsample_bytree': 0.1842517839679773, 'n_estimators': 900, 'max_depth': 4, 'min_child_weight': 6, 'eta': 2.178476742104051e-06, 'gamma': 0.033817296080697445, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'tree', 'rate_drop': 1.4193450623251104e-07, 'skip_drop': 4.983900949448835e-07}. Best is trial 1 with value: 11.961268186582119.


Fold: 3/3 rmse score = 12.02246
[0]	validation_0-rmse:3.46023	validation_1-rmse:3.45690
[199]	validation_0-rmse:3.46022	validation_1-rmse:3.45690
Fold: 1/3 rmse score = 11.95016
[0]	validation_0-rmse:3.46244	validation_1-rmse:3.45248
[199]	validation_0-rmse:3.46243	validation_1-rmse:3.45247
Fold: 2/3 rmse score = 11.91958
[0]	validation_0-rmse:3.45469	validation_1-rmse:3.46796
[199]	validation_0-rmse:3.45469	validation_1-rmse:3.46796


[I 2023-10-09 19:47:59,596] Trial 2 finished with value: 11.965485853232332 and parameters: {'booster': 'gbtree', 'lambda': 7.591896035963056e-07, 'alpha': 1.128298527729365e-08, 'subsample': 0.9087797683651598, 'colsample_bytree': 0.597010984189897, 'n_estimators': 200, 'max_depth': 9, 'min_child_weight': 6, 'eta': 2.1580835301822618e-08, 'gamma': 0.00015813016339763232, 'grow_policy': 'lossguide'}. Best is trial 1 with value: 11.961268186582119.


Fold: 3/3 rmse score = 12.02671
[0]	validation_0-rmse:3.32307	validation_1-rmse:3.32720
[399]	validation_0-rmse:2.66384	validation_1-rmse:2.83709
Fold: 1/3 rmse score = 8.04246
[0]	validation_0-rmse:3.32858	validation_1-rmse:3.31546
[399]	validation_0-rmse:2.66642	validation_1-rmse:2.81512
Fold: 2/3 rmse score = 7.91799
[0]	validation_0-rmse:3.32304	validation_1-rmse:3.33316
[399]	validation_0-rmse:2.67054	validation_1-rmse:2.83163


[I 2023-10-09 19:48:14,421] Trial 3 finished with value: 7.9923882886718935 and parameters: {'booster': 'gbtree', 'lambda': 0.5574271302327638, 'alpha': 5.369144319173959e-05, 'subsample': 0.3376820528693848, 'colsample_bytree': 0.5944405760344648, 'n_estimators': 400, 'max_depth': 4, 'min_child_weight': 10, 'eta': 0.32615150214792754, 'gamma': 0.05011280824271637, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 7.9923882886718935.


Fold: 3/3 rmse score = 8.01672
[0]	validation_0-rmse:3.46023	validation_1-rmse:3.45690
[699]	validation_0-rmse:3.46016	validation_1-rmse:3.45684
Fold: 1/3 rmse score = 11.94977
[0]	validation_0-rmse:3.46244	validation_1-rmse:3.45248
[699]	validation_0-rmse:3.46237	validation_1-rmse:3.45242
Fold: 2/3 rmse score = 11.91917
[0]	validation_0-rmse:3.45469	validation_1-rmse:3.46796
[699]	validation_0-rmse:3.45463	validation_1-rmse:3.46790


[I 2023-10-09 19:48:36,815] Trial 4 finished with value: 11.965083406494722 and parameters: {'booster': 'gbtree', 'lambda': 1.9040058970389597e-05, 'alpha': 0.0013569405041690126, 'subsample': 0.9788061899951741, 'colsample_bytree': 0.7347595233221714, 'n_estimators': 700, 'max_depth': 4, 'min_child_weight': 4, 'eta': 1.9058143481065904e-07, 'gamma': 2.700633373820546e-06, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 7.9923882886718935.


Fold: 3/3 rmse score = 12.02631
[0]	validation_0-rmse:3.46011	validation_1-rmse:3.45679
[999]	validation_0-rmse:3.44618	validation_1-rmse:3.44337
Fold: 1/3 rmse score = 11.85678
[0]	validation_0-rmse:3.46232	validation_1-rmse:3.45236
[999]	validation_0-rmse:3.44867	validation_1-rmse:3.43849


[I 2023-10-09 20:15:17,679] Trial 5 pruned. 


Fold: 2/3 rmse score = 11.82318
[0]	validation_0-rmse:3.45953	validation_1-rmse:3.45623
[599]	validation_0-rmse:3.23449	validation_1-rmse:3.24038
Fold: 1/3 rmse score = 10.50003
[0]	validation_0-rmse:3.46174	validation_1-rmse:3.45178
[599]	validation_0-rmse:3.23913	validation_1-rmse:3.22994


[I 2023-10-09 20:15:31,732] Trial 6 pruned. 


Fold: 2/3 rmse score = 10.43253
[0]	validation_0-rmse:3.46022	validation_1-rmse:3.45690
[899]	validation_0-rmse:3.45803	validation_1-rmse:3.45480
Fold: 1/3 rmse score = 11.93562
[0]	validation_0-rmse:3.46243	validation_1-rmse:3.45247
[899]	validation_0-rmse:3.46027	validation_1-rmse:3.45031


[I 2023-10-09 20:36:55,531] Trial 7 pruned. 


Fold: 2/3 rmse score = 11.90460
[0]	validation_0-rmse:3.45992	validation_1-rmse:3.45661
[799]	validation_0-rmse:3.30382	validation_1-rmse:3.30634
Fold: 1/3 rmse score = 10.93190
[0]	validation_0-rmse:3.46213	validation_1-rmse:3.45217
[799]	validation_0-rmse:3.30795	validation_1-rmse:3.29803
Fold: 2/3 rmse score = 10.87699
[0]	validation_0-rmse:3.45439	validation_1-rmse:3.46765
[799]	validation_0-rmse:3.30161	validation_1-rmse:3.31296


[I 2023-10-09 21:02:35,809] Trial 8 finished with value: 10.928205396951215 and parameters: {'booster': 'dart', 'lambda': 0.016269894265187258, 'alpha': 0.00027473371985831937, 'subsample': 0.7205784430671397, 'colsample_bytree': 0.32422360794447835, 'n_estimators': 800, 'max_depth': 5, 'min_child_weight': 7, 'eta': 0.000640160651175959, 'gamma': 0.0007690362020385368, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 2.469722638494642e-07, 'skip_drop': 0.0028449094047407666}. Best is trial 3 with value: 7.9923882886718935.


Fold: 3/3 rmse score = 10.97572
[0]	validation_0-rmse:3.46023	validation_1-rmse:3.45690
[699]	validation_0-rmse:3.46020	validation_1-rmse:3.45687
Fold: 1/3 rmse score = 11.94998
[0]	validation_0-rmse:3.46244	validation_1-rmse:3.45248
[699]	validation_0-rmse:3.46241	validation_1-rmse:3.45245


[I 2023-10-09 21:16:21,137] Trial 9 pruned. 


Fold: 2/3 rmse score = 11.91940
[0]	validation_0-rmse:3.16920	validation_1-rmse:3.18346
[105]	validation_0-rmse:4.59990	validation_1-rmse:5.65503
Fold: 1/3 rmse score = 9.29222
[0]	validation_0-rmse:3.15935	validation_1-rmse:3.15032
[103]	validation_0-rmse:3.94305	validation_1-rmse:4.55887
Fold: 2/3 rmse score = 9.17445
[0]	validation_0-rmse:3.14879	validation_1-rmse:3.15540
[106]	validation_0-rmse:3.83693	validation_1-rmse:4.54882


[I 2023-10-09 21:16:34,314] Trial 10 finished with value: 9.210106853932665 and parameters: {'booster': 'gbtree', 'lambda': 0.20136472246731266, 'alpha': 0.0037294959598213996, 'subsample': 0.1136943669843487, 'colsample_bytree': 0.9836482175958339, 'n_estimators': 400, 'max_depth': 8, 'min_child_weight': 2, 'eta': 0.5421827357095554, 'gamma': 0.6491750314063931, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 7.9923882886718935.


Fold: 3/3 rmse score = 9.16364
[0]	validation_0-rmse:3.08497	validation_1-rmse:3.10601
[101]	validation_0-rmse:4220.70638	validation_1-rmse:4984.97162
Fold: 1/3 rmse score = 9.47471
[0]	validation_0-rmse:3.09618	validation_1-rmse:3.09293
[100]	validation_0-rmse:2583.18797	validation_1-rmse:2901.25936
Fold: 2/3 rmse score = 9.50833
[0]	validation_0-rmse:3.09012	validation_1-rmse:3.09077
[100]	validation_0-rmse:12148.34771	validation_1-rmse:15272.63646


[I 2023-10-09 21:16:45,532] Trial 11 finished with value: 9.497830302763616 and parameters: {'booster': 'gbtree', 'lambda': 0.6915180707346773, 'alpha': 0.007676817778163076, 'subsample': 0.10260407700657599, 'colsample_bytree': 0.9303022934183071, 'n_estimators': 400, 'max_depth': 8, 'min_child_weight': 2, 'eta': 0.989503289598702, 'gamma': 0.7293916524775244, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 7.9923882886718935.


Fold: 3/3 rmse score = 9.51045
[0]	validation_0-rmse:3.10837	validation_1-rmse:3.12355
[128]	validation_0-rmse:2.38611	validation_1-rmse:3.07306
Fold: 1/3 rmse score = 8.28995
[0]	validation_0-rmse:3.11637	validation_1-rmse:3.10621
[116]	validation_0-rmse:2.43097	validation_1-rmse:3.02439
Fold: 2/3 rmse score = 8.07271
[0]	validation_0-rmse:3.10888	validation_1-rmse:3.11979
[119]	validation_0-rmse:2.43942	validation_1-rmse:3.09468


[I 2023-10-09 21:16:58,890] Trial 12 finished with value: 8.17436166183875 and parameters: {'booster': 'gbtree', 'lambda': 0.9390089759158639, 'alpha': 0.0085597513515605, 'subsample': 0.4052937943893192, 'colsample_bytree': 0.965774787473821, 'n_estimators': 400, 'max_depth': 7, 'min_child_weight': 2, 'eta': 0.6887165204615721, 'gamma': 0.998337799421535, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 7.9923882886718935.


Fold: 3/3 rmse score = 8.16042
[0]	validation_0-rmse:3.41750	validation_1-rmse:3.41621
[99]	validation_0-rmse:2.85033	validation_1-rmse:2.91031
Fold: 1/3 rmse score = 8.46993
[0]	validation_0-rmse:3.42064	validation_1-rmse:3.41051
[99]	validation_0-rmse:2.85977	validation_1-rmse:2.89700
Fold: 2/3 rmse score = 8.39262
[0]	validation_0-rmse:3.41353	validation_1-rmse:3.42619
[99]	validation_0-rmse:2.85889	validation_1-rmse:2.90114


[I 2023-10-09 21:17:10,766] Trial 13 finished with value: 8.426383083917411 and parameters: {'booster': 'gbtree', 'lambda': 0.006004222319263677, 'alpha': 5.162400489170195e-05, 'subsample': 0.4220775521125347, 'colsample_bytree': 0.7575724832065093, 'n_estimators': 100, 'max_depth': 7, 'min_child_weight': 4, 'eta': 0.05933074855108428, 'gamma': 0.6683362526305419, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 7.9923882886718935.


Fold: 3/3 rmse score = 8.41660
[0]	validation_0-rmse:3.45026	validation_1-rmse:3.44746
[499]	validation_0-rmse:2.86557	validation_1-rmse:2.92010
Fold: 1/3 rmse score = 8.52699
[0]	validation_0-rmse:3.45270	validation_1-rmse:3.44265
[499]	validation_0-rmse:2.87134	validation_1-rmse:2.90174
Fold: 2/3 rmse score = 8.42007


[I 2023-10-09 21:17:28,771] Trial 14 pruned. 


[0]	validation_0-rmse:3.44692	validation_1-rmse:3.44432
[299]	validation_0-rmse:2.85162	validation_1-rmse:2.91332
Fold: 1/3 rmse score = 8.48746
[0]	validation_0-rmse:3.44972	validation_1-rmse:3.43969
[299]	validation_0-rmse:2.85124	validation_1-rmse:2.89114
Fold: 2/3 rmse score = 8.35868
[0]	validation_0-rmse:3.44207	validation_1-rmse:3.45513
[299]	validation_0-rmse:2.85492	validation_1-rmse:2.89849


[I 2023-10-09 21:17:55,324] Trial 15 finished with value: 8.415801257430997 and parameters: {'booster': 'gbtree', 'lambda': 0.0016302588828141604, 'alpha': 0.00022220824010587233, 'subsample': 0.5039745036025409, 'colsample_bytree': 0.703970317433247, 'n_estimators': 300, 'max_depth': 7, 'min_child_weight': 3, 'eta': 0.01974121531316481, 'gamma': 0.0018815303958341514, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 7.9923882886718935.


Fold: 3/3 rmse score = 8.40127
[0]	validation_0-rmse:3.35864	validation_1-rmse:3.35945
[499]	validation_0-rmse:2.22738	validation_1-rmse:2.69907
Fold: 1/3 rmse score = 7.28323
[0]	validation_0-rmse:3.36457	validation_1-rmse:3.35388
[499]	validation_0-rmse:2.23125	validation_1-rmse:2.69848
Fold: 2/3 rmse score = 7.27821
[0]	validation_0-rmse:3.35473	validation_1-rmse:3.36719
[499]	validation_0-rmse:2.22357	validation_1-rmse:2.68514
Fold: 3/3 rmse score = 7.21000

[I 2023-10-09 21:18:32,173] Trial 16 finished with value: 7.257146311224218 and parameters: {'booster': 'gbtree', 'lambda': 0.0011101304704990158, 'alpha': 0.018777324539088213, 'subsample': 0.36059458575414616, 'colsample_bytree': 0.8542635526574458, 'n_estimators': 500, 'max_depth': 7, 'min_child_weight': 8, 'eta': 0.14566841873916728, 'gamma': 0.10061518059491076, 'grow_policy': 'lossguide'}. Best is trial 16 with value: 7.257146311224218.



[0]	validation_0-rmse:3.45826	validation_1-rmse:3.45501
[599]	validation_0-rmse:3.08626	validation_1-rmse:3.10051
Fold: 1/3 rmse score = 9.61318
[0]	validation_0-rmse:3.46059	validation_1-rmse:3.45061
[599]	validation_0-rmse:3.09271	validation_1-rmse:3.08447


[I 2023-10-09 21:18:49,263] Trial 17 pruned. 


Fold: 2/3 rmse score = 9.51397
[0]	validation_0-rmse:3.39508	validation_1-rmse:3.39593
[199]	validation_0-rmse:2.40685	validation_1-rmse:2.67931
Fold: 1/3 rmse score = 7.17871
[0]	validation_0-rmse:3.39880	validation_1-rmse:3.38893
[199]	validation_0-rmse:2.39462	validation_1-rmse:2.66114
Fold: 2/3 rmse score = 7.08110
[0]	validation_0-rmse:3.39195	validation_1-rmse:3.40488
[199]	validation_0-rmse:2.39011	validation_1-rmse:2.65341


[I 2023-10-09 21:19:02,994] Trial 18 finished with value: 7.100132162699477 and parameters: {'booster': 'gbtree', 'lambda': 0.06870441050020963, 'alpha': 0.001008867356309089, 'subsample': 0.5677625149381036, 'colsample_bytree': 0.46267397765136364, 'n_estimators': 200, 'max_depth': 9, 'min_child_weight': 9, 'eta': 0.0988204152102819, 'gamma': 0.09273277305571015, 'grow_policy': 'depthwise'}. Best is trial 18 with value: 7.100132162699477.


Fold: 3/3 rmse score = 7.04058
[0]	validation_0-rmse:3.45590	validation_1-rmse:3.45284
[199]	validation_0-rmse:3.02112	validation_1-rmse:3.05299
Fold: 1/3 rmse score = 9.32074
[0]	validation_0-rmse:3.45823	validation_1-rmse:3.44827
[199]	validation_0-rmse:3.02351	validation_1-rmse:3.03366


[I 2023-10-09 21:19:13,417] Trial 19 pruned. 


Fold: 2/3 rmse score = 9.20312
[0]	validation_0-rmse:3.42670	validation_1-rmse:3.42553
[99]	validation_0-rmse:2.78060	validation_1-rmse:2.86187
Fold: 1/3 rmse score = 8.19028
[0]	validation_0-rmse:3.42955	validation_1-rmse:3.41968
[99]	validation_0-rmse:2.78492	validation_1-rmse:2.83884
Fold: 2/3 rmse score = 8.05904
[0]	validation_0-rmse:3.42224	validation_1-rmse:3.43545
[99]	validation_0-rmse:2.78376	validation_1-rmse:2.84737


[I 2023-10-09 21:19:21,860] Trial 20 finished with value: 8.11895209742756 and parameters: {'booster': 'gbtree', 'lambda': 0.042733330364855555, 'alpha': 0.08394543238747437, 'subsample': 0.6547691175699307, 'colsample_bytree': 0.4383448218989016, 'n_estimators': 100, 'max_depth': 8, 'min_child_weight': 9, 'eta': 0.06414846914903319, 'gamma': 0.00618944227597624, 'grow_policy': 'depthwise'}. Best is trial 18 with value: 7.100132162699477.


Fold: 3/3 rmse score = 8.10753
[0]	validation_0-rmse:3.39573	validation_1-rmse:3.39624
[499]	validation_0-rmse:2.45235	validation_1-rmse:2.70590
Fold: 1/3 rmse score = 7.32190
[0]	validation_0-rmse:3.40034	validation_1-rmse:3.38977
[499]	validation_0-rmse:2.45814	validation_1-rmse:2.69204
Fold: 2/3 rmse score = 7.24546
[0]	validation_0-rmse:3.39366	validation_1-rmse:3.40536
[499]	validation_0-rmse:2.45945	validation_1-rmse:2.69301
Fold: 3/3 rmse score = 7.25230


[I 2023-10-09 21:19:41,021] Trial 21 finished with value: 7.273221476949438 and parameters: {'booster': 'gbtree', 'lambda': 0.10838080669934495, 'alpha': 0.0003943658550958195, 'subsample': 0.5007114190709858, 'colsample_bytree': 0.6627878938943187, 'n_estimators': 500, 'max_depth': 6, 'min_child_weight': 9, 'eta': 0.11114210357154419, 'gamma': 0.10422669691874459, 'grow_policy': 'depthwise'}. Best is trial 18 with value: 7.100132162699477.


[0]	validation_0-rmse:3.41652	validation_1-rmse:3.41582
[499]	validation_0-rmse:2.43655	validation_1-rmse:2.68402
Fold: 1/3 rmse score = 7.20227
[0]	validation_0-rmse:3.42042	validation_1-rmse:3.41000
[499]	validation_0-rmse:2.44248	validation_1-rmse:2.67452
Fold: 2/3 rmse score = 7.15233
[0]	validation_0-rmse:3.41353	validation_1-rmse:3.42590
[499]	validation_0-rmse:2.44088	validation_1-rmse:2.67236
Fold: 3/3 rmse score = 7.14151


[I 2023-10-09 21:20:02,455] Trial 22 finished with value: 7.165369102698147 and parameters: {'booster': 'gbtree', 'lambda': 0.10036201771551814, 'alpha': 0.0015574736252593846, 'subsample': 0.5158664801781194, 'colsample_bytree': 0.6745341457678686, 'n_estimators': 500, 'max_depth': 7, 'min_child_weight': 9, 'eta': 0.0697017920303057, 'gamma': 0.14566804587799506, 'grow_policy': 'depthwise'}. Best is trial 18 with value: 7.100132162699477.


[0]	validation_0-rmse:3.44978	validation_1-rmse:3.44698
[299]	validation_0-rmse:2.79264	validation_1-rmse:2.86707
Fold: 1/3 rmse score = 8.22010
[0]	validation_0-rmse:3.45216	validation_1-rmse:3.44231
[299]	validation_0-rmse:2.78863	validation_1-rmse:2.84249
Fold: 2/3 rmse score = 8.07973

[I 2023-10-09 21:20:16,715] Trial 23 pruned. 



[0]	validation_0-rmse:3.36252	validation_1-rmse:3.36585
[699]	validation_0-rmse:1.68354	validation_1-rmse:2.62501
Fold: 1/3 rmse score = 6.88824
[0]	validation_0-rmse:3.36813	validation_1-rmse:3.35753
[699]	validation_0-rmse:1.69472	validation_1-rmse:2.61122
Fold: 2/3 rmse score = 6.81718
[0]	validation_0-rmse:3.36200	validation_1-rmse:3.37414
[699]	validation_0-rmse:1.70839	validation_1-rmse:2.62186


[I 2023-10-09 21:20:50,673] Trial 24 finished with value: 6.858429981947185 and parameters: {'booster': 'gbtree', 'lambda': 0.07047565157479507, 'alpha': 0.02013676035072308, 'subsample': 0.5094564948585608, 'colsample_bytree': 0.6645083758135852, 'n_estimators': 700, 'max_depth': 8, 'min_child_weight': 7, 'eta': 0.15481838890693606, 'gamma': 0.008455063373887487, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.858429981947185.


Fold: 3/3 rmse score = 6.86987
[0]	validation_0-rmse:3.43223	validation_1-rmse:3.43060
[699]	validation_0-rmse:2.34269	validation_1-rmse:2.65020
Fold: 1/3 rmse score = 7.02355
[0]	validation_0-rmse:3.43524	validation_1-rmse:3.42509
[699]	validation_0-rmse:2.34843	validation_1-rmse:2.64167
Fold: 2/3 rmse score = 6.97841
[0]	validation_0-rmse:3.42775	validation_1-rmse:3.44048
[699]	validation_0-rmse:2.35223	validation_1-rmse:2.64916


[I 2023-10-09 21:21:23,148] Trial 25 finished with value: 7.0066755380015655 and parameters: {'booster': 'gbtree', 'lambda': 0.16722173767714482, 'alpha': 0.0008985365500864032, 'subsample': 0.5327174329102314, 'colsample_bytree': 0.5094907127896658, 'n_estimators': 700, 'max_depth': 8, 'min_child_weight': 7, 'eta': 0.043111607060602124, 'gamma': 0.008154755350276618, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.858429981947185.


Fold: 3/3 rmse score = 7.01806
[0]	validation_0-rmse:3.45845	validation_1-rmse:3.45520
[699]	validation_0-rmse:3.00520	validation_1-rmse:3.03697
Fold: 1/3 rmse score = 9.22321
[0]	validation_0-rmse:3.46067	validation_1-rmse:3.45071
[699]	validation_0-rmse:3.01054	validation_1-rmse:3.01937


[I 2023-10-09 21:35:19,558] Trial 26 pruned. 


Fold: 2/3 rmse score = 9.11661
[0]	validation_0-rmse:3.45355	validation_1-rmse:3.45070
[799]	validation_0-rmse:2.66187	validation_1-rmse:2.79572
Fold: 1/3 rmse score = 7.81606
[0]	validation_0-rmse:3.45595	validation_1-rmse:3.44602
[799]	validation_0-rmse:2.66474	validation_1-rmse:2.77665
Fold: 2/3 rmse score = 7.70976
[0]	validation_0-rmse:3.44826	validation_1-rmse:3.46143
[799]	validation_0-rmse:2.65965	validation_1-rmse:2.77420


[I 2023-10-09 21:36:05,193] Trial 27 finished with value: 7.7406757634166405 and parameters: {'booster': 'gbtree', 'lambda': 0.14479689100767912, 'alpha': 0.006868664536291057, 'subsample': 0.46767366363257573, 'colsample_bytree': 0.524657632087447, 'n_estimators': 800, 'max_depth': 9, 'min_child_weight': 5, 'eta': 0.009617058320152439, 'gamma': 0.0004514879688277118, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.858429981947185.


Fold: 3/3 rmse score = 7.69620
[0]	validation_0-rmse:3.40346	validation_1-rmse:3.40284
[799]	validation_0-rmse:2.10345	validation_1-rmse:2.73962
Fold: 1/3 rmse score = 7.50328
[0]	validation_0-rmse:3.40725	validation_1-rmse:3.39811
[799]	validation_0-rmse:2.10765	validation_1-rmse:2.72898
Fold: 2/3 rmse score = 7.44733
[0]	validation_0-rmse:3.40111	validation_1-rmse:3.41354
[799]	validation_0-rmse:2.13364	validation_1-rmse:2.74820


[I 2023-10-09 21:36:38,337] Trial 28 finished with value: 7.500526125085725 and parameters: {'booster': 'gbtree', 'lambda': 0.0516596146413769, 'alpha': 0.0006481858861248336, 'subsample': 0.5463991854172147, 'colsample_bytree': 0.10223474724689552, 'n_estimators': 800, 'max_depth': 8, 'min_child_weight': 7, 'eta': 0.18614548992306282, 'gamma': 3.3492915962375857e-05, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.858429981947185.


Fold: 3/3 rmse score = 7.55098
[0]	validation_0-rmse:3.43584	validation_1-rmse:3.43431
[599]	validation_0-rmse:2.29764	validation_1-rmse:2.62637
Fold: 1/3 rmse score = 6.89784
[0]	validation_0-rmse:3.43914	validation_1-rmse:3.42936
[599]	validation_0-rmse:2.30412	validation_1-rmse:2.61890
Fold: 2/3 rmse score = 6.85865
[0]	validation_0-rmse:3.43132	validation_1-rmse:3.44436
[599]	validation_0-rmse:2.30390	validation_1-rmse:2.61816


[I 2023-10-09 21:37:12,204] Trial 29 finished with value: 6.870417450958154 and parameters: {'booster': 'gbtree', 'lambda': 0.023769531756863065, 'alpha': 8.445010251536444e-06, 'subsample': 0.7291652397302693, 'colsample_bytree': 0.5107012278751177, 'n_estimators': 600, 'max_depth': 9, 'min_child_weight': 5, 'eta': 0.03445368601965074, 'gamma': 0.012718878057972505, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.858429981947185.


Fold: 3/3 rmse score = 6.85476
[0]	validation_0-rmse:3.43857	validation_1-rmse:3.43659
[599]	validation_0-rmse:2.46227	validation_1-rmse:2.68553
Fold: 1/3 rmse score = 7.21205
[0]	validation_0-rmse:3.44141	validation_1-rmse:3.43157
[599]	validation_0-rmse:2.45711	validation_1-rmse:2.66392
Fold: 2/3 rmse score = 7.09649
[0]	validation_0-rmse:3.43390	validation_1-rmse:3.44687
[599]	validation_0-rmse:2.46905	validation_1-rmse:2.67516


[I 2023-10-09 21:37:41,070] Trial 30 finished with value: 7.15500122816369 and parameters: {'booster': 'gbtree', 'lambda': 0.3085827260335727, 'alpha': 1.4433417903557856e-06, 'subsample': 0.799570070261631, 'colsample_bytree': 0.6151916763211611, 'n_estimators': 600, 'max_depth': 8, 'min_child_weight': 5, 'eta': 0.03274068985500777, 'gamma': 0.012715605194523385, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.858429981947185.


Fold: 3/3 rmse score = 7.15646
[0]	validation_0-rmse:3.26605	validation_1-rmse:3.27807
[209]	validation_0-rmse:1.70495	validation_1-rmse:2.68826
Fold: 1/3 rmse score = 7.15337
[0]	validation_0-rmse:3.27240	validation_1-rmse:3.26462
[210]	validation_0-rmse:1.68660	validation_1-rmse:2.65983
Fold: 2/3 rmse score = 6.98978
[0]	validation_0-rmse:3.26467	validation_1-rmse:3.27777
[248]	validation_0-rmse:1.58055	validation_1-rmse:2.68503


[I 2023-10-09 21:37:56,833] Trial 31 finished with value: 7.086613665216192 and parameters: {'booster': 'gbtree', 'lambda': 0.030466687370478703, 'alpha': 7.017225858499583e-06, 'subsample': 0.6650452453953605, 'colsample_bytree': 0.5244166793339937, 'n_estimators': 700, 'max_depth': 9, 'min_child_weight': 4, 'eta': 0.32213376717949505, 'gamma': 0.01735868378640074, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.858429981947185.


Fold: 3/3 rmse score = 7.11669
[0]	validation_0-rmse:3.29903	validation_1-rmse:3.30841
[443]	validation_0-rmse:1.26057	validation_1-rmse:2.64712
Fold: 1/3 rmse score = 6.96124
[0]	validation_0-rmse:3.30477	validation_1-rmse:3.29688
[390]	validation_0-rmse:1.35974	validation_1-rmse:2.62238
Fold: 2/3 rmse score = 6.84743
[0]	validation_0-rmse:3.29736	validation_1-rmse:3.30914
[388]	validation_0-rmse:1.37164	validation_1-rmse:2.64271
Fold: 3/3 rmse score = 6.94901

[I 2023-10-09 21:38:22,448] Trial 32 finished with value: 6.919225157652879 and parameters: {'booster': 'gbtree', 'lambda': 0.02155279584240853, 'alpha': 7.807877935497696e-06, 'subsample': 0.676383439265815, 'colsample_bytree': 0.5277715161933307, 'n_estimators': 700, 'max_depth': 9, 'min_child_weight': 4, 'eta': 0.25845225878277317, 'gamma': 0.02422408722154315, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.858429981947185.



[0]	validation_0-rmse:3.43685	validation_1-rmse:3.43511
[799]	validation_0-rmse:2.15700	validation_1-rmse:2.57292
Fold: 1/3 rmse score = 6.61993
[0]	validation_0-rmse:3.43977	validation_1-rmse:3.43002
[799]	validation_0-rmse:2.16190	validation_1-rmse:2.55526
Fold: 2/3 rmse score = 6.52936
[0]	validation_0-rmse:3.43216	validation_1-rmse:3.44516
[799]	validation_0-rmse:2.16049	validation_1-rmse:2.56167


[I 2023-10-09 21:39:07,074] Trial 33 finished with value: 6.57048289641635 and parameters: {'booster': 'gbtree', 'lambda': 0.22707944407369127, 'alpha': 3.3037390422677014e-07, 'subsample': 0.830653105228792, 'colsample_bytree': 0.6290994791644161, 'n_estimators': 800, 'max_depth': 9, 'min_child_weight': 5, 'eta': 0.03308164725533205, 'gamma': 0.0038662350915492165, 'grow_policy': 'depthwise'}. Best is trial 33 with value: 6.57048289641635.


Fold: 3/3 rmse score = 6.56216
[0]	validation_0-rmse:3.24796	validation_1-rmse:3.26136
[219]	validation_0-rmse:1.57253	validation_1-rmse:2.62871
Fold: 1/3 rmse score = 6.84363
[0]	validation_0-rmse:3.25722	validation_1-rmse:3.25126
[235]	validation_0-rmse:1.51080	validation_1-rmse:2.61891
Fold: 2/3 rmse score = 6.76938
[0]	validation_0-rmse:3.25070	validation_1-rmse:3.26307
[237]	validation_0-rmse:1.49200	validation_1-rmse:2.60782


[I 2023-10-09 21:39:23,500] Trial 34 finished with value: 6.780456951377778 and parameters: {'booster': 'gbtree', 'lambda': 0.009587981167759196, 'alpha': 2.947986260136048e-07, 'subsample': 0.8117278549305914, 'colsample_bytree': 0.5998443770965566, 'n_estimators': 900, 'max_depth': 9, 'min_child_weight': 5, 'eta': 0.3448028471898466, 'gamma': 0.0021820168283911063, 'grow_policy': 'depthwise'}. Best is trial 33 with value: 6.57048289641635.


Fold: 3/3 rmse score = 6.72836
[0]	validation_0-rmse:3.45524	validation_1-rmse:3.45224
[999]	validation_0-rmse:2.66769	validation_1-rmse:2.79585
Fold: 1/3 rmse score = 7.81676
[0]	validation_0-rmse:3.45760	validation_1-rmse:3.44770
[999]	validation_0-rmse:2.66537	validation_1-rmse:2.77387
Fold: 2/3 rmse score = 7.69434
[0]	validation_0-rmse:3.44987	validation_1-rmse:3.46311
[999]	validation_0-rmse:2.66415	validation_1-rmse:2.77367


[I 2023-10-09 21:40:21,429] Trial 35 finished with value: 7.734785428794517 and parameters: {'booster': 'gbtree', 'lambda': 0.004786565859147131, 'alpha': 1.1245185942735929e-07, 'subsample': 0.8315009619221758, 'colsample_bytree': 0.6237519563316969, 'n_estimators': 1000, 'max_depth': 9, 'min_child_weight': 5, 'eta': 0.0069504381944753306, 'gamma': 0.0029352792219080365, 'grow_policy': 'depthwise'}. Best is trial 33 with value: 6.57048289641635.


Fold: 3/3 rmse score = 7.69326
[0]	validation_0-rmse:3.02646	validation_1-rmse:3.04929
[143]	validation_0-rmse:2.94376	validation_1-rmse:2.98320
Fold: 1/3 rmse score = 11.90477
[0]	validation_0-rmse:3.03521	validation_1-rmse:3.03629
[143]	validation_0-rmse:2.95128	validation_1-rmse:2.96086
Fold: 2/3 rmse score = 11.87269
[0]	validation_0-rmse:3.02966	validation_1-rmse:3.04545
[223]	validation_0-rmse:3.01966	validation_1-rmse:3.04775


[I 2023-10-09 21:41:55,227] Trial 36 finished with value: 11.9160204552196 and parameters: {'booster': 'dart', 'lambda': 0.26517123397339787, 'alpha': 2.566089096302153e-07, 'subsample': 0.8666818207146816, 'colsample_bytree': 0.5750420093337872, 'n_estimators': 900, 'max_depth': 9, 'min_child_weight': 6, 'eta': 0.7461631766019287, 'gamma': 0.00048521448872418344, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'forest', 'rate_drop': 0.8902235018104074, 'skip_drop': 1.702501716320659e-08}. Best is trial 33 with value: 6.57048289641635.


Fold: 3/3 rmse score = 11.97061
[0]	validation_0-rmse:3.43873	validation_1-rmse:3.43685
[799]	validation_0-rmse:2.20911	validation_1-rmse:2.58641
Fold: 1/3 rmse score = 6.68951
[0]	validation_0-rmse:3.44168	validation_1-rmse:3.43198
[799]	validation_0-rmse:2.20718	validation_1-rmse:2.57247
Fold: 2/3 rmse score = 6.61758
[0]	validation_0-rmse:3.43404	validation_1-rmse:3.44715
[799]	validation_0-rmse:2.21328	validation_1-rmse:2.57782


[I 2023-10-09 21:42:39,617] Trial 37 finished with value: 6.650747197722594 and parameters: {'booster': 'gbtree', 'lambda': 0.04909163384655094, 'alpha': 3.3215130937275525e-08, 'subsample': 0.7684122787990819, 'colsample_bytree': 0.6272537395911192, 'n_estimators': 800, 'max_depth': 9, 'min_child_weight': 6, 'eta': 0.030240476585870976, 'gamma': 0.0017167437085989525, 'grow_policy': 'depthwise'}. Best is trial 33 with value: 6.57048289641635.


Fold: 3/3 rmse score = 6.64515
[0]	validation_0-rmse:3.34357	validation_1-rmse:3.34884
[899]	validation_0-rmse:1.23556	validation_1-rmse:2.50573
Fold: 1/3 rmse score = 6.27801
[0]	validation_0-rmse:3.34928	validation_1-rmse:3.33979
[899]	validation_0-rmse:1.22470	validation_1-rmse:2.48547
Fold: 2/3 rmse score = 6.17680
[0]	validation_0-rmse:3.34260	validation_1-rmse:3.35415
[899]	validation_0-rmse:1.24268	validation_1-rmse:2.48565


[I 2023-10-09 21:43:21,947] Trial 38 finished with value: 6.210107259489909 and parameters: {'booster': 'gbtree', 'lambda': 0.07250204120355709, 'alpha': 1.1246619726768121e-08, 'subsample': 0.9273351823222801, 'colsample_bytree': 0.6414805504892989, 'n_estimators': 900, 'max_depth': 8, 'min_child_weight': 6, 'eta': 0.1879923053372939, 'gamma': 0.0008368848262694474, 'grow_policy': 'depthwise'}. Best is trial 38 with value: 6.210107259489909.


Fold: 3/3 rmse score = 6.17551
[0]	validation_0-rmse:3.45973	validation_1-rmse:3.45642
[899]	validation_0-rmse:3.19334	validation_1-rmse:3.20547
Fold: 1/3 rmse score = 10.27501
[0]	validation_0-rmse:3.46194	validation_1-rmse:3.45199
[899]	validation_0-rmse:3.19783	validation_1-rmse:3.19214


[I 2023-10-09 22:06:35,544] Trial 39 pruned. 


Fold: 2/3 rmse score = 10.18975
[0]	validation_0-rmse:3.23872	validation_1-rmse:3.25333
[237]	validation_0-rmse:1.44911	validation_1-rmse:2.59321
Fold: 1/3 rmse score = 6.68227
[0]	validation_0-rmse:3.24810	validation_1-rmse:3.24123
[285]	validation_0-rmse:1.34564	validation_1-rmse:2.59254
Fold: 2/3 rmse score = 6.68632
[0]	validation_0-rmse:3.24187	validation_1-rmse:3.25446
[247]	validation_0-rmse:1.45059	validation_1-rmse:2.58646


[I 2023-10-09 22:06:53,834] Trial 40 finished with value: 6.670889710871557 and parameters: {'booster': 'gbtree', 'lambda': 0.013705838732395871, 'alpha': 5.52776268545982e-08, 'subsample': 0.9302107584062711, 'colsample_bytree': 0.6303873929704379, 'n_estimators': 1000, 'max_depth': 9, 'min_child_weight': 6, 'eta': 0.3638460178385569, 'gamma': 0.0009711625237139315, 'grow_policy': 'depthwise'}. Best is trial 38 with value: 6.210107259489909.


Fold: 3/3 rmse score = 6.64408
[0]	validation_0-rmse:3.26138	validation_1-rmse:3.27404
[409]	validation_0-rmse:1.15874	validation_1-rmse:2.57452
Fold: 1/3 rmse score = 6.60193
[0]	validation_0-rmse:3.26959	validation_1-rmse:3.26214
[396]	validation_0-rmse:1.17933	validation_1-rmse:2.57181
Fold: 2/3 rmse score = 6.57736
[0]	validation_0-rmse:3.26341	validation_1-rmse:3.27593
[398]	validation_0-rmse:1.17103	validation_1-rmse:2.56440
Fold: 3/3 rmse score = 6.54610

[I 2023-10-09 22:07:18,933] Trial 41 finished with value: 6.575129664123398 and parameters: {'booster': 'gbtree', 'lambda': 0.011768870475460406, 'alpha': 3.8255703517997675e-08, 'subsample': 0.9234679498231831, 'colsample_bytree': 0.6302212364207117, 'n_estimators': 1000, 'max_depth': 9, 'min_child_weight': 6, 'eta': 0.319540617656737, 'gamma': 0.0010698563523171669, 'grow_policy': 'depthwise'}. Best is trial 38 with value: 6.210107259489909.



[0]	validation_0-rmse:3.07984	validation_1-rmse:3.12017
[112]	validation_0-rmse:1.63878	validation_1-rmse:3.13923
Fold: 1/3 rmse score = 8.01667
[0]	validation_0-rmse:3.09428	validation_1-rmse:3.09621
[115]	validation_0-rmse:1.63449	validation_1-rmse:3.14174


[I 2023-10-09 22:07:25,955] Trial 42 pruned. 


Fold: 2/3 rmse score = 7.89198
[0]	validation_0-rmse:3.24085	validation_1-rmse:3.25316
[338]	validation_0-rmse:1.27042	validation_1-rmse:2.54450
Fold: 1/3 rmse score = 6.47114
[0]	validation_0-rmse:3.24873	validation_1-rmse:3.24199
[434]	validation_0-rmse:1.06112	validation_1-rmse:2.53820
Fold: 2/3 rmse score = 6.41407
[0]	validation_0-rmse:3.24259	validation_1-rmse:3.25485
[434]	validation_0-rmse:1.06847	validation_1-rmse:2.54169


[I 2023-10-09 22:07:51,122] Trial 43 finished with value: 6.436346351080733 and parameters: {'booster': 'gbtree', 'lambda': 0.002765527481257916, 'alpha': 1.3125027149309643e-08, 'subsample': 0.9989183387770914, 'colsample_bytree': 0.7003529190355362, 'n_estimators': 1000, 'max_depth': 9, 'min_child_weight': 6, 'eta': 0.33154625087010303, 'gamma': 0.00027327826944372683, 'grow_policy': 'depthwise'}. Best is trial 38 with value: 6.210107259489909.


Fold: 3/3 rmse score = 6.42383
[0]	validation_0-rmse:3.44661	validation_1-rmse:3.44414
[799]	validation_0-rmse:2.60874	validation_1-rmse:2.76411
Fold: 1/3 rmse score = 7.64029
[0]	validation_0-rmse:3.44932	validation_1-rmse:3.43937
[799]	validation_0-rmse:2.60406	validation_1-rmse:2.74441


[I 2023-10-09 22:08:15,553] Trial 44 pruned. 


Fold: 2/3 rmse score = 7.53179
[0]	validation_0-rmse:3.32826	validation_1-rmse:3.33138
[999]	validation_0-rmse:0.95813	validation_1-rmse:2.44948
Fold: 1/3 rmse score = 5.99794
[0]	validation_0-rmse:3.33478	validation_1-rmse:3.32673
[999]	validation_0-rmse:0.96790	validation_1-rmse:2.44246
Fold: 2/3 rmse score = 5.96561
[0]	validation_0-rmse:3.32766	validation_1-rmse:3.34165
[924]	validation_0-rmse:1.04103	validation_1-rmse:2.43475


[I 2023-10-09 22:49:02,976] Trial 45 finished with value: 5.962488010980658 and parameters: {'booster': 'dart', 'lambda': 0.002578975588016742, 'alpha': 1.3244432301036111e-08, 'subsample': 0.9983456796567938, 'colsample_bytree': 0.7040062969819831, 'n_estimators': 1000, 'max_depth': 9, 'min_child_weight': 6, 'eta': 0.16044236257401082, 'gamma': 4.4590714286410406e-05, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 9.776329107921942e-05, 'skip_drop': 4.429240963896771e-06}. Best is trial 45 with value: 5.962488010980658.


Fold: 3/3 rmse score = 5.92392
[0]	validation_0-rmse:3.29597	validation_1-rmse:3.30115
[671]	validation_0-rmse:0.93164	validation_1-rmse:2.49688
Fold: 1/3 rmse score = 6.22372
[0]	validation_0-rmse:3.30276	validation_1-rmse:3.29678
[741]	validation_0-rmse:0.83713	validation_1-rmse:2.47790
Fold: 2/3 rmse score = 6.14237
[0]	validation_0-rmse:3.29634	validation_1-rmse:3.31133
[803]	validation_0-rmse:0.76482	validation_1-rmse:2.48173


[I 2023-10-09 23:13:06,399] Trial 46 finished with value: 6.172333548403238 and parameters: {'booster': 'dart', 'lambda': 0.0019835399598768263, 'alpha': 1.1588070781294029e-08, 'subsample': 0.8872254532751528, 'colsample_bytree': 0.7478038655308537, 'n_estimators': 1000, 'max_depth': 9, 'min_child_weight': 3, 'eta': 0.20328383360217034, 'gamma': 2.8456990097242074e-05, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 0.0002460530034252267, 'skip_drop': 5.464381519276375e-06}. Best is trial 45 with value: 5.962488010980658.


Fold: 3/3 rmse score = 6.15091
[0]	validation_0-rmse:3.41926	validation_1-rmse:3.41756
[899]	validation_0-rmse:2.79108	validation_1-rmse:2.85837
Fold: 1/3 rmse score = 8.17018
[0]	validation_0-rmse:3.42243	validation_1-rmse:3.41142
[899]	validation_0-rmse:2.79712	validation_1-rmse:2.83977
Fold: 2/3 rmse score = 8.06427
[0]	validation_0-rmse:3.41466	validation_1-rmse:3.42745
[899]	validation_0-rmse:2.79821	validation_1-rmse:2.84669


[I 2023-10-09 23:46:11,241] Trial 47 finished with value: 8.11268967597698 and parameters: {'booster': 'dart', 'lambda': 0.00038194772873106615, 'alpha': 1.076412642115717e-08, 'subsample': 0.8802414239829476, 'colsample_bytree': 0.773490089693005, 'n_estimators': 900, 'max_depth': 3, 'min_child_weight': 3, 'eta': 0.10734400353641128, 'gamma': 5.82782372403684e-05, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 0.0002022916011433227, 'skip_drop': 4.617737092098809e-06}. Best is trial 45 with value: 5.962488010980658.


Fold: 3/3 rmse score = 8.10362
[0]	validation_0-rmse:3.14676	validation_1-rmse:3.15776
[249]	validation_0-rmse:1.43798	validation_1-rmse:2.63684
Fold: 1/3 rmse score = 6.84296
[0]	validation_0-rmse:3.15785	validation_1-rmse:3.15290
[196]	validation_0-rmse:1.61332	validation_1-rmse:2.62752
Fold: 2/3 rmse score = 6.79896
[0]	validation_0-rmse:3.15462	validation_1-rmse:3.16588
[198]	validation_0-rmse:1.59510	validation_1-rmse:2.64453


[I 2023-10-09 23:48:24,977] Trial 48 finished with value: 6.8489854817684614 and parameters: {'booster': 'dart', 'lambda': 6.245080649354278e-05, 'alpha': 1.411695692872028e-08, 'subsample': 0.995283735646384, 'colsample_bytree': 0.7195388517886709, 'n_estimators': 1000, 'max_depth': 8, 'min_child_weight': 3, 'eta': 0.5125546374151384, 'gamma': 1.8147750606363866e-05, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 1.8006089654833396e-05, 'skip_drop': 1.930448249063886e-06}. Best is trial 45 with value: 5.962488010980658.


Fold: 3/3 rmse score = 6.90504
[0]	validation_0-rmse:3.33796	validation_1-rmse:3.33910
[999]	validation_0-rmse:1.34112	validation_1-rmse:2.45680
Fold: 1/3 rmse score = 6.03420
[0]	validation_0-rmse:3.34407	validation_1-rmse:3.33534
[999]	validation_0-rmse:1.33359	validation_1-rmse:2.43993
Fold: 2/3 rmse score = 5.95298
[0]	validation_0-rmse:3.33753	validation_1-rmse:3.35021
[999]	validation_0-rmse:1.34875	validation_1-rmse:2.43985


[I 2023-10-10 00:29:46,293] Trial 49 finished with value: 5.980015642325394 and parameters: {'booster': 'dart', 'lambda': 0.0026467279365743053, 'alpha': 1.0648149016043716e-07, 'subsample': 0.9558810566556822, 'colsample_bytree': 0.6990757999761278, 'n_estimators': 1000, 'max_depth': 8, 'min_child_weight': 5, 'eta': 0.16652538854459067, 'gamma': 1.1418655594749022e-05, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 0.0010395081599285753, 'skip_drop': 2.4105027920273678e-05}. Best is trial 45 with value: 5.962488010980658.


Fold: 3/3 rmse score = 5.95287


In [17]:
trial = study.best_trial

In [18]:
print(trial.params)

{'booster': 'dart', 'lambda': 0.002578975588016742, 'alpha': 1.3244432301036111e-08, 'subsample': 0.9983456796567938, 'colsample_bytree': 0.7040062969819831, 'n_estimators': 1000, 'max_depth': 9, 'min_child_weight': 6, 'eta': 0.16044236257401082, 'gamma': 4.4590714286410406e-05, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 9.776329107921942e-05, 'skip_drop': 4.429240963896771e-06}


In [5]:
best_param = {
  'booster': 'dart',
  'lambda': 0.002578975588016742,
  'alpha': 1.3244432301036111e-08,
  'subsample': 0.9983456796567938,
  'colsample_bytree': 0.7040062969819831,
  'n_estimators': 1000,
  'max_depth': 9,
  'min_child_weight': 6,
  'eta': 0.16044236257401082,
  'gamma': 4.4590714286410406e-05,
  'grow_policy': 'depthwise',
  'sample_type': 'weighted',
  'normalize_type': 'tree',
  'rate_drop': 9.776329107921942e-05,
  'skip_drop': 4.429240963896771e-06
}

# val_scores, y_tests, models = train_model(best_param, train_x, train_y, train_null_df, test=True)

In [24]:
for idx, model in enumerate(models):
  file_name = f"./models/uwind_model_{idx}.model"
  model.save_model(file_name)

In [6]:
models = []
for i in range(3):
  model = create_model(best_param)
  model.load_model(f"models/uwind_model_{i}.model")
  models.append(model)

In [7]:
test_winds = []
for model in models:
  test_winds.append(model.predict(train_null_df))

In [8]:
train_null_df["U_WIND"] = np.mean(test_winds, axis=0)

In [9]:
assert sorted(train_df.columns) == sorted(train_null_df.columns)

In [10]:
result = pd.concat([train_df, train_null_df], axis=0).sort_index()

In [11]:
result.head(10)

Unnamed: 0,ARI_CO_BR,ARI_CO_CA,ARI_CO_CL,ARI_CO_CN,ARI_CO_FI,ARI_CO_ID,ARI_CO_IN,ARI_CO_JP,ARI_CO_KR,ARI_CO_LV,...,SHIP_TYPE_CATEGORY_Tanker,U_WIND,WITlt0,WTI,day,hour,minute,month,weekday,year
0,0,0,0,1,0,0,0,0,0,0,...,0,0.14,0,-1.081668,15,4,3,10,3,2020
1,0,0,0,1,0,0,0,0,0,0,...,0,-3.16,0,-0.188614,17,2,55,9,1,2019
2,0,0,0,1,0,0,0,0,0,0,...,0,0.0,0,-0.305226,23,6,43,2,5,2019
3,0,0,0,0,0,0,0,1,0,0,...,0,4.34,0,-1.07438,18,22,6,9,4,2020
4,0,0,0,0,0,0,0,0,0,0,...,0,1.92,0,1.209272,13,12,57,8,5,2022
5,0,0,0,1,0,0,0,0,0,0,...,0,-0.600093,0,-0.84067,8,14,24,9,1,2015
6,0,0,0,0,0,0,0,1,0,0,...,0,-1.91,0,-0.50784,25,21,34,1,0,2021
7,0,0,0,0,0,0,0,1,0,0,...,0,0.0,0,-0.459737,18,3,48,6,1,2019
8,0,0,0,0,0,0,0,1,0,0,...,0,0.195547,0,-0.488404,27,6,23,1,4,2017
9,0,0,0,0,0,0,0,0,0,0,...,0,1.886073,0,0.008168,3,7,8,2,5,2018


In [12]:
result.to_parquet("./data/train_v2_2u.parquet")

In [13]:
test_winds = []
for model in models:
  test_winds.append(model.predict(test_df))

In [14]:
test_df["U_WIND"] = np.mean(test_winds, axis=0)

In [15]:
test_df.to_parquet("./data/test_v2_2u.parquet")