In [1]:
import warnings
warnings.filterwarnings(action="ignore")
import datetime
import time
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import HyperbandPruner

In [2]:
RANDOM_SEED = 99

In [None]:
tmp = pd.read_parquet("./data/test_v2_2.parquet")
tmp["V_WIND"].isna().sum()

In [3]:
train_df = pd.read_parquet('./data/train_v2_2_nn.parquet').drop(columns=["U_WIND"])
train_null_df = pd.read_parquet('./data/train_v2_2_n.parquet').drop(columns=["U_WIND", "V_WIND"])
test_df = pd.read_parquet('./data/test_v2_2.parquet').drop(columns=["U_WIND", "V_WIND"])

In [4]:
train_x = train_df.drop(columns=["V_WIND"])
train_y = train_df["V_WIND"]

In [5]:
def create_model(param):
    return xgb.XGBRegressor(
        random_state=RANDOM_SEED,
        tree_method="gpu_hist",
        eval_metric="rmse",
        early_stopping_rounds=100,
        **param
    )

In [6]:
def train_model(param, X, y, X_test, trial=None, test=False):
  skf = KFold(n_splits=3, shuffle=True, random_state=RANDOM_SEED)

  val_scores = []
  y_tests = []
  models = []

  for idx, (train_idx, val_idx) in enumerate(skf.split(X, y)):

    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

    model = create_model(param)
    model.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)], verbose=1000)

    y_hat_val = model.predict(X_val)
    score = mean_squared_error(y_val.values, y_hat_val, squared=False)
    val_scores.append(score)
    print(f'Fold: {idx+1}/3 rmse score = {score:.5f}')

    if test:
      y_tests.append(model.predict(X_test))
      models.append(model)

    if trial:
      trial.report(score, idx)

      if trial.should_prune():
        raise optuna.TrialPruned()

  return val_scores, y_tests, models

In [7]:
def objective_xgb(trial):
  param = {
    "booster": trial.suggest_categorical("booster", ["gbtree", "dart"]),
    "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
    "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
    "subsample": trial.suggest_float("subsample", 0.1, 1.0),
    "colsample_bytree": trial.suggest_float("colsample_bytree", 0.1, 1.0),
    "n_estimators": trial.suggest_int("n_estimators", 100, 1000, step=100),
    "max_depth": trial.suggest_int("max_depth", 3, 9),
    "min_child_weight": trial.suggest_int("min_child_weight", 2, 10),
    "eta": trial.suggest_float("eta", 1e-8, 1.0, log=True),
    "gamma": trial.suggest_float("gamma", 1e-8, 1.0, log=True),
    "grow_policy": trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])
  }

  if param["booster"] == "dart":
    param['sample_type'] = trial.suggest_categorical('sample_type', ['uniform', 'weighted'])
    param['normalize_type'] = trial.suggest_categorical('normalize_type', ['tree', 'forest'])
    param['rate_drop'] = trial.suggest_float('rate_drop', 1e-8, 1.0, log=True)
    param['skip_drop'] = trial.suggest_float('skip_drop', 1e-8, 1.0, log=True)

  val_scores, y_tests, models = train_model(param, train_x, train_y, test_df, trial)

  return sum(val_scores) / len(val_scores)

In [8]:
study = optuna.create_study(
    sampler=TPESampler(seed=RANDOM_SEED),
    direction='minimize',
    study_name='xgb_tuning',
    pruner=HyperbandPruner(
      min_resource=1, max_resource=8, reduction_factor=3
    ),
)

study.optimize(objective_xgb, n_trials=50)

[I 2023-10-10 02:34:29,905] A new study created in memory with name: xgb_tuning


[0]	validation_0-rmse:3.81383	validation_1-rmse:3.80904
[299]	validation_0-rmse:3.81383	validation_1-rmse:3.80904
Fold: 1/3 rmse score = 3.80904
[0]	validation_0-rmse:3.80515	validation_1-rmse:3.82636
[299]	validation_0-rmse:3.80515	validation_1-rmse:3.82636
Fold: 2/3 rmse score = 3.82636
[0]	validation_0-rmse:3.81771	validation_1-rmse:3.80126
[299]	validation_0-rmse:3.81771	validation_1-rmse:3.80126


[I 2023-10-10 02:34:41,698] Trial 0 finished with value: 3.8122195634844673 and parameters: {'booster': 'gbtree', 'lambda': 0.04017550942202233, 'alpha': 1.7847241527490656e-08, 'subsample': 0.8272449670283629, 'colsample_bytree': 0.6090556776494775, 'n_estimators': 300, 'max_depth': 3, 'min_child_weight': 10, 'eta': 1.1339812255777714e-08, 'gamma': 0.014399394343769443, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 3.8122195634844673.


Fold: 3/3 rmse score = 3.80126
[0]	validation_0-rmse:3.81383	validation_1-rmse:3.80904
[899]	validation_0-rmse:3.81306	validation_1-rmse:3.80827
Fold: 1/3 rmse score = 3.80827
[0]	validation_0-rmse:3.80515	validation_1-rmse:3.82636
[899]	validation_0-rmse:3.80438	validation_1-rmse:3.82559
Fold: 2/3 rmse score = 3.82559
[0]	validation_0-rmse:3.81771	validation_1-rmse:3.80126
[899]	validation_0-rmse:3.81693	validation_1-rmse:3.80049


[I 2023-10-10 03:07:43,411] Trial 1 finished with value: 3.8114476078947725 and parameters: {'booster': 'dart', 'lambda': 1.4575798463318516e-05, 'alpha': 0.6189425995687715, 'subsample': 0.5719732438377161, 'colsample_bytree': 0.1842517839679773, 'n_estimators': 900, 'max_depth': 4, 'min_child_weight': 6, 'eta': 2.178476742104051e-06, 'gamma': 0.033817296080697445, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'tree', 'rate_drop': 1.4193450623251104e-07, 'skip_drop': 4.983900949448835e-07}. Best is trial 1 with value: 3.8114476078947725.


Fold: 3/3 rmse score = 3.80049
[0]	validation_0-rmse:3.81383	validation_1-rmse:3.80904
[199]	validation_0-rmse:3.81383	validation_1-rmse:3.80904
Fold: 1/3 rmse score = 3.80904
[0]	validation_0-rmse:3.80515	validation_1-rmse:3.82636
[199]	validation_0-rmse:3.80515	validation_1-rmse:3.82636
Fold: 2/3 rmse score = 3.82636
[0]	validation_0-rmse:3.81771	validation_1-rmse:3.80126
[199]	validation_0-rmse:3.81771	validation_1-rmse:3.80125


[I 2023-10-10 03:08:38,912] Trial 2 finished with value: 3.812217111513013 and parameters: {'booster': 'gbtree', 'lambda': 7.591896035963056e-07, 'alpha': 1.128298527729365e-08, 'subsample': 0.9087797683651598, 'colsample_bytree': 0.597010984189897, 'n_estimators': 200, 'max_depth': 9, 'min_child_weight': 6, 'eta': 2.1580835301822618e-08, 'gamma': 0.00015813016339763232, 'grow_policy': 'lossguide'}. Best is trial 1 with value: 3.8114476078947725.


Fold: 3/3 rmse score = 3.80125
[0]	validation_0-rmse:3.61866	validation_1-rmse:3.61562
[399]	validation_0-rmse:2.83531	validation_1-rmse:2.99752
Fold: 1/3 rmse score = 2.99689
[0]	validation_0-rmse:3.61168	validation_1-rmse:3.63244
[399]	validation_0-rmse:2.83097	validation_1-rmse:3.01457
Fold: 2/3 rmse score = 3.01396
[0]	validation_0-rmse:3.62030	validation_1-rmse:3.60452
[399]	validation_0-rmse:2.83352	validation_1-rmse:3.00929


[I 2023-10-10 03:08:53,598] Trial 3 finished with value: 3.0067140304468176 and parameters: {'booster': 'gbtree', 'lambda': 0.5574271302327638, 'alpha': 5.369144319173959e-05, 'subsample': 0.3376820528693848, 'colsample_bytree': 0.5944405760344648, 'n_estimators': 400, 'max_depth': 4, 'min_child_weight': 10, 'eta': 0.32615150214792754, 'gamma': 0.05011280824271637, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 3.0067140304468176.


Fold: 3/3 rmse score = 3.00929
[0]	validation_0-rmse:3.81383	validation_1-rmse:3.80904
[699]	validation_0-rmse:3.81372	validation_1-rmse:3.80893
Fold: 1/3 rmse score = 3.80893
[0]	validation_0-rmse:3.80515	validation_1-rmse:3.82636
[699]	validation_0-rmse:3.80505	validation_1-rmse:3.82626
Fold: 2/3 rmse score = 3.82626
[0]	validation_0-rmse:3.81771	validation_1-rmse:3.80126
[699]	validation_0-rmse:3.81760	validation_1-rmse:3.80115


[I 2023-10-10 03:09:18,272] Trial 4 finished with value: 3.812113942475524 and parameters: {'booster': 'gbtree', 'lambda': 1.9040058970389597e-05, 'alpha': 0.0013569405041690126, 'subsample': 0.9788061899951741, 'colsample_bytree': 0.7347595233221714, 'n_estimators': 700, 'max_depth': 4, 'min_child_weight': 4, 'eta': 1.9058143481065904e-07, 'gamma': 2.700633373820546e-06, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 3.0067140304468176.


Fold: 3/3 rmse score = 3.80115
[0]	validation_0-rmse:3.81363	validation_1-rmse:3.80884
[999]	validation_0-rmse:3.78893	validation_1-rmse:3.78376
Fold: 1/3 rmse score = 3.78376
[0]	validation_0-rmse:3.80495	validation_1-rmse:3.82616
[999]	validation_0-rmse:3.78010	validation_1-rmse:3.80121


[I 2023-10-10 03:36:02,091] Trial 5 pruned. 


Fold: 2/3 rmse score = 3.80121
[0]	validation_0-rmse:3.81318	validation_1-rmse:3.80840
[599]	validation_0-rmse:3.47275	validation_1-rmse:3.46873
Fold: 1/3 rmse score = 3.46873
[0]	validation_0-rmse:3.80449	validation_1-rmse:3.82571
[599]	validation_0-rmse:3.46415	validation_1-rmse:3.48484


[I 2023-10-10 03:36:16,921] Trial 6 pruned. 


Fold: 2/3 rmse score = 3.48484
[0]	validation_0-rmse:3.81383	validation_1-rmse:3.80904
[899]	validation_0-rmse:3.81042	validation_1-rmse:3.80565
Fold: 1/3 rmse score = 3.80565
[0]	validation_0-rmse:3.80515	validation_1-rmse:3.82636
[899]	validation_0-rmse:3.80175	validation_1-rmse:3.82294


[I 2023-10-10 03:56:59,384] Trial 7 pruned. 


Fold: 2/3 rmse score = 3.82294
[0]	validation_0-rmse:3.81324	validation_1-rmse:3.80844
[799]	validation_0-rmse:3.58637	validation_1-rmse:3.58176
Fold: 1/3 rmse score = 3.58176
[0]	validation_0-rmse:3.80455	validation_1-rmse:3.82575
[799]	validation_0-rmse:3.57740	validation_1-rmse:3.59816
Fold: 2/3 rmse score = 3.59816
[0]	validation_0-rmse:3.81710	validation_1-rmse:3.80066
[799]	validation_0-rmse:3.58684	validation_1-rmse:3.57638


[I 2023-10-10 04:22:57,882] Trial 8 finished with value: 3.585431180840219 and parameters: {'booster': 'dart', 'lambda': 0.016269894265187258, 'alpha': 0.00027473371985831937, 'subsample': 0.7205784430671397, 'colsample_bytree': 0.32422360794447835, 'n_estimators': 800, 'max_depth': 5, 'min_child_weight': 7, 'eta': 0.000640160651175959, 'gamma': 0.0007690362020385368, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 2.469722638494642e-07, 'skip_drop': 0.0028449094047407666}. Best is trial 3 with value: 3.0067140304468176.


Fold: 3/3 rmse score = 3.57638
[0]	validation_0-rmse:3.81383	validation_1-rmse:3.80904
[699]	validation_0-rmse:3.81379	validation_1-rmse:3.80900
Fold: 1/3 rmse score = 3.80900
[0]	validation_0-rmse:3.80515	validation_1-rmse:3.82636
[699]	validation_0-rmse:3.80511	validation_1-rmse:3.82632


[I 2023-10-10 04:36:45,018] Trial 9 pruned. 


Fold: 2/3 rmse score = 3.82632
[0]	validation_0-rmse:3.33775	validation_1-rmse:3.34329
[103]	validation_0-rmse:3.70719	validation_1-rmse:4.24449
Fold: 1/3 rmse score = 3.17549
[0]	validation_0-rmse:3.33790	validation_1-rmse:3.36629
[103]	validation_0-rmse:3.89160	validation_1-rmse:4.56461
Fold: 2/3 rmse score = 3.18149
[0]	validation_0-rmse:3.32235	validation_1-rmse:3.33232
[103]	validation_0-rmse:3.93357	validation_1-rmse:4.55323


[I 2023-10-10 04:36:57,364] Trial 10 finished with value: 3.1727245179427306 and parameters: {'booster': 'gbtree', 'lambda': 0.20136472246731266, 'alpha': 0.0037294959598213996, 'subsample': 0.1136943669843487, 'colsample_bytree': 0.9836482175958339, 'n_estimators': 400, 'max_depth': 8, 'min_child_weight': 2, 'eta': 0.5421827357095554, 'gamma': 0.6491750314063931, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 3.0067140304468176.


Fold: 3/3 rmse score = 3.16119
[0]	validation_0-rmse:3.24944	validation_1-rmse:3.25471
[101]	validation_0-rmse:3544.09311	validation_1-rmse:4077.43551
Fold: 1/3 rmse score = 3.24475
[0]	validation_0-rmse:3.25335	validation_1-rmse:3.28846
[99]	validation_0-rmse:11046.22710	validation_1-rmse:12135.78669
Fold: 2/3 rmse score = 3.28846
[0]	validation_0-rmse:3.25079	validation_1-rmse:3.27501
[99]	validation_0-rmse:7199.98703	validation_1-rmse:8154.85512


[I 2023-10-10 04:37:08,028] Trial 11 finished with value: 3.2694077787586706 and parameters: {'booster': 'gbtree', 'lambda': 0.6915180707346773, 'alpha': 0.007676817778163076, 'subsample': 0.10260407700657599, 'colsample_bytree': 0.9303022934183071, 'n_estimators': 400, 'max_depth': 8, 'min_child_weight': 2, 'eta': 0.989503289598702, 'gamma': 0.7293916524775244, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 3.0067140304468176.


Fold: 3/3 rmse score = 3.27501
[0]	validation_0-rmse:3.25889	validation_1-rmse:3.26491
[114]	validation_0-rmse:2.64071	validation_1-rmse:3.31252
Fold: 1/3 rmse score = 3.03940
[0]	validation_0-rmse:3.26218	validation_1-rmse:3.28066
[116]	validation_0-rmse:2.61887	validation_1-rmse:3.29731
Fold: 2/3 rmse score = 3.07517
[0]	validation_0-rmse:3.24986	validation_1-rmse:3.26034
[133]	validation_0-rmse:2.53857	validation_1-rmse:3.33107


[I 2023-10-10 04:37:21,086] Trial 12 finished with value: 3.064375314912143 and parameters: {'booster': 'gbtree', 'lambda': 0.9390089759158639, 'alpha': 0.0085597513515605, 'subsample': 0.4052937943893192, 'colsample_bytree': 0.965774787473821, 'n_estimators': 400, 'max_depth': 7, 'min_child_weight': 2, 'eta': 0.6887165204615721, 'gamma': 0.998337799421535, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 3.0067140304468176.


Fold: 3/3 rmse score = 3.07856
[0]	validation_0-rmse:3.74725	validation_1-rmse:3.74288
[99]	validation_0-rmse:2.93674	validation_1-rmse:2.99265
Fold: 1/3 rmse score = 2.99265
[0]	validation_0-rmse:3.73992	validation_1-rmse:3.76062
[99]	validation_0-rmse:2.93493	validation_1-rmse:3.00040
Fold: 2/3 rmse score = 3.00040
[0]	validation_0-rmse:3.74951	validation_1-rmse:3.73539
[99]	validation_0-rmse:2.92494	validation_1-rmse:2.99578


[I 2023-10-10 04:37:32,819] Trial 13 finished with value: 2.9962772071571564 and parameters: {'booster': 'gbtree', 'lambda': 0.006004222319263677, 'alpha': 5.162400489170195e-05, 'subsample': 0.4220775521125347, 'colsample_bytree': 0.7575724832065093, 'n_estimators': 100, 'max_depth': 7, 'min_child_weight': 4, 'eta': 0.05933074855108428, 'gamma': 0.6683362526305419, 'grow_policy': 'lossguide'}. Best is trial 13 with value: 2.9962772071571564.


Fold: 3/3 rmse score = 2.99578
[0]	validation_0-rmse:3.79736	validation_1-rmse:3.79275
[199]	validation_0-rmse:3.05534	validation_1-rmse:3.07348
Fold: 1/3 rmse score = 3.07348
[0]	validation_0-rmse:3.78858	validation_1-rmse:3.80977
[199]	validation_0-rmse:3.04715	validation_1-rmse:3.08125


[I 2023-10-10 04:37:42,688] Trial 14 pruned. 


Fold: 2/3 rmse score = 3.08125
[0]	validation_0-rmse:3.80093	validation_1-rmse:3.79648
[99]	validation_0-rmse:3.09105	validation_1-rmse:3.11150
Fold: 1/3 rmse score = 3.11150
[0]	validation_0-rmse:3.79206	validation_1-rmse:3.81334
[99]	validation_0-rmse:3.08756	validation_1-rmse:3.12533
Fold: 2/3 rmse score = 3.12533
[0]	validation_0-rmse:3.80477	validation_1-rmse:3.78854
[99]	validation_0-rmse:3.08658	validation_1-rmse:3.11370


[I 2023-10-10 04:37:55,062] Trial 15 finished with value: 3.1168445282627526 and parameters: {'booster': 'gbtree', 'lambda': 0.0011668726866326906, 'alpha': 2.4532714746101135e-06, 'subsample': 0.5039745036025409, 'colsample_bytree': 0.42975667103935644, 'n_estimators': 100, 'max_depth': 7, 'min_child_weight': 10, 'eta': 0.024406118442007997, 'gamma': 0.0018815303958341514, 'grow_policy': 'lossguide'}. Best is trial 13 with value: 2.9962772071571564.


Fold: 3/3 rmse score = 3.11370
[0]	validation_0-rmse:3.78324	validation_1-rmse:3.77899
[99]	validation_0-rmse:3.00424	validation_1-rmse:3.03527
Fold: 1/3 rmse score = 3.03527
[0]	validation_0-rmse:3.77481	validation_1-rmse:3.79614
[99]	validation_0-rmse:2.99826	validation_1-rmse:3.04179
Fold: 2/3 rmse score = 3.04179
[0]	validation_0-rmse:3.78652	validation_1-rmse:3.77036
[99]	validation_0-rmse:2.99443	validation_1-rmse:3.03884


[I 2023-10-10 04:38:07,013] Trial 16 finished with value: 3.038632714627633 and parameters: {'booster': 'gbtree', 'lambda': 0.07141878435608462, 'alpha': 0.00026488440103044443, 'subsample': 0.3730607155442449, 'colsample_bytree': 0.7127459098026165, 'n_estimators': 100, 'max_depth': 7, 'min_child_weight': 8, 'eta': 0.036226922460954804, 'gamma': 0.08881198944711519, 'grow_policy': 'lossguide'}. Best is trial 13 with value: 2.9962772071571564.


Fold: 3/3 rmse score = 3.03884
[0]	validation_0-rmse:3.81294	validation_1-rmse:3.80815
[499]	validation_0-rmse:3.49205	validation_1-rmse:3.48880
Fold: 1/3 rmse score = 3.48880
[0]	validation_0-rmse:3.80426	validation_1-rmse:3.82547
[499]	validation_0-rmse:3.48243	validation_1-rmse:3.50612


[I 2023-10-10 04:38:21,805] Trial 17 pruned. 


Fold: 2/3 rmse score = 3.50612
[0]	validation_0-rmse:3.71772	validation_1-rmse:3.71663
[199]	validation_0-rmse:2.45630	validation_1-rmse:2.84396
Fold: 1/3 rmse score = 2.84396
[0]	validation_0-rmse:3.70897	validation_1-rmse:3.73351
[199]	validation_0-rmse:2.45299	validation_1-rmse:2.85147
Fold: 2/3 rmse score = 2.85045
[0]	validation_0-rmse:3.71919	validation_1-rmse:3.70628
[199]	validation_0-rmse:2.43696	validation_1-rmse:2.84520


[I 2023-10-10 04:38:35,852] Trial 18 finished with value: 2.8465353441020986 and parameters: {'booster': 'gbtree', 'lambda': 0.014215474747669994, 'alpha': 9.61336616375633e-05, 'subsample': 0.3806463091396616, 'colsample_bytree': 0.46267397765136364, 'n_estimators': 200, 'max_depth': 9, 'min_child_weight': 3, 'eta': 0.10886823551840409, 'gamma': 0.17468028581635373, 'grow_policy': 'depthwise'}. Best is trial 18 with value: 2.8465353441020986.


Fold: 3/3 rmse score = 2.84520
[0]	validation_0-rmse:3.81378	validation_1-rmse:3.80899
[199]	validation_0-rmse:3.80278	validation_1-rmse:3.79846
Fold: 1/3 rmse score = 3.79846
[0]	validation_0-rmse:3.80510	validation_1-rmse:3.82631
[199]	validation_0-rmse:3.79418	validation_1-rmse:3.81565


[I 2023-10-10 04:38:47,789] Trial 19 pruned. 


Fold: 2/3 rmse score = 3.81565
[0]	validation_0-rmse:3.76854	validation_1-rmse:3.76445
[99]	validation_0-rmse:2.85725	validation_1-rmse:2.94635
Fold: 1/3 rmse score = 2.94635
[0]	validation_0-rmse:3.75949	validation_1-rmse:3.78136
[99]	validation_0-rmse:2.85393	validation_1-rmse:2.95307
Fold: 2/3 rmse score = 2.95307
[0]	validation_0-rmse:3.77162	validation_1-rmse:3.75600
[99]	validation_0-rmse:2.84212	validation_1-rmse:2.94026


[I 2023-10-10 04:38:56,590] Trial 20 finished with value: 2.9465626613379565 and parameters: {'booster': 'gbtree', 'lambda': 0.011087719255427736, 'alpha': 5.018491756923399e-07, 'subsample': 0.5697666168092502, 'colsample_bytree': 0.4526348023484376, 'n_estimators': 100, 'max_depth': 8, 'min_child_weight': 3, 'eta': 0.05240567066567658, 'gamma': 0.00618944227597624, 'grow_policy': 'depthwise'}. Best is trial 18 with value: 2.8465353441020986.


Fold: 3/3 rmse score = 2.94026
[0]	validation_0-rmse:3.73357	validation_1-rmse:3.72993
[99]	validation_0-rmse:2.75213	validation_1-rmse:2.89514
Fold: 1/3 rmse score = 2.89514
[0]	validation_0-rmse:3.72419	validation_1-rmse:3.74651
[99]	validation_0-rmse:2.75218	validation_1-rmse:2.90695
Fold: 2/3 rmse score = 2.90695
[0]	validation_0-rmse:3.73605	validation_1-rmse:3.72123
[99]	validation_0-rmse:2.74128	validation_1-rmse:2.90123


[I 2023-10-10 04:39:05,051] Trial 21 finished with value: 2.901105479399305 and parameters: {'booster': 'gbtree', 'lambda': 0.012310514950465895, 'alpha': 2.5297848484917434e-07, 'subsample': 0.5675666338462304, 'colsample_bytree': 0.4477623176848723, 'n_estimators': 100, 'max_depth': 8, 'min_child_weight': 3, 'eta': 0.09446152176248207, 'gamma': 0.010885740882235418, 'grow_policy': 'depthwise'}. Best is trial 18 with value: 2.8465353441020986.


Fold: 3/3 rmse score = 2.90123
[0]	validation_0-rmse:3.81014	validation_1-rmse:3.80542
[299]	validation_0-rmse:3.18950	validation_1-rmse:3.21315
Fold: 1/3 rmse score = 3.21315
[0]	validation_0-rmse:3.80146	validation_1-rmse:3.82271
[299]	validation_0-rmse:3.18643	validation_1-rmse:3.22602


[I 2023-10-10 04:39:18,087] Trial 22 pruned. 


Fold: 2/3 rmse score = 3.22602
[0]	validation_0-rmse:3.72943	validation_1-rmse:3.72960
[99]	validation_0-rmse:2.54384	validation_1-rmse:2.85372
Fold: 1/3 rmse score = 2.85372
[0]	validation_0-rmse:3.71919	validation_1-rmse:3.74402
[99]	validation_0-rmse:2.55168	validation_1-rmse:2.85489
Fold: 2/3 rmse score = 2.85489
[0]	validation_0-rmse:3.73260	validation_1-rmse:3.72073
[99]	validation_0-rmse:2.54272	validation_1-rmse:2.85392


[I 2023-10-10 04:39:27,373] Trial 23 finished with value: 2.854174295441513 and parameters: {'booster': 'gbtree', 'lambda': 0.12555278479182064, 'alpha': 3.1512577498233165e-07, 'subsample': 0.5084078987322909, 'colsample_bytree': 0.38472006252826524, 'n_estimators': 100, 'max_depth': 9, 'min_child_weight': 3, 'eta': 0.14891787413827814, 'gamma': 0.007159518237894169, 'grow_policy': 'depthwise'}. Best is trial 18 with value: 2.8465353441020986.


Fold: 3/3 rmse score = 2.85392
[0]	validation_0-rmse:3.72506	validation_1-rmse:3.72511
[299]	validation_0-rmse:2.13910	validation_1-rmse:2.82885
Fold: 1/3 rmse score = 2.82823
[0]	validation_0-rmse:3.71520	validation_1-rmse:3.73950
[299]	validation_0-rmse:2.14101	validation_1-rmse:2.82067
Fold: 2/3 rmse score = 2.82067
[0]	validation_0-rmse:3.72822	validation_1-rmse:3.71636
[299]	validation_0-rmse:2.11402	validation_1-rmse:2.82180
Fold: 3/3 rmse score = 2.82091


[I 2023-10-10 04:39:45,850] Trial 24 finished with value: 2.823269664161476 and parameters: {'booster': 'gbtree', 'lambda': 0.09815736465238595, 'alpha': 1.2092512031467662e-07, 'subsample': 0.504219936016216, 'colsample_bytree': 0.37201743695785855, 'n_estimators': 300, 'max_depth': 9, 'min_child_weight': 5, 'eta': 0.1578980359058606, 'gamma': 0.0005834703605618417, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 2.823269664161476.


[0]	validation_0-rmse:3.80551	validation_1-rmse:3.80117
[299]	validation_0-rmse:2.86869	validation_1-rmse:2.96690
Fold: 1/3 rmse score = 2.96690
[0]	validation_0-rmse:3.79685	validation_1-rmse:3.81824
[299]	validation_0-rmse:2.86483	validation_1-rmse:2.97797
Fold: 2/3 rmse score = 2.97797
[0]	validation_0-rmse:3.80938	validation_1-rmse:3.79332
[299]	validation_0-rmse:2.86391	validation_1-rmse:2.96919
Fold: 3/3 rmse score = 2.96919

[I 2023-10-10 04:40:07,222] Trial 25 finished with value: 2.971353241388389 and parameters: {'booster': 'gbtree', 'lambda': 0.18897882718285983, 'alpha': 5.980696896371173e-08, 'subsample': 0.4626405825423001, 'colsample_bytree': 0.3393097142474527, 'n_estimators': 300, 'max_depth': 9, 'min_child_weight': 5, 'eta': 0.013748268961200122, 'gamma': 0.000567757651352747, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 2.823269664161476.



[0]	validation_0-rmse:3.57502	validation_1-rmse:3.57927
[199]	validation_0-rmse:2.23143	validation_1-rmse:2.85342
Fold: 1/3 rmse score = 2.85175
[0]	validation_0-rmse:3.56020	validation_1-rmse:3.58595
[199]	validation_0-rmse:2.23762	validation_1-rmse:2.85951
Fold: 2/3 rmse score = 2.85852
[0]	validation_0-rmse:3.57445	validation_1-rmse:3.57157
[199]	validation_0-rmse:2.24260	validation_1-rmse:2.86196


[I 2023-10-10 04:42:14,520] Trial 26 finished with value: 2.856179844544743 and parameters: {'booster': 'dart', 'lambda': 0.0810677342093585, 'alpha': 8.064195522087121e-08, 'subsample': 0.504610219998296, 'colsample_bytree': 0.37691575295294455, 'n_estimators': 200, 'max_depth': 9, 'min_child_weight': 5, 'eta': 0.21041182606877276, 'gamma': 2.9906842660663708e-05, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 0.0002568510859414031, 'skip_drop': 0.5811313292940187}. Best is trial 24 with value: 2.823269664161476.


Fold: 3/3 rmse score = 2.85827
[0]	validation_0-rmse:3.71888	validation_1-rmse:3.71916
[499]	validation_0-rmse:1.78397	validation_1-rmse:2.79145
Fold: 1/3 rmse score = 2.79017
[0]	validation_0-rmse:3.71074	validation_1-rmse:3.73466
[499]	validation_0-rmse:1.78859	validation_1-rmse:2.79559
Fold: 2/3 rmse score = 2.79383
[0]	validation_0-rmse:3.72237	validation_1-rmse:3.71028
[499]	validation_0-rmse:1.75380	validation_1-rmse:2.77863


[I 2023-10-10 04:42:42,184] Trial 27 finished with value: 2.787380102088743 and parameters: {'booster': 'gbtree', 'lambda': 0.14479689100767912, 'alpha': 7.92381806010771e-07, 'subsample': 0.6453836747365032, 'colsample_bytree': 0.2664115183708486, 'n_estimators': 500, 'max_depth': 9, 'min_child_weight': 5, 'eta': 0.16850445025673047, 'gamma': 0.15795346186447623, 'grow_policy': 'depthwise'}. Best is trial 27 with value: 2.787380102088743.


Fold: 3/3 rmse score = 2.77814
[0]	validation_0-rmse:3.80856	validation_1-rmse:3.80398
[499]	validation_0-rmse:2.89557	validation_1-rmse:2.99596
Fold: 1/3 rmse score = 2.99596
[0]	validation_0-rmse:3.79994	validation_1-rmse:3.82120
[499]	validation_0-rmse:2.89345	validation_1-rmse:3.00587


[I 2023-10-10 04:43:03,553] Trial 28 pruned. 


Fold: 2/3 rmse score = 3.00587
[0]	validation_0-rmse:3.66993	validation_1-rmse:3.66847
[299]	validation_0-rmse:2.18398	validation_1-rmse:2.77229
Fold: 1/3 rmse score = 2.77200
[0]	validation_0-rmse:3.66184	validation_1-rmse:3.68519
[299]	validation_0-rmse:2.18793	validation_1-rmse:2.77499
Fold: 2/3 rmse score = 2.77470
[0]	validation_0-rmse:3.67329	validation_1-rmse:3.66001
[299]	validation_0-rmse:2.17930	validation_1-rmse:2.77362


[I 2023-10-10 04:43:19,892] Trial 29 finished with value: 2.7733296875197753 and parameters: {'booster': 'gbtree', 'lambda': 0.03274908508749274, 'alpha': 3.0186392044312193e-08, 'subsample': 0.6616542239739656, 'colsample_bytree': 0.5443485405403076, 'n_estimators': 300, 'max_depth': 8, 'min_child_weight': 5, 'eta': 0.1724460501856256, 'gamma': 0.03632728621602792, 'grow_policy': 'depthwise'}. Best is trial 29 with value: 2.7733296875197753.


Fold: 3/3 rmse score = 2.77329
[0]	validation_0-rmse:3.60810	validation_1-rmse:3.60744
[428]	validation_0-rmse:1.76157	validation_1-rmse:2.83002
Fold: 1/3 rmse score = 2.81675
[0]	validation_0-rmse:3.59961	validation_1-rmse:3.62404
[350]	validation_0-rmse:1.88442	validation_1-rmse:2.81837
Fold: 2/3 rmse score = 2.81495
[0]	validation_0-rmse:3.61070	validation_1-rmse:3.59876
[411]	validation_0-rmse:1.79692	validation_1-rmse:2.82439
Fold: 3/3 rmse score = 2.81236


[I 2023-10-10 04:43:40,237] Trial 30 finished with value: 2.814684604907814 and parameters: {'booster': 'gbtree', 'lambda': 0.03649669296703047, 'alpha': 1.920918588442395e-08, 'subsample': 0.6671787223237672, 'colsample_bytree': 0.5025746290769142, 'n_estimators': 600, 'max_depth': 8, 'min_child_weight': 5, 'eta': 0.2583106141827791, 'gamma': 0.020999995615863645, 'grow_policy': 'depthwise'}. Best is trial 29 with value: 2.7733296875197753.


[0]	validation_0-rmse:3.58674	validation_1-rmse:3.58694
[349]	validation_0-rmse:1.84323	validation_1-rmse:2.84287
Fold: 1/3 rmse score = 2.83248
[0]	validation_0-rmse:3.57954	validation_1-rmse:3.60447
[351]	validation_0-rmse:1.83746	validation_1-rmse:2.83934
Fold: 2/3 rmse score = 2.83075
[0]	validation_0-rmse:3.58953	validation_1-rmse:3.57848
[335]	validation_0-rmse:1.88142	validation_1-rmse:2.83675


[I 2023-10-10 04:43:58,537] Trial 31 finished with value: 2.8298123034989318 and parameters: {'booster': 'gbtree', 'lambda': 0.045574178274280064, 'alpha': 3.26104979003937e-08, 'subsample': 0.6650452453953605, 'colsample_bytree': 0.5290553785519729, 'n_estimators': 600, 'max_depth': 8, 'min_child_weight': 5, 'eta': 0.28627440753582795, 'gamma': 0.021669165644505856, 'grow_policy': 'depthwise'}. Best is trial 29 with value: 2.7733296875197753.


Fold: 3/3 rmse score = 2.82621
[0]	validation_0-rmse:3.69215	validation_1-rmse:3.68988
[499]	validation_0-rmse:1.95167	validation_1-rmse:2.71315
Fold: 1/3 rmse score = 2.71292
[0]	validation_0-rmse:3.68376	validation_1-rmse:3.70680
[499]	validation_0-rmse:1.96213	validation_1-rmse:2.72971
Fold: 2/3 rmse score = 2.72971
[0]	validation_0-rmse:3.69538	validation_1-rmse:3.68221
[499]	validation_0-rmse:1.95677	validation_1-rmse:2.72384


[I 2023-10-10 04:44:23,161] Trial 32 finished with value: 2.722068574823022 and parameters: {'booster': 'gbtree', 'lambda': 0.23579796508004802, 'alpha': 1.4192374445109695e-08, 'subsample': 0.7510062021379931, 'colsample_bytree': 0.5349070688008934, 'n_estimators': 500, 'max_depth': 8, 'min_child_weight': 5, 'eta': 0.14386635988710209, 'gamma': 0.055003939902461906, 'grow_policy': 'depthwise'}. Best is trial 32 with value: 2.722068574823022.


Fold: 3/3 rmse score = 2.72357
[0]	validation_0-rmse:3.40770	validation_1-rmse:3.41637
[160]	validation_0-rmse:1.98566	validation_1-rmse:3.00119
Fold: 1/3 rmse score = 2.91810
[0]	validation_0-rmse:3.39454	validation_1-rmse:3.42002
[157]	validation_0-rmse:1.98194	validation_1-rmse:2.97679
Fold: 2/3 rmse score = 2.90230
[0]	validation_0-rmse:3.40679	validation_1-rmse:3.40098
[153]	validation_0-rmse:1.98222	validation_1-rmse:2.98915


[I 2023-10-10 04:44:33,931] Trial 33 finished with value: 2.9087339672853116 and parameters: {'booster': 'gbtree', 'lambda': 0.31964766713645, 'alpha': 1.1243403754114169e-08, 'subsample': 0.7982152880833456, 'colsample_bytree': 0.664930089678848, 'n_estimators': 500, 'max_depth': 8, 'min_child_weight': 7, 'eta': 0.6091021325863272, 'gamma': 0.05251488639579342, 'grow_policy': 'depthwise'}. Best is trial 32 with value: 2.722068574823022.


Fold: 3/3 rmse score = 2.90580
[0]	validation_0-rmse:3.77777	validation_1-rmse:3.77352
[599]	validation_0-rmse:2.64392	validation_1-rmse:2.83831
Fold: 1/3 rmse score = 2.83831
[0]	validation_0-rmse:3.76898	validation_1-rmse:3.79032
[599]	validation_0-rmse:2.64690	validation_1-rmse:2.84668
Fold: 2/3 rmse score = 2.84668
[0]	validation_0-rmse:3.78138	validation_1-rmse:3.76530
[599]	validation_0-rmse:2.63178	validation_1-rmse:2.83976


[I 2023-10-10 04:44:58,314] Trial 34 finished with value: 2.8415789669900633 and parameters: {'booster': 'gbtree', 'lambda': 0.35743599908530793, 'alpha': 2.679800003254979e-08, 'subsample': 0.8223308094910158, 'colsample_bytree': 0.5148050538526088, 'n_estimators': 600, 'max_depth': 7, 'min_child_weight': 6, 'eta': 0.04343852989052049, 'gamma': 0.05074202443155841, 'grow_policy': 'depthwise'}. Best is trial 32 with value: 2.722068574823022.


Fold: 3/3 rmse score = 2.83976
[0]	validation_0-rmse:3.66466	validation_1-rmse:3.66075
[699]	validation_0-rmse:2.17580	validation_1-rmse:2.79330
Fold: 1/3 rmse score = 2.79283
[0]	validation_0-rmse:3.65492	validation_1-rmse:3.67674
[699]	validation_0-rmse:2.17876	validation_1-rmse:2.77682
Fold: 2/3 rmse score = 2.77681
[0]	validation_0-rmse:3.66732	validation_1-rmse:3.65228
[699]	validation_0-rmse:2.17416	validation_1-rmse:2.79588


[I 2023-10-10 04:45:23,472] Trial 35 finished with value: 2.7885047534086502 and parameters: {'booster': 'gbtree', 'lambda': 0.9649376137668262, 'alpha': 1.489188843813965e-08, 'subsample': 0.6933791481830411, 'colsample_bytree': 0.6394866348291637, 'n_estimators': 700, 'max_depth': 6, 'min_child_weight': 6, 'eta': 0.20548883065053195, 'gamma': 0.027203117566280594, 'grow_policy': 'depthwise'}. Best is trial 32 with value: 2.722068574823022.


Fold: 3/3 rmse score = 2.79588
[0]	validation_0-rmse:3.80557	validation_1-rmse:3.80072
[623]	validation_0-rmse:3.19638	validation_1-rmse:3.19348
Fold: 1/3 rmse score = 3.38283
[0]	validation_0-rmse:3.79686	validation_1-rmse:3.81796
[637]	validation_0-rmse:3.19336	validation_1-rmse:3.21369
Fold: 2/3 rmse score = 3.39761
[0]	validation_0-rmse:3.80918	validation_1-rmse:3.79298
[640]	validation_0-rmse:3.19337	validation_1-rmse:3.20254


[I 2023-10-10 04:57:56,826] Trial 36 finished with value: 3.3873056543339533 and parameters: {'booster': 'dart', 'lambda': 0.8480287067583333, 'alpha': 1.2321078708824752e-08, 'subsample': 0.7489123969336445, 'colsample_bytree': 0.6211706471006024, 'n_estimators': 700, 'max_depth': 6, 'min_child_weight': 6, 'eta': 0.008030989733448317, 'gamma': 0.23378855764421366, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'forest', 'rate_drop': 0.8902235018104074, 'skip_drop': 1.702501716320659e-08}. Best is trial 32 with value: 2.722068574823022.


Fold: 3/3 rmse score = 3.38148
[0]	validation_0-rmse:3.39395	validation_1-rmse:3.39251
[169]	validation_0-rmse:2.35369	validation_1-rmse:3.08146
Fold: 1/3 rmse score = 3.00070
[0]	validation_0-rmse:3.38162	validation_1-rmse:3.40543
[175]	validation_0-rmse:2.35278	validation_1-rmse:3.08427
Fold: 2/3 rmse score = 2.99154
[0]	validation_0-rmse:3.39317	validation_1-rmse:3.38761
[166]	validation_0-rmse:2.37183	validation_1-rmse:3.08696


[I 2023-10-10 04:58:06,389] Trial 37 finished with value: 3.000159533115053 and parameters: {'booster': 'gbtree', 'lambda': 0.30876439189190996, 'alpha': 5.048521909265675e-08, 'subsample': 0.8808851884349409, 'colsample_bytree': 0.5812589585248001, 'n_estimators': 800, 'max_depth': 6, 'min_child_weight': 6, 'eta': 0.9525263495007925, 'gamma': 0.027654526206774676, 'grow_policy': 'depthwise'}. Best is trial 32 with value: 2.722068574823022.


Fold: 3/3 rmse score = 3.00823
[0]	validation_0-rmse:3.79991	validation_1-rmse:3.79515
[699]	validation_0-rmse:3.00648	validation_1-rmse:3.03636
Fold: 1/3 rmse score = 3.03636
[0]	validation_0-rmse:3.79114	validation_1-rmse:3.81236
[699]	validation_0-rmse:3.00247	validation_1-rmse:3.04943


[I 2023-10-10 04:58:21,422] Trial 38 pruned. 


Fold: 2/3 rmse score = 3.04943
[0]	validation_0-rmse:3.74159	validation_1-rmse:3.73703
[799]	validation_0-rmse:2.34300	validation_1-rmse:2.75658
Fold: 1/3 rmse score = 2.75658
[0]	validation_0-rmse:3.73303	validation_1-rmse:3.75359
[799]	validation_0-rmse:2.33933	validation_1-rmse:2.75646
Fold: 2/3 rmse score = 2.75646
[0]	validation_0-rmse:3.74493	validation_1-rmse:3.73081
[799]	validation_0-rmse:2.34084	validation_1-rmse:2.76690


[I 2023-10-10 05:24:00,363] Trial 39 finished with value: 2.7599548012071664 and parameters: {'booster': 'dart', 'lambda': 0.17846082030171193, 'alpha': 3.364653665272697e-08, 'subsample': 0.7605018612675059, 'colsample_bytree': 0.5391160593418066, 'n_estimators': 800, 'max_depth': 7, 'min_child_weight': 4, 'eta': 0.06678116430336005, 'gamma': 0.02666881934933164, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 1.6804793700794662e-08, 'skip_drop': 0.7643961277725114}. Best is trial 32 with value: 2.722068574823022.


Fold: 3/3 rmse score = 2.76682
[0]	validation_0-rmse:3.74476	validation_1-rmse:3.73988
[899]	validation_0-rmse:2.30065	validation_1-rmse:2.74176
Fold: 1/3 rmse score = 2.74176
[0]	validation_0-rmse:3.73611	validation_1-rmse:3.75664
[899]	validation_0-rmse:2.30797	validation_1-rmse:2.74530
Fold: 2/3 rmse score = 2.74530
[0]	validation_0-rmse:3.74732	validation_1-rmse:3.73277
[899]	validation_0-rmse:2.29700	validation_1-rmse:2.74963


[I 2023-10-10 05:56:08,874] Trial 40 finished with value: 2.745560560488285 and parameters: {'booster': 'dart', 'lambda': 0.17488643173354654, 'alpha': 9.123263806704363e-07, 'subsample': 0.7920415921721711, 'colsample_bytree': 0.5756550335657311, 'n_estimators': 900, 'max_depth': 7, 'min_child_weight': 4, 'eta': 0.06366598974760403, 'gamma': 0.25124092349441557, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 3.232713481011407e-08, 'skip_drop': 0.7404650970493353}. Best is trial 32 with value: 2.722068574823022.


Fold: 3/3 rmse score = 2.74963
[0]	validation_0-rmse:3.71029	validation_1-rmse:3.70591
[999]	validation_0-rmse:2.00750	validation_1-rmse:2.70938
Fold: 1/3 rmse score = 2.70938
[0]	validation_0-rmse:3.70105	validation_1-rmse:3.72103
[999]	validation_0-rmse:2.01821	validation_1-rmse:2.71309
Fold: 2/3 rmse score = 2.71309
[0]	validation_0-rmse:3.71161	validation_1-rmse:3.69821
[999]	validation_0-rmse:2.01477	validation_1-rmse:2.71579


[I 2023-10-10 06:35:45,618] Trial 41 finished with value: 2.7127541394945687 and parameters: {'booster': 'dart', 'lambda': 0.16575861086319754, 'alpha': 4.7672202249640596e-07, 'subsample': 0.7872382459364554, 'colsample_bytree': 0.5654477668877006, 'n_estimators': 1000, 'max_depth': 7, 'min_child_weight': 4, 'eta': 0.0977204872349886, 'gamma': 0.22036177520409278, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 1.0763390413982438e-08, 'skip_drop': 0.7285832530365487}. Best is trial 41 with value: 2.7127541394945687.


Fold: 3/3 rmse score = 2.71579
[0]	validation_0-rmse:3.74833	validation_1-rmse:3.74353
[999]	validation_0-rmse:2.27129	validation_1-rmse:2.73940
Fold: 1/3 rmse score = 2.73934
[0]	validation_0-rmse:3.73990	validation_1-rmse:3.76065
[999]	validation_0-rmse:2.27598	validation_1-rmse:2.74257
Fold: 2/3 rmse score = 2.74257
[0]	validation_0-rmse:3.75108	validation_1-rmse:3.73634
[999]	validation_0-rmse:2.27461	validation_1-rmse:2.75082


[I 2023-10-10 07:15:23,010] Trial 42 finished with value: 2.7442333485679913 and parameters: {'booster': 'dart', 'lambda': 0.033672243663125274, 'alpha': 1.3973412160944986e-07, 'subsample': 0.7522064503025543, 'colsample_bytree': 0.5755672355203568, 'n_estimators': 1000, 'max_depth': 7, 'min_child_weight': 4, 'eta': 0.06015553730285521, 'gamma': 0.35817384072854147, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 1.2086433960952728e-08, 'skip_drop': 0.9150702843703045}. Best is trial 41 with value: 2.7127541394945687.


Fold: 3/3 rmse score = 2.75079
[0]	validation_0-rmse:3.75378	validation_1-rmse:3.74911
[999]	validation_0-rmse:2.31486	validation_1-rmse:2.74508
Fold: 1/3 rmse score = 2.74508
[0]	validation_0-rmse:3.74489	validation_1-rmse:3.76550
[999]	validation_0-rmse:2.31376	validation_1-rmse:2.74071
Fold: 2/3 rmse score = 2.74071
[0]	validation_0-rmse:3.75632	validation_1-rmse:3.74149
[999]	validation_0-rmse:2.31521	validation_1-rmse:2.75646


[I 2023-10-10 07:54:56,668] Trial 43 finished with value: 2.7474163108551903 and parameters: {'booster': 'dart', 'lambda': 0.2791348929770419, 'alpha': 1.7944319314304154e-07, 'subsample': 0.7742364512895884, 'colsample_bytree': 0.5802245296738939, 'n_estimators': 1000, 'max_depth': 7, 'min_child_weight': 4, 'eta': 0.05538647733788432, 'gamma': 0.3954095742857245, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 1.4556133943557153e-08, 'skip_drop': 0.9750731576095282}. Best is trial 41 with value: 2.7127541394945687.


Fold: 3/3 rmse score = 2.75646
[0]	validation_0-rmse:3.81159	validation_1-rmse:3.80681
[999]	validation_0-rmse:3.09456	validation_1-rmse:3.11139
Fold: 1/3 rmse score = 3.11139
[0]	validation_0-rmse:3.80292	validation_1-rmse:3.82411
[999]	validation_0-rmse:3.09254	validation_1-rmse:3.12524


[I 2023-10-10 08:21:39,467] Trial 44 pruned. 


Fold: 2/3 rmse score = 3.12524
[0]	validation_0-rmse:3.77133	validation_1-rmse:3.76647
[899]	validation_0-rmse:2.50361	validation_1-rmse:2.78811
Fold: 1/3 rmse score = 2.78808
[0]	validation_0-rmse:3.76262	validation_1-rmse:3.78343
[899]	validation_0-rmse:2.50315	validation_1-rmse:2.79471
Fold: 2/3 rmse score = 2.79471
[0]	validation_0-rmse:3.77439	validation_1-rmse:3.75916
[899]	validation_0-rmse:2.50154	validation_1-rmse:2.79854


[I 2023-10-10 08:54:49,029] Trial 45 finished with value: 2.7937758864176483 and parameters: {'booster': 'dart', 'lambda': 0.07619621998779237, 'alpha': 7.401805566657644e-07, 'subsample': 0.783463869475504, 'colsample_bytree': 0.6080085434677328, 'n_estimators': 900, 'max_depth': 7, 'min_child_weight': 4, 'eta': 0.03879589735005745, 'gamma': 0.7329899872961146, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 1.0635703804744064e-08, 'skip_drop': 0.9549017634642567}. Best is trial 41 with value: 2.7127541394945687.


Fold: 3/3 rmse score = 2.79854
[0]	validation_0-rmse:3.40283	validation_1-rmse:3.39790
[336]	validation_0-rmse:1.84921	validation_1-rmse:2.86009
Fold: 1/3 rmse score = 2.83814
[0]	validation_0-rmse:3.39557	validation_1-rmse:3.41291
[287]	validation_0-rmse:1.91678	validation_1-rmse:2.84958
Fold: 2/3 rmse score = 2.84081
[0]	validation_0-rmse:3.39293	validation_1-rmse:3.39200
[290]	validation_0-rmse:1.92063	validation_1-rmse:2.86438


[I 2023-10-10 08:59:03,821] Trial 46 finished with value: 2.8435567158491337 and parameters: {'booster': 'dart', 'lambda': 0.026647267545348026, 'alpha': 1.341285579846173e-07, 'subsample': 0.8325841537643965, 'colsample_bytree': 0.6754644873076766, 'n_estimators': 1000, 'max_depth': 7, 'min_child_weight': 4, 'eta': 0.451925734645941, 'gamma': 0.2836836283126501, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 1.403255320329954e-07, 'skip_drop': 0.064266518950121}. Best is trial 41 with value: 2.7127541394945687.


Fold: 3/3 rmse score = 2.85172
[0]	validation_0-rmse:3.80054	validation_1-rmse:3.79572
[899]	validation_0-rmse:2.94008	validation_1-rmse:2.99555
Fold: 1/3 rmse score = 2.99555
[0]	validation_0-rmse:3.79183	validation_1-rmse:3.81290
[899]	validation_0-rmse:2.93799	validation_1-rmse:3.00670
Fold: 2/3 rmse score = 3.00670
[0]	validation_0-rmse:3.80403	validation_1-rmse:3.78795
[899]	validation_0-rmse:2.93010	validation_1-rmse:3.00077


[I 2023-10-10 09:30:56,233] Trial 47 finished with value: 3.001006826256713 and parameters: {'booster': 'dart', 'lambda': 0.3920068132149754, 'alpha': 3.5399725216643245e-07, 'subsample': 0.9428574311170566, 'colsample_bytree': 0.5840644517812367, 'n_estimators': 900, 'max_depth': 6, 'min_child_weight': 4, 'eta': 0.012826191364817508, 'gamma': 0.47376867790779453, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 1.0323985196852995e-08, 'skip_drop': 0.07547566128028191}. Best is trial 41 with value: 2.7127541394945687.


Fold: 3/3 rmse score = 3.00077
[0]	validation_0-rmse:3.78090	validation_1-rmse:3.77639
[999]	validation_0-rmse:2.57319	validation_1-rmse:2.82366
Fold: 1/3 rmse score = 2.82366
[0]	validation_0-rmse:3.77245	validation_1-rmse:3.79331
[999]	validation_0-rmse:2.57376	validation_1-rmse:2.82628
Fold: 2/3 rmse score = 2.82628
[0]	validation_0-rmse:3.78423	validation_1-rmse:3.76870
[999]	validation_0-rmse:2.57290	validation_1-rmse:2.83525


[I 2023-10-10 10:10:39,886] Trial 48 finished with value: 2.8283956264886907 and parameters: {'booster': 'dart', 'lambda': 0.16289505989975228, 'alpha': 1.1189457100152212e-06, 'subsample': 0.73079940836495, 'colsample_bytree': 0.5077120814595937, 'n_estimators': 1000, 'max_depth': 7, 'min_child_weight': 2, 'eta': 0.029547059124562885, 'gamma': 0.11737986014049712, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 2.1624208980512185e-07, 'skip_drop': 0.06833916733278053}. Best is trial 41 with value: 2.7127541394945687.


Fold: 3/3 rmse score = 2.83525
[0]	validation_0-rmse:3.71582	validation_1-rmse:3.71039
[899]	validation_0-rmse:2.34907	validation_1-rmse:2.77223
Fold: 1/3 rmse score = 2.77219
[0]	validation_0-rmse:3.70696	validation_1-rmse:3.72717
[899]	validation_0-rmse:2.35618	validation_1-rmse:2.77645
Fold: 2/3 rmse score = 2.77645
[0]	validation_0-rmse:3.71759	validation_1-rmse:3.70388
[899]	validation_0-rmse:2.34904	validation_1-rmse:2.78659


[I 2023-10-10 10:43:05,946] Trial 49 finished with value: 2.778389977616566 and parameters: {'booster': 'dart', 'lambda': 0.005775964181075006, 'alpha': 5.851670779130156e-06, 'subsample': 0.7569654196045118, 'colsample_bytree': 0.5859999109027066, 'n_estimators': 900, 'max_depth': 6, 'min_child_weight': 4, 'eta': 0.09811762107771854, 'gamma': 0.8695862365231523, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 8.837789830769114e-08, 'skip_drop': 0.10515605394602845}. Best is trial 41 with value: 2.7127541394945687.


Fold: 3/3 rmse score = 2.78653


In [9]:
trial = study.best_trial

In [10]:
print(trial.params)

{'booster': 'dart', 'lambda': 0.16575861086319754, 'alpha': 4.7672202249640596e-07, 'subsample': 0.7872382459364554, 'colsample_bytree': 0.5654477668877006, 'n_estimators': 1000, 'max_depth': 7, 'min_child_weight': 4, 'eta': 0.0977204872349886, 'gamma': 0.22036177520409278, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 1.0763390413982438e-08, 'skip_drop': 0.7285832530365487}


In [11]:
best_param = {
  'booster': 'dart',
  'lambda': 0.16575861086319754,
  'alpha': 4.7672202249640596e-07,
  'subsample': 0.7872382459364554,
  'colsample_bytree': 0.5654477668877006,
  'n_estimators': 1000,
  'max_depth': 7,
  'min_child_weight': 4,
  'eta': 0.0977204872349886,
  'gamma': 0.22036177520409278,
  'grow_policy': 'depthwise',
  'sample_type': 'uniform',
  'normalize_type': 'forest',
  'rate_drop': 1.0763390413982438e-08,
  'skip_drop': 0.7285832530365487
}

val_scores, y_tests, models = train_model(best_param, train_x, train_y, train_null_df, test=True)

[0]	validation_0-rmse:3.71029	validation_1-rmse:3.70591
[999]	validation_0-rmse:2.00750	validation_1-rmse:2.70938
Fold: 1/3 rmse score = 2.70938
[0]	validation_0-rmse:3.70105	validation_1-rmse:3.72103
[999]	validation_0-rmse:2.01821	validation_1-rmse:2.71309
Fold: 2/3 rmse score = 2.71309
[0]	validation_0-rmse:3.71161	validation_1-rmse:3.69821
[999]	validation_0-rmse:2.01477	validation_1-rmse:2.71579
Fold: 3/3 rmse score = 2.71579


In [12]:
for idx, model in enumerate(models):
  file_name = f"./models/vwind_model_{idx}.model"
  model.save_model(file_name)

In [13]:
train_null_df["V_WIND"] = np.mean(y_tests, axis=0)

In [14]:
result = pd.concat([train_df, train_null_df], axis=0).sort_index()

In [15]:
result.head(10)

Unnamed: 0,ARI_CO_BR,ARI_CO_CA,ARI_CO_CL,ARI_CO_CN,ARI_CO_FI,ARI_CO_ID,ARI_CO_IN,ARI_CO_JP,ARI_CO_KR,ARI_CO_LV,...,SHIP_TYPE_CATEGORY_Tanker,V_WIND,WITlt0,WTI,day,hour,minute,month,weekday,year
0,0,0,0,1,0,0,0,0,0,0,...,0,3.77,0,-1.081668,15,4,3,10,3,2020
1,0,0,0,1,0,0,0,0,0,0,...,0,-6.72,0,-0.188614,17,2,55,9,1,2019
2,0,0,0,1,0,0,0,0,0,0,...,0,0.0,0,-0.305226,23,6,43,2,5,2019
3,0,0,0,0,0,0,0,1,0,0,...,0,-7.31,0,-1.07438,18,22,6,9,4,2020
4,0,0,0,0,0,0,0,0,0,0,...,0,2.31,0,1.209272,13,12,57,8,5,2022
5,0,0,0,1,0,0,0,0,0,0,...,0,0.194459,0,-0.84067,8,14,24,9,1,2015
6,0,0,0,0,0,0,0,1,0,0,...,0,-0.36,0,-0.50784,25,21,34,1,0,2021
7,0,0,0,0,0,0,0,1,0,0,...,0,0.0,0,-0.459737,18,3,48,6,1,2019
8,0,0,0,0,0,0,0,1,0,0,...,0,1.048448,0,-0.488404,27,6,23,1,4,2017
9,0,0,0,0,0,0,0,0,0,0,...,0,-0.109305,0,0.008168,3,7,8,2,5,2018


In [16]:
result.to_parquet("./data/train_v2_2v.parquet")

In [17]:
test_winds = []
for model in models:
  test_winds.append(model.predict(test_df))

In [19]:
test_df["V_WIND"] = np.mean(test_winds, axis=0)

In [20]:
assert sorted(train_df.columns) == sorted(test_df.columns)

In [21]:
test_df.to_parquet("./data/test_v2_2v.parquet")