In [1]:
import warnings
warnings.filterwarnings(action="ignore")
import datetime
import time
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import HyperbandPruner

In [3]:
# Single seed reused by the K-fold splitter, the XGBoost models and the Optuna sampler
# so that a rerun draws the same splits and the same hyper-parameter suggestions.
RANDOM_SEED = 99

In [2]:
# Labelled training rows: AIR_TEMPERATURE is kept because it is the regression target.
train_df = pd.read_parquet('./data/train_v2_1_nn.parquet')

# Rows to be predicted later: the AIR_TEMPERATURE column is removed so both frames
# expose features only.
# NOTE(review): presumably these are the rows whose AIR_TEMPERATURE is missing — confirm.
train_null_df = pd.read_parquet('./data/train_v2_1_n.parquet')
train_null_df = train_null_df.drop("AIR_TEMPERATURE", axis=1)

test_df = pd.read_parquet('./data/test_v2_1.parquet')
test_df = test_df.drop("AIR_TEMPERATURE", axis=1)

In [5]:
# Preview the labelled training frame (still contains the AIR_TEMPERATURE target).
train_df.head()

Unnamed: 0,AIR_TEMPERATURE,ARI_CO_BR,ARI_CO_CA,ARI_CO_CL,ARI_CO_CN,ARI_CO_FI,ARI_CO_ID,ARI_CO_IN,ARI_CO_JP,ARI_CO_KR,...,SHIP_TYPE_CATEGORY_Container,SHIP_TYPE_CATEGORY_Tanker,WITlt0,WTI,day,hour,minute,month,weekday,year
0,15.9,0,0,0,1,0,0,0,0,0,...,0,0,0,-1.081668,15,4,3,10,3,2020
1,24.5,0,0,0,1,0,0,0,0,0,...,1,0,0,-0.188614,17,2,55,9,1,2019
2,9.4,0,0,0,1,0,0,0,0,0,...,1,0,0,-0.305226,23,6,43,2,5,2019
3,22.1,0,0,0,0,0,0,0,1,0,...,0,0,0,-1.07438,18,22,6,9,4,2020
4,22.8,0,0,0,0,0,0,0,0,0,...,1,0,0,1.209272,13,12,57,8,5,2022


In [7]:
# Preview the frame loaded without AIR_TEMPERATURE; its columns should match the features of train_df.
train_null_df.head()

Unnamed: 0,ARI_CO_BR,ARI_CO_CA,ARI_CO_CL,ARI_CO_CN,ARI_CO_FI,ARI_CO_ID,ARI_CO_IN,ARI_CO_JP,ARI_CO_KR,ARI_CO_LV,...,SHIP_TYPE_CATEGORY_Container,SHIP_TYPE_CATEGORY_Tanker,WITlt0,WTI,day,hour,minute,month,weekday,year
5,0,0,0,1,0,0,0,0,0,0,...,1,0,0,-0.84067,8,14,24,9,1,2015
8,0,0,0,0,0,0,0,1,0,0,...,1,0,0,-0.488404,27,6,23,1,4,2017
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0.008168,3,7,8,2,5,2018
10,0,0,0,1,0,0,0,0,0,0,...,1,0,0,0.086881,14,3,33,8,1,2018
11,0,0,0,0,0,0,0,0,0,0,...,0,0,0,-1.714288,8,22,51,2,0,2016


In [6]:
# Preview the test frame (AIR_TEMPERATURE already dropped at load time).
test_df.head()

Unnamed: 0,ARI_CO_BR,ARI_CO_CA,ARI_CO_CL,ARI_CO_CN,ARI_CO_FI,ARI_CO_ID,ARI_CO_IN,ARI_CO_JP,ARI_CO_KR,ARI_CO_LV,...,SHIP_TYPE_CATEGORY_Container,SHIP_TYPE_CATEGORY_Tanker,WITlt0,WTI,day,hour,minute,month,weekday,year
0,0,0,0,0,0,0,0,1,0,0,...,0,1,0,1.279239,27,8,29,8,5,2022
1,0,0,0,0,0,0,0,0,0,0,...,1,0,0,1.81614,27,20,57,3,6,2022
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0.789955,18,2,6,1,2,2023
3,0,0,0,0,0,0,0,1,0,0,...,0,0,0,-1.152121,2,0,59,8,1,2016
4,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0.821537,24,0,27,1,1,2023


In [8]:
# Separate the regression target from the feature columns of the labelled frame.
train_y = train_df.loc[:, "AIR_TEMPERATURE"]
train_x = train_df.drop("AIR_TEMPERATURE", axis=1)

In [9]:
def create_model(param):
    """Build an unfitted ``xgb.XGBRegressor`` for one set of hyper-parameters.

    The fixed settings (seed, ``gpu_hist`` tree method, MAE as evaluation
    metric, ``early_stopping_rounds=50``) are passed as explicit keywords and
    ``param`` is unpacked next to them, so a key in ``param`` that repeats one
    of the fixed keywords raises ``TypeError`` instead of overriding it.

    Args:
        param: dict of additional ``XGBRegressor`` keyword arguments.

    Returns:
        The configured, not yet fitted regressor.
    """
    regressor = xgb.XGBRegressor(
        tree_method="gpu_hist",
        eval_metric="mae",
        early_stopping_rounds=50,
        random_state=RANDOM_SEED,
        **param
    )
    return regressor

In [13]:
def train_model(param, X, y, X_test, trial=None, test=False, n_splits=3):
  """Cross-validate an XGBoost regressor and optionally predict on ``X_test``.

  ``X``/``y`` are split with a shuffled ``KFold`` seeded by ``RANDOM_SEED``.
  For every fold a fresh model from ``create_model(param)`` is fitted with the
  training part and the validation part as evaluation sets, and the mean
  squared error on the validation part is recorded.

  NOTE(review): the recorded score is MSE while ``create_model`` configures
  ``eval_metric="mae"``, so early stopping and the tuning objective follow
  different metrics — confirm this is intended.

  Args:
    param: dict of keyword arguments forwarded to ``create_model``.
    X: feature DataFrame; rows are selected positionally with ``.iloc``.
    y: target Series aligned with ``X``.
    X_test: features predicted by each fold model when ``test`` is True;
      not used otherwise.
    trial: optional Optuna trial. When given, each fold score is reported
      with the zero-based fold index as step and the pruner is consulted.
    test: when True, keep every fold model and its predictions on ``X_test``.
    n_splits: number of folds. Defaults to 3, the value that used to be
      hard-coded both in the splitter and in the progress message.

  Returns:
    Tuple ``(val_scores, y_tests, models)``: the per-fold validation MSE
    values, and (only filled when ``test`` is True) the per-fold predictions
    on ``X_test`` and the fitted models.

  Raises:
    optuna.TrialPruned: if ``trial`` is given and its pruner decides to stop
      after a fold has been reported.
  """
  # Plain (non-stratified) K-fold: the target is continuous.
  kfold = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)

  val_scores = []
  y_tests = []
  models = []

  for idx, (train_idx, val_idx) in enumerate(kfold.split(X, y)):

    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

    model = create_model(param)
    # The validation pair is listed last in eval_set.
    model.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)], verbose=1000)

    y_hat_val = model.predict(X_val)
    score = mean_squared_error(y_val.values, y_hat_val)
    val_scores.append(score)
    # Use the real fold count so the message stays correct for any n_splits.
    print(f'Fold: {idx+1}/{n_splits} score = {score:.5f}')

    if test:
      y_tests.append(model.predict(X_test))
      models.append(model)

    # Explicit None check instead of relying on the trial object's truthiness.
    if trial is not None:
      trial.report(score, idx)

      if trial.should_prune():
        raise optuna.TrialPruned()

  return val_scores, y_tests, models

In [14]:
def objective_xgb(trial):
  """Optuna objective: mean cross-validated MSE of one sampled XGBoost setup.

  Hyper-parameters are drawn from ``trial`` (the DART-specific ones only when
  the sampled booster is ``"dart"``), handed to ``train_model`` together with
  the module-level ``train_x`` / ``train_y``, and the average of the fold
  scores is returned as the value to minimise. ``train_model`` may raise
  ``optuna.TrialPruned``, which propagates to the study.

  NOTE(review): ``eta`` is sampled log-uniformly down to 1e-8; the logged
  trials with eta around 1e-8 to 1e-7 leave the validation MAE essentially
  unchanged from iteration 0 — consider raising the lower bound.
  """
  # The suggest calls are kept in a fixed order so a seeded sampler draws the
  # same sequence of values.
  search = {}
  search["booster"] = trial.suggest_categorical("booster", ["gbtree", "dart"])
  search["lambda"] = trial.suggest_float("lambda", 1e-8, 1.0, log=True)
  search["alpha"] = trial.suggest_float("alpha", 1e-8, 1.0, log=True)
  search["subsample"] = trial.suggest_float("subsample", 0.1, 1.0)
  search["colsample_bytree"] = trial.suggest_float("colsample_bytree", 0.1, 1.0)
  search["n_estimators"] = trial.suggest_int("n_estimators", 100, 1000, step=100)
  search["max_depth"] = trial.suggest_int("max_depth", 3, 9)
  search["min_child_weight"] = trial.suggest_int("min_child_weight", 2, 10)
  search["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
  search["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
  search["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])

  # Extra knobs that are only sampled for the DART booster.
  if search["booster"] == "dart":
    search.update({
      'sample_type': trial.suggest_categorical('sample_type', ['uniform', 'weighted']),
      'normalize_type': trial.suggest_categorical('normalize_type', ['tree', 'forest']),
      'rate_drop': trial.suggest_float('rate_drop', 1e-8, 1.0, log=True),
      'skip_drop': trial.suggest_float('skip_drop', 1e-8, 1.0, log=True),
    })

  # Only the fold scores matter here; train_model is called with its default
  # test=False, so the prediction and model lists it returns are discarded.
  fold_scores, _, _ = train_model(search, train_x, train_y, train_null_df, trial)

  return sum(fold_scores) / len(fold_scores)

In [15]:
# In-memory study that minimises the value returned by objective_xgb.
# NOTE(review): train_model reports one step per fold (steps 0..2 with the
# default three folds) while max_resource is 8 — confirm the pruner settings
# match the number of reported steps.
study = optuna.create_study(
    study_name='xgb_tuning',
    direction='minimize',
    sampler=TPESampler(seed=RANDOM_SEED),
    pruner=HyperbandPruner(min_resource=1, max_resource=8, reduction_factor=3),
)

# Run 50 trials of the search.
study.optimize(objective_xgb, n_trials=50)

[I 2023-10-08 20:47:09,743] A new study created in memory with name: xgb_tuning


[0]	validation_0-mae:18.81049	validation_1-mae:18.72838
[299]	validation_0-mae:18.81043	validation_1-mae:18.72833
Fold: 1/3 score = 430.34211
[0]	validation_0-mae:18.76841	validation_1-mae:18.81253
[299]	validation_0-mae:18.76836	validation_1-mae:18.81248
Fold: 2/3 score = 432.90070
[0]	validation_0-mae:18.77046	validation_1-mae:18.80844
[299]	validation_0-mae:18.77040	validation_1-mae:18.80838


[I 2023-10-08 20:47:21,859] Trial 0 finished with value: 431.9718954260384 and parameters: {'booster': 'gbtree', 'lambda': 0.04017550942202233, 'alpha': 1.7847241527490656e-08, 'subsample': 0.8272449670283629, 'colsample_bytree': 0.6090556776494775, 'n_estimators': 300, 'max_depth': 3, 'min_child_weight': 10, 'eta': 1.1339812255777714e-08, 'gamma': 0.014399394343769443, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 431.9718954260384.


Fold: 3/3 score = 432.67287
[0]	validation_0-mae:18.81045	validation_1-mae:18.72835
[899]	validation_0-mae:18.77740	validation_1-mae:18.69535
Fold: 1/3 score = 428.95237
[0]	validation_0-mae:18.76838	validation_1-mae:18.81250
[899]	validation_0-mae:18.73541	validation_1-mae:18.77954
Fold: 2/3 score = 431.50935
[0]	validation_0-mae:18.77042	validation_1-mae:18.80840
[899]	validation_0-mae:18.73746	validation_1-mae:18.77539


[I 2023-10-08 21:20:27,369] Trial 1 finished with value: 430.58085712661483 and parameters: {'booster': 'dart', 'lambda': 1.4575798463318516e-05, 'alpha': 0.6189425995687715, 'subsample': 0.5719732438377161, 'colsample_bytree': 0.1842517839679773, 'n_estimators': 900, 'max_depth': 4, 'min_child_weight': 6, 'eta': 2.178476742104051e-06, 'gamma': 0.033817296080697445, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'tree', 'rate_drop': 1.4193450623251104e-07, 'skip_drop': 4.983900949448835e-07}. Best is trial 1 with value: 430.58085712661483.


Fold: 3/3 score = 431.28085
[0]	validation_0-mae:18.81049	validation_1-mae:18.72838
[199]	validation_0-mae:18.81041	validation_1-mae:18.72831
Fold: 1/3 score = 430.34112
[0]	validation_0-mae:18.76841	validation_1-mae:18.81253
[199]	validation_0-mae:18.76834	validation_1-mae:18.81246
Fold: 2/3 score = 432.89971
[0]	validation_0-mae:18.77046	validation_1-mae:18.80844
[199]	validation_0-mae:18.77038	validation_1-mae:18.80836


[I 2023-10-08 21:21:23,249] Trial 2 finished with value: 431.9708972927843 and parameters: {'booster': 'gbtree', 'lambda': 7.591896035963056e-07, 'alpha': 1.128298527729365e-08, 'subsample': 0.9087797683651598, 'colsample_bytree': 0.597010984189897, 'n_estimators': 200, 'max_depth': 9, 'min_child_weight': 6, 'eta': 2.1580835301822618e-08, 'gamma': 0.00015813016339763232, 'grow_policy': 'lossguide'}. Best is trial 1 with value: 430.58085712661483.


Fold: 3/3 score = 432.67187
[0]	validation_0-mae:13.25261	validation_1-mae:13.19424
[399]	validation_0-mae:2.12210	validation_1-mae:2.21289
Fold: 1/3 score = 9.47389
[0]	validation_0-mae:13.22585	validation_1-mae:13.27249
[399]	validation_0-mae:2.13709	validation_1-mae:2.24915
Fold: 2/3 score = 9.68523
[0]	validation_0-mae:13.24750	validation_1-mae:13.26119
[399]	validation_0-mae:2.13288	validation_1-mae:2.24604


[I 2023-10-08 21:21:37,873] Trial 3 finished with value: 9.614892745431709 and parameters: {'booster': 'gbtree', 'lambda': 0.5574271302327638, 'alpha': 5.369144319173959e-05, 'subsample': 0.3376820528693848, 'colsample_bytree': 0.5944405760344648, 'n_estimators': 400, 'max_depth': 4, 'min_child_weight': 10, 'eta': 0.32615150214792754, 'gamma': 0.05011280824271637, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 9.614892745431709.


Fold: 3/3 score = 9.68556
[0]	validation_0-mae:18.81048	validation_1-mae:18.72838
[699]	validation_0-mae:18.80818	validation_1-mae:18.72608
Fold: 1/3 score = 430.24399
[0]	validation_0-mae:18.76841	validation_1-mae:18.81253
[699]	validation_0-mae:18.76611	validation_1-mae:18.81023
Fold: 2/3 score = 432.80271
[0]	validation_0-mae:18.77046	validation_1-mae:18.80844
[699]	validation_0-mae:18.76816	validation_1-mae:18.80613


[I 2023-10-08 21:22:02,266] Trial 4 finished with value: 431.87374234926307 and parameters: {'booster': 'gbtree', 'lambda': 1.9040058970389597e-05, 'alpha': 0.0013569405041690126, 'subsample': 0.9788061899951741, 'colsample_bytree': 0.7347595233221714, 'n_estimators': 700, 'max_depth': 4, 'min_child_weight': 4, 'eta': 1.9058143481065904e-07, 'gamma': 2.700633373820546e-06, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 9.614892745431709.


Fold: 3/3 score = 432.57453
[0]	validation_0-mae:18.80641	validation_1-mae:18.72431
[999]	validation_0-mae:18.30009	validation_1-mae:18.21944
Fold: 1/3 score = 408.35422
[0]	validation_0-mae:18.76432	validation_1-mae:18.80844
[999]	validation_0-mae:18.25874	validation_1-mae:18.30321


[I 2023-10-08 21:49:08,259] Trial 5 pruned. 


Fold: 2/3 score = 410.87421
[0]	validation_0-mae:18.78460	validation_1-mae:18.70252
[599]	validation_0-mae:9.27081	validation_1-mae:9.22420
Fold: 1/3 score = 111.04514
[0]	validation_0-mae:18.74250	validation_1-mae:18.78659
[599]	validation_0-mae:9.25800	validation_1-mae:9.29105
Fold: 2/3 score = 112.48687


[I 2023-10-08 21:49:22,949] Trial 6 pruned. 


[0]	validation_0-mae:18.81041	validation_1-mae:18.72830
[899]	validation_0-mae:18.73990	validation_1-mae:18.65796
Fold: 1/3 score = 427.26589
[0]	validation_0-mae:18.76833	validation_1-mae:18.81245
[899]	validation_0-mae:18.69798	validation_1-mae:18.74210


[I 2023-10-08 22:11:21,630] Trial 7 pruned. 


Fold: 2/3 score = 429.81912
[0]	validation_0-mae:18.79924	validation_1-mae:18.71717
[799]	validation_0-mae:12.25087	validation_1-mae:12.18905
Fold: 1/3 score = 190.58969
[0]	validation_0-mae:18.75719	validation_1-mae:18.80132
[799]	validation_0-mae:12.22901	validation_1-mae:12.26786
Fold: 2/3 score = 192.54546
[0]	validation_0-mae:18.75924	validation_1-mae:18.79719
[799]	validation_0-mae:12.23027	validation_1-mae:12.25608


[I 2023-10-08 22:37:28,016] Trial 8 finished with value: 191.81036015729944 and parameters: {'booster': 'dart', 'lambda': 0.016269894265187258, 'alpha': 0.00027473371985831937, 'subsample': 0.7205784430671397, 'colsample_bytree': 0.32422360794447835, 'n_estimators': 800, 'max_depth': 5, 'min_child_weight': 7, 'eta': 0.000640160651175959, 'gamma': 0.0007690362020385368, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 2.469722638494642e-07, 'skip_drop': 0.0028449094047407666}. Best is trial 3 with value: 9.614892745431709.


Fold: 3/3 score = 192.29593
[0]	validation_0-mae:18.81049	validation_1-mae:18.72838
[699]	validation_0-mae:18.80885	validation_1-mae:18.72675
Fold: 1/3 score = 430.27544
[0]	validation_0-mae:18.76841	validation_1-mae:18.81253
[699]	validation_0-mae:18.76678	validation_1-mae:18.81090


[I 2023-10-08 22:51:39,709] Trial 9 pruned. 


Fold: 2/3 score = 432.83398
[0]	validation_0-mae:9.21040	validation_1-mae:9.16684
[72]	validation_0-mae:2.52679	validation_1-mae:2.72491
Fold: 1/3 score = 12.82820
[0]	validation_0-mae:9.40190	validation_1-mae:9.42883
[71]	validation_0-mae:2.55000	validation_1-mae:2.76813
Fold: 2/3 score = 12.84602
[0]	validation_0-mae:9.42890	validation_1-mae:9.45327
[73]	validation_0-mae:2.50719	validation_1-mae:2.69098


[I 2023-10-08 22:51:49,689] Trial 10 finished with value: 13.060042686202735 and parameters: {'booster': 'gbtree', 'lambda': 0.20136472246731266, 'alpha': 0.0037294959598213996, 'subsample': 0.1136943669843487, 'colsample_bytree': 0.9836482175958339, 'n_estimators': 400, 'max_depth': 8, 'min_child_weight': 2, 'eta': 0.5421827357095554, 'gamma': 0.6491750314063931, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 9.614892745431709.


Fold: 3/3 score = 13.50590
[0]	validation_0-mae:3.81014	validation_1-mae:3.79963
[58]	validation_0-mae:4.73417	validation_1-mae:5.15297
Fold: 1/3 score = 15.65732
[0]	validation_0-mae:3.63375	validation_1-mae:3.66788
[60]	validation_0-mae:3.71076	validation_1-mae:3.91394
Fold: 2/3 score = 15.84998
[0]	validation_0-mae:4.02068	validation_1-mae:4.04709
[59]	validation_0-mae:4.70284	validation_1-mae:5.23076


[I 2023-10-08 22:51:57,498] Trial 11 finished with value: 16.119175085824704 and parameters: {'booster': 'gbtree', 'lambda': 0.6915180707346773, 'alpha': 0.007676817778163076, 'subsample': 0.10260407700657599, 'colsample_bytree': 0.9303022934183071, 'n_estimators': 400, 'max_depth': 8, 'min_child_weight': 2, 'eta': 0.989503289598702, 'gamma': 0.7293916524775244, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 9.614892745431709.


Fold: 3/3 score = 16.85022
[0]	validation_0-mae:7.10652	validation_1-mae:7.07318
[117]	validation_0-mae:1.89300	validation_1-mae:2.30249
Fold: 1/3 score = 10.18842
[0]	validation_0-mae:7.08424	validation_1-mae:7.11025
[115]	validation_0-mae:1.89525	validation_1-mae:2.31433
Fold: 2/3 score = 10.19563
[0]	validation_0-mae:7.12963	validation_1-mae:7.14343
[117]	validation_0-mae:1.88475	validation_1-mae:2.30114


[I 2023-10-08 22:52:09,840] Trial 12 finished with value: 10.178980736881362 and parameters: {'booster': 'gbtree', 'lambda': 0.9390089759158639, 'alpha': 0.0085597513515605, 'subsample': 0.4052937943893192, 'colsample_bytree': 0.965774787473821, 'n_estimators': 400, 'max_depth': 7, 'min_child_weight': 2, 'eta': 0.6887165204615721, 'gamma': 0.998337799421535, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 9.614892745431709.


Fold: 3/3 score = 10.15289
[0]	validation_0-mae:17.74960	validation_1-mae:17.67174
[99]	validation_0-mae:2.56618	validation_1-mae:2.59531
Fold: 1/3 score = 12.87753
[0]	validation_0-mae:17.70969	validation_1-mae:17.75318
[99]	validation_0-mae:2.57543	validation_1-mae:2.62496
Fold: 2/3 score = 13.08916
[0]	validation_0-mae:17.71498	validation_1-mae:17.74996
[99]	validation_0-mae:2.55329	validation_1-mae:2.58805


[I 2023-10-08 22:52:21,663] Trial 13 finished with value: 12.949987172459705 and parameters: {'booster': 'gbtree', 'lambda': 0.006004222319263677, 'alpha': 5.162400489170195e-05, 'subsample': 0.4220775521125347, 'colsample_bytree': 0.7575724832065093, 'n_estimators': 100, 'max_depth': 7, 'min_child_weight': 4, 'eta': 0.05933074855108428, 'gamma': 0.6683362526305419, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 9.614892745431709.


Fold: 3/3 score = 12.88327
[0]	validation_0-mae:18.45914	validation_1-mae:18.37685
[499]	validation_0-mae:2.57575	validation_1-mae:2.60102
Fold: 1/3 score = 12.86248
[0]	validation_0-mae:18.41785	validation_1-mae:18.46226
[499]	validation_0-mae:2.56416	validation_1-mae:2.61059


[I 2023-10-08 22:52:39,959] Trial 14 pruned. 


Fold: 2/3 score = 12.94494
[0]	validation_0-mae:18.45718	validation_1-mae:18.37630
[299]	validation_0-mae:2.57103	validation_1-mae:2.60220
Fold: 1/3 score = 12.84385
[0]	validation_0-mae:18.41529	validation_1-mae:18.45931
[299]	validation_0-mae:2.54014	validation_1-mae:2.59338
Fold: 2/3 score = 12.81195
[0]	validation_0-mae:18.41868	validation_1-mae:18.45566
[299]	validation_0-mae:2.56275	validation_1-mae:2.60040


[I 2023-10-08 22:53:07,839] Trial 15 finished with value: 12.855566472072828 and parameters: {'booster': 'gbtree', 'lambda': 0.0016302588828141604, 'alpha': 0.00022220824010587233, 'subsample': 0.5039745036025409, 'colsample_bytree': 0.703970317433247, 'n_estimators': 300, 'max_depth': 7, 'min_child_weight': 3, 'eta': 0.01974121531316481, 'gamma': 0.0018815303958341514, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 9.614892745431709.


Fold: 3/3 score = 12.91090
[0]	validation_0-mae:16.21349	validation_1-mae:16.14098
[499]	validation_0-mae:1.64113	validation_1-mae:1.92668
Fold: 1/3 score = 7.53428
[0]	validation_0-mae:16.17832	validation_1-mae:16.22104
[499]	validation_0-mae:1.65081	validation_1-mae:1.95026
Fold: 2/3 score = 7.70243
[0]	validation_0-mae:16.19054	validation_1-mae:16.22191
[499]	validation_0-mae:1.65064	validation_1-mae:1.94683
Fold: 3/3 score = 7.70134

[I 2023-10-08 22:53:42,226] Trial 16 finished with value: 7.64601576181905 and parameters: {'booster': 'gbtree', 'lambda': 0.0011101304704990158, 'alpha': 0.018777324539088213, 'subsample': 0.36059458575414616, 'colsample_bytree': 0.8542635526574458, 'n_estimators': 500, 'max_depth': 7, 'min_child_weight': 8, 'eta': 0.14566841873916728, 'gamma': 0.10061518059491076, 'grow_policy': 'lossguide'}. Best is trial 16 with value: 7.64601576181905.



[0]	validation_0-mae:18.75236	validation_1-mae:18.67042
[599]	validation_0-mae:4.89718	validation_1-mae:4.86985
Fold: 1/3 score = 34.84741
[0]	validation_0-mae:18.71050	validation_1-mae:18.75464
[599]	validation_0-mae:4.88609	validation_1-mae:4.92022


[I 2023-10-08 22:54:00,292] Trial 17 pruned. 


Fold: 2/3 score = 35.53608
[0]	validation_0-mae:17.07061	validation_1-mae:16.99543
[199]	validation_0-mae:1.77108	validation_1-mae:1.96270
Fold: 1/3 score = 7.90792
[0]	validation_0-mae:17.03184	validation_1-mae:17.07509
[199]	validation_0-mae:1.77117	validation_1-mae:1.98469
Fold: 2/3 score = 8.00734
[0]	validation_0-mae:17.03568	validation_1-mae:17.06973
[199]	validation_0-mae:1.75802	validation_1-mae:1.96597


[I 2023-10-08 22:54:14,080] Trial 18 finished with value: 7.9698738788883725 and parameters: {'booster': 'gbtree', 'lambda': 0.06870441050020963, 'alpha': 0.001008867356309089, 'subsample': 0.5677625149381036, 'colsample_bytree': 0.46267397765136364, 'n_estimators': 200, 'max_depth': 9, 'min_child_weight': 9, 'eta': 0.0988204152102819, 'gamma': 0.09273277305571015, 'grow_policy': 'depthwise'}. Best is trial 16 with value: 7.64601576181905.


Fold: 3/3 score = 7.99436
[0]	validation_0-mae:18.69919	validation_1-mae:18.61747
[199]	validation_0-mae:6.63880	validation_1-mae:6.61655
Fold: 1/3 score = 58.25262
[0]	validation_0-mae:18.65735	validation_1-mae:18.70140
[199]	validation_0-mae:6.63989	validation_1-mae:6.68208


[I 2023-10-08 22:54:25,813] Trial 19 pruned. 


Fold: 2/3 score = 59.39201
[0]	validation_0-mae:17.72133	validation_1-mae:17.63988
[99]	validation_0-mae:2.42618	validation_1-mae:2.48123
Fold: 1/3 score = 11.85488
[0]	validation_0-mae:17.68195	validation_1-mae:17.72601
[99]	validation_0-mae:2.39882	validation_1-mae:2.48061
Fold: 2/3 score = 11.81159
[0]	validation_0-mae:17.68393	validation_1-mae:17.72129
[99]	validation_0-mae:2.37359	validation_1-mae:2.44272


[I 2023-10-08 22:54:34,493] Trial 20 finished with value: 11.763645830354035 and parameters: {'booster': 'gbtree', 'lambda': 0.042733330364855555, 'alpha': 0.08394543238747437, 'subsample': 0.6547691175699307, 'colsample_bytree': 0.4383448218989016, 'n_estimators': 100, 'max_depth': 8, 'min_child_weight': 9, 'eta': 0.06414846914903319, 'gamma': 0.00618944227597624, 'grow_policy': 'depthwise'}. Best is trial 16 with value: 7.64601576181905.


Fold: 3/3 score = 11.62447
[0]	validation_0-mae:16.83708	validation_1-mae:16.75991
[499]	validation_0-mae:1.84217	validation_1-mae:1.98492
Fold: 1/3 score = 7.96064
[0]	validation_0-mae:16.79723	validation_1-mae:16.84125
[499]	validation_0-mae:1.85830	validation_1-mae:2.01926
Fold: 2/3 score = 8.14431
[0]	validation_0-mae:16.80653	validation_1-mae:16.83892
[499]	validation_0-mae:1.84911	validation_1-mae:2.01059


[I 2023-10-08 22:54:53,214] Trial 21 finished with value: 8.077606191203754 and parameters: {'booster': 'gbtree', 'lambda': 0.10838080669934495, 'alpha': 0.0003943658550958195, 'subsample': 0.5007114190709858, 'colsample_bytree': 0.6627878938943187, 'n_estimators': 500, 'max_depth': 6, 'min_child_weight': 9, 'eta': 0.11114210357154419, 'gamma': 0.10422669691874459, 'grow_policy': 'depthwise'}. Best is trial 16 with value: 7.64601576181905.


Fold: 3/3 score = 8.12786
[0]	validation_0-mae:17.56355	validation_1-mae:17.48561
[499]	validation_0-mae:1.82452	validation_1-mae:1.97076
Fold: 1/3 score = 7.88718
[0]	validation_0-mae:17.52207	validation_1-mae:17.56660
[499]	validation_0-mae:1.82995	validation_1-mae:1.99040
Fold: 2/3 score = 8.01538
[0]	validation_0-mae:17.52989	validation_1-mae:17.56371
[499]	validation_0-mae:1.81726	validation_1-mae:1.97321
Fold: 3/3 score = 7.95078

[I 2023-10-08 22:55:14,210] Trial 22 finished with value: 7.951112269258804 and parameters: {'booster': 'gbtree', 'lambda': 0.10036201771551814, 'alpha': 0.0015574736252593846, 'subsample': 0.5158664801781194, 'colsample_bytree': 0.6745341457678686, 'n_estimators': 500, 'max_depth': 7, 'min_child_weight': 9, 'eta': 0.0697017920303057, 'gamma': 0.14566804587799506, 'grow_policy': 'depthwise'}. Best is trial 16 with value: 7.64601576181905.



[0]	validation_0-mae:18.58504	validation_1-mae:18.50371
[299]	validation_0-mae:2.48255	validation_1-mae:2.53830
Fold: 1/3 score = 12.11359
[0]	validation_0-mae:18.54331	validation_1-mae:18.58730
[299]	validation_0-mae:2.48206	validation_1-mae:2.56279
Fold: 2/3 score = 12.30688

[I 2023-10-08 22:55:31,010] Trial 23 pruned. 



[0]	validation_0-mae:16.03899	validation_1-mae:15.96711
[699]	validation_0-mae:1.22823	validation_1-mae:1.82416
Fold: 1/3 score = 6.90467
[0]	validation_0-mae:15.99856	validation_1-mae:16.04247
[699]	validation_0-mae:1.23813	validation_1-mae:1.84219
Fold: 2/3 score = 6.99383
[0]	validation_0-mae:16.01169	validation_1-mae:16.04315
[699]	validation_0-mae:1.22747	validation_1-mae:1.83770


[I 2023-10-08 22:56:03,287] Trial 24 finished with value: 6.9726137847093135 and parameters: {'booster': 'gbtree', 'lambda': 0.07047565157479507, 'alpha': 0.02013676035072308, 'subsample': 0.5094564948585608, 'colsample_bytree': 0.6645083758135852, 'n_estimators': 700, 'max_depth': 8, 'min_child_weight': 7, 'eta': 0.15481838890693606, 'gamma': 0.008455063373887487, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.9726137847093135.


Fold: 3/3 score = 7.01934
[0]	validation_0-mae:15.61171	validation_1-mae:15.54085
[699]	validation_0-mae:1.42880	validation_1-mae:1.90224
Fold: 1/3 score = 7.34175
[0]	validation_0-mae:15.57296	validation_1-mae:15.61696
[699]	validation_0-mae:1.42204	validation_1-mae:1.91663
Fold: 2/3 score = 7.43876
[0]	validation_0-mae:15.58856	validation_1-mae:15.61674
[699]	validation_0-mae:1.40563	validation_1-mae:1.89326


[I 2023-10-08 22:56:31,237] Trial 25 finished with value: 7.376607969298452 and parameters: {'booster': 'gbtree', 'lambda': 0.004847187306322262, 'alpha': 0.029019694853729298, 'subsample': 0.4594740410872197, 'colsample_bytree': 0.6571303681255534, 'n_estimators': 700, 'max_depth': 7, 'min_child_weight': 7, 'eta': 0.17972238561853554, 'gamma': 0.008154755350276618, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.9726137847093135.


Fold: 3/3 score = 7.34931
[0]	validation_0-mae:18.76452	validation_1-mae:18.68258
[699]	validation_0-mae:4.63100	validation_1-mae:4.62179
Fold: 1/3 score = 30.68408
[0]	validation_0-mae:18.72256	validation_1-mae:18.76666
[699]	validation_0-mae:4.62878	validation_1-mae:4.66820


[I 2023-10-08 23:09:58,730] Trial 26 pruned. 


Fold: 2/3 score = 31.29306
[0]	validation_0-mae:15.26597	validation_1-mae:15.19753
[590]	validation_0-mae:1.46416	validation_1-mae:1.93505
Fold: 1/3 score = 7.57342
[0]	validation_0-mae:15.23029	validation_1-mae:15.27453
[799]	validation_0-mae:1.33054	validation_1-mae:1.94482
Fold: 2/3 score = 7.60430
[0]	validation_0-mae:15.24573	validation_1-mae:15.27335
[799]	validation_0-mae:1.33215	validation_1-mae:1.93635


[I 2023-10-08 23:10:28,502] Trial 27 finished with value: 7.610557473554562 and parameters: {'booster': 'gbtree', 'lambda': 0.0005491022814404359, 'alpha': 0.04555450758049558, 'subsample': 0.42160871782765835, 'colsample_bytree': 0.651815009160793, 'n_estimators': 800, 'max_depth': 7, 'min_child_weight': 5, 'eta': 0.1992962799819796, 'gamma': 0.00047484381451495605, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.9726137847093135.


Fold: 3/3 score = 7.65394
[0]	validation_0-mae:13.99239	validation_1-mae:13.93052
[338]	validation_0-mae:1.38750	validation_1-mae:1.94771
Fold: 1/3 score = 7.70635
[0]	validation_0-mae:13.95509	validation_1-mae:13.99761
[265]	validation_0-mae:1.49276	validation_1-mae:1.96993
Fold: 2/3 score = 7.84821
[0]	validation_0-mae:13.97421	validation_1-mae:14.00315
[238]	validation_0-mae:1.53357	validation_1-mae:1.98779


[I 2023-10-08 23:10:43,907] Trial 28 finished with value: 7.87500370838076 and parameters: {'booster': 'gbtree', 'lambda': 0.00020184672924539393, 'alpha': 0.20489253148520536, 'subsample': 0.47473276072308535, 'colsample_bytree': 0.65103958517127, 'n_estimators': 800, 'max_depth': 8, 'min_child_weight': 5, 'eta': 0.27069101135265955, 'gamma': 0.00047015586378333445, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.9726137847093135.


Fold: 3/3 score = 8.07045
[0]	validation_0-mae:18.37170	validation_1-mae:18.29108
[799]	validation_0-mae:2.23631	validation_1-mae:2.29330
Fold: 1/3 score = 10.32595
[0]	validation_0-mae:18.33072	validation_1-mae:18.37499
[799]	validation_0-mae:2.24424	validation_1-mae:2.31647


[I 2023-10-08 23:11:02,033] Trial 29 pruned. 


Fold: 2/3 score = 10.48122
[0]	validation_0-mae:4.44374	validation_1-mae:4.42896
[95]	validation_0-mae:2.08841	validation_1-mae:2.49538
Fold: 1/3 score = 11.73449
[0]	validation_0-mae:4.50319	validation_1-mae:4.53799
[80]	validation_0-mae:2.12599	validation_1-mae:2.49818
Fold: 2/3 score = 11.84360
[0]	validation_0-mae:4.53652	validation_1-mae:4.54862
[96]	validation_0-mae:2.10148	validation_1-mae:2.50087


[I 2023-10-08 23:11:09,237] Trial 30 finished with value: 11.754757614823921 and parameters: {'booster': 'gbtree', 'lambda': 0.019720930113729426, 'alpha': 0.15358826981468207, 'subsample': 0.5207015371512629, 'colsample_bytree': 0.6443900315890072, 'n_estimators': 1000, 'max_depth': 7, 'min_child_weight': 7, 'eta': 0.9429446678951952, 'gamma': 0.002236137714182796, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.9726137847093135.


Fold: 3/3 score = 11.68618
[0]	validation_0-mae:16.02463	validation_1-mae:15.95337
[599]	validation_0-mae:1.57108	validation_1-mae:1.92926
Fold: 1/3 score = 7.57759
[0]	validation_0-mae:15.98957	validation_1-mae:16.03250
[599]	validation_0-mae:1.56860	validation_1-mae:1.93139
Fold: 2/3 score = 7.60251
[0]	validation_0-mae:16.00141	validation_1-mae:16.03252
[599]	validation_0-mae:1.54894	validation_1-mae:1.91867


[I 2023-10-08 23:11:33,430] Trial 31 finished with value: 7.565678730043065 and parameters: {'booster': 'gbtree', 'lambda': 0.0013034541776835804, 'alpha': 0.02131821211212076, 'subsample': 0.3938200993017075, 'colsample_bytree': 0.7184009739700936, 'n_estimators': 600, 'max_depth': 7, 'min_child_weight': 8, 'eta': 0.15644149590577475, 'gamma': 3.625459392310082e-05, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.9726137847093135.


Fold: 3/3 score = 7.51693
[0]	validation_0-mae:13.38248	validation_1-mae:13.32032
[240]	validation_0-mae:1.52353	validation_1-mae:1.98620
Fold: 1/3 score = 8.00305
[0]	validation_0-mae:13.35292	validation_1-mae:13.39527
[296]	validation_0-mae:1.45757	validation_1-mae:1.99606
Fold: 2/3 score = 8.05530
[0]	validation_0-mae:13.37381	validation_1-mae:13.40161
[240]	validation_0-mae:1.50475	validation_1-mae:1.99149


[I 2023-10-08 23:11:47,918] Trial 32 finished with value: 8.037787293281978 and parameters: {'booster': 'gbtree', 'lambda': 0.0004299687210286402, 'alpha': 0.03402362759414512, 'subsample': 0.44727394452330393, 'colsample_bytree': 0.7168399090108613, 'n_estimators': 700, 'max_depth': 8, 'min_child_weight': 6, 'eta': 0.3050828805291959, 'gamma': 2.6690157138457977e-05, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.9726137847093135.


Fold: 3/3 score = 8.05501
[0]	validation_0-mae:18.18628	validation_1-mae:18.10609
[599]	validation_0-mae:2.01871	validation_1-mae:2.11706
Fold: 1/3 score = 9.01357
[0]	validation_0-mae:18.14507	validation_1-mae:18.18905
[599]	validation_0-mae:2.00615	validation_1-mae:2.11871
Fold: 2/3 score = 9.00689
[0]	validation_0-mae:18.14939	validation_1-mae:18.18566
[599]	validation_0-mae:2.00984	validation_1-mae:2.11751


[I 2023-10-08 23:12:12,427] Trial 33 finished with value: 9.024419331655219 and parameters: {'booster': 'gbtree', 'lambda': 3.890860028911947e-05, 'alpha': 0.4814564210308372, 'subsample': 0.5403609494163653, 'colsample_bytree': 0.607461861740959, 'n_estimators': 600, 'max_depth': 7, 'min_child_weight': 7, 'eta': 0.034813438935766605, 'gamma': 2.3405744267443723e-05, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.9726137847093135.


Fold: 3/3 score = 9.05280
[0]	validation_0-mae:18.66426	validation_1-mae:18.58255
[899]	validation_0-mae:2.65727	validation_1-mae:2.67670
Fold: 1/3 score = 13.55016
[0]	validation_0-mae:18.62246	validation_1-mae:18.66660
[899]	validation_0-mae:2.65441	validation_1-mae:2.69227


[I 2023-10-08 23:12:33,450] Trial 34 pruned. 


Fold: 2/3 score = 13.72651
[0]	validation_0-mae:15.47482	validation_1-mae:15.40491
[583]	validation_0-mae:1.20228	validation_1-mae:1.87464
Fold: 1/3 score = 7.28309
[0]	validation_0-mae:15.43806	validation_1-mae:15.48100
[610]	validation_0-mae:1.19132	validation_1-mae:1.88536
Fold: 2/3 score = 7.29221
[0]	validation_0-mae:15.45294	validation_1-mae:15.48341
[543]	validation_0-mae:1.24588	validation_1-mae:1.88630


[I 2023-10-08 23:13:02,243] Trial 35 finished with value: 7.317555695546008 and parameters: {'booster': 'gbtree', 'lambda': 0.0028633396462646662, 'alpha': 0.23864632266398572, 'subsample': 0.4533075494016755, 'colsample_bytree': 0.7550930778623806, 'n_estimators': 800, 'max_depth': 8, 'min_child_weight': 4, 'eta': 0.18644051165534287, 'gamma': 0.023595016857011753, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.9726137847093135.


Fold: 3/3 score = 7.37737
[0]	validation_0-mae:14.55162	validation_1-mae:14.48347
[120]	validation_0-mae:3.49906	validation_1-mae:3.49347
Fold: 1/3 score = 383.66347
[0]	validation_0-mae:14.51595	validation_1-mae:14.55944
[119]	validation_0-mae:3.54843	validation_1-mae:3.58080
Fold: 2/3 score = 386.08929
[0]	validation_0-mae:14.51600	validation_1-mae:14.54417
[119]	validation_0-mae:3.52943	validation_1-mae:3.55420


[I 2023-10-08 23:13:49,459] Trial 36 finished with value: 385.209668319438 and parameters: {'booster': 'dart', 'lambda': 0.0021893681978171877, 'alpha': 0.1877450475502135, 'subsample': 0.6071795193721571, 'colsample_bytree': 0.7832709729452115, 'n_estimators': 700, 'max_depth': 8, 'min_child_weight': 4, 'eta': 0.2374155469483285, 'gamma': 0.016868289583949574, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'forest', 'rate_drop': 0.8902235018104074, 'skip_drop': 1.702501716320659e-08}. Best is trial 24 with value: 6.9726137847093135.


Fold: 3/3 score = 385.87624
[0]	validation_0-mae:18.12876	validation_1-mae:18.04925
[599]	validation_0-mae:1.62161	validation_1-mae:1.87636
Fold: 1/3 score = 7.35659
[0]	validation_0-mae:18.08837	validation_1-mae:18.13200
[599]	validation_0-mae:1.63646	validation_1-mae:1.90390
Fold: 2/3 score = 7.46987
[0]	validation_0-mae:18.09165	validation_1-mae:18.12871
[599]	validation_0-mae:1.61658	validation_1-mae:1.88806


[I 2023-10-08 23:14:26,095] Trial 37 finished with value: 7.425593437778816 and parameters: {'booster': 'gbtree', 'lambda': 0.003186600801230674, 'alpha': 0.9259278556462924, 'subsample': 0.48776435216683967, 'colsample_bytree': 0.7381466488200074, 'n_estimators': 600, 'max_depth': 9, 'min_child_weight': 3, 'eta': 0.03766883420745457, 'gamma': 0.04197089543269219, 'grow_policy': 'depthwise'}. Best is trial 24 with value: 6.9726137847093135.


Fold: 3/3 score = 7.45032
[0]	validation_0-mae:18.05292	validation_1-mae:17.97368
[899]	validation_0-mae:1.39905	validation_1-mae:1.79511
Fold: 1/3 score = 6.79553
[0]	validation_0-mae:18.01275	validation_1-mae:18.05639
[899]	validation_0-mae:1.40162	validation_1-mae:1.81044
Fold: 2/3 score = 6.82716
[0]	validation_0-mae:18.01635	validation_1-mae:18.05350
[899]	validation_0-mae:1.38973	validation_1-mae:1.79617


[I 2023-10-08 23:15:18,457] Trial 38 finished with value: 6.811475357837906 and parameters: {'booster': 'gbtree', 'lambda': 0.0046997964749946695, 'alpha': 0.7619213721974158, 'subsample': 0.47981639929384867, 'colsample_bytree': 0.7622873766385682, 'n_estimators': 900, 'max_depth': 9, 'min_child_weight': 3, 'eta': 0.04187486416289307, 'gamma': 0.026784155748935532, 'grow_policy': 'depthwise'}. Best is trial 38 with value: 6.811475357837906.


Fold: 3/3 score = 6.81173
[0]	validation_0-mae:18.67317	validation_1-mae:18.59155
[899]	validation_0-mae:2.10009	validation_1-mae:2.20489
Fold: 1/3 score = 9.73117
[0]	validation_0-mae:18.63125	validation_1-mae:18.67531
[899]	validation_0-mae:2.10108	validation_1-mae:2.22194


[I 2023-10-08 23:39:04,999] Trial 39 pruned. 


Fold: 2/3 score = 9.82603
[0]	validation_0-mae:18.79427	validation_1-mae:18.71224
[999]	validation_0-mae:8.34737	validation_1-mae:8.32036
Fold: 1/3 score = 89.34036
[0]	validation_0-mae:18.75223	validation_1-mae:18.79634
[999]	validation_0-mae:8.33209	validation_1-mae:8.37449


[I 2023-10-08 23:39:56,329] Trial 40 pruned. 


Fold: 2/3 score = 90.44605
[0]	validation_0-mae:17.84085	validation_1-mae:17.76240
[799]	validation_0-mae:1.33686	validation_1-mae:1.78167
Fold: 1/3 score = 6.72701
[0]	validation_0-mae:17.80124	validation_1-mae:17.84475
[799]	validation_0-mae:1.34583	validation_1-mae:1.80383
Fold: 2/3 score = 6.80365
[0]	validation_0-mae:17.80526	validation_1-mae:17.84219
[799]	validation_0-mae:1.33424	validation_1-mae:1.78675


[I 2023-10-08 23:40:44,896] Trial 41 finished with value: 6.758899281532664 and parameters: {'booster': 'gbtree', 'lambda': 0.003585945247544296, 'alpha': 0.8238938333608493, 'subsample': 0.4798755271378334, 'colsample_bytree': 0.7672336186929946, 'n_estimators': 800, 'max_depth': 9, 'min_child_weight': 3, 'eta': 0.05361436778736669, 'gamma': 0.03619494440567489, 'grow_policy': 'depthwise'}. Best is trial 41 with value: 6.758899281532664.


Fold: 3/3 score = 6.74603
[0]	validation_0-mae:10.46008	validation_1-mae:10.41317
[114]	validation_0-mae:1.54702	validation_1-mae:2.14793
Fold: 1/3 score = 8.98683
[0]	validation_0-mae:10.43819	validation_1-mae:10.48285
[118]	validation_0-mae:1.52499	validation_1-mae:2.15185


[I 2023-10-08 23:40:52,145] Trial 42 pruned. 


Fold: 2/3 score = 9.04456
[0]	validation_0-mae:17.96168	validation_1-mae:17.88269
[899]	validation_0-mae:1.51380	validation_1-mae:1.82574
Fold: 1/3 score = 6.95230
[0]	validation_0-mae:17.92007	validation_1-mae:17.96350
[899]	validation_0-mae:1.51732	validation_1-mae:1.83878
Fold: 2/3 score = 6.98274
[0]	validation_0-mae:17.92482	validation_1-mae:17.96107
[899]	validation_0-mae:1.52082	validation_1-mae:1.84024


[I 2023-10-08 23:41:36,138] Trial 43 finished with value: 6.994324949450903 and parameters: {'booster': 'gbtree', 'lambda': 0.009806555809549214, 'alpha': 0.40170062923802863, 'subsample': 0.5344534390234639, 'colsample_bytree': 0.8039821147576004, 'n_estimators': 900, 'max_depth': 8, 'min_child_weight': 4, 'eta': 0.04721482873478769, 'gamma': 0.00948914567947392, 'grow_policy': 'depthwise'}. Best is trial 41 with value: 6.758899281532664.


Fold: 3/3 score = 7.04794
[0]	validation_0-mae:17.93963	validation_1-mae:17.86079
[899]	validation_0-mae:1.50388	validation_1-mae:1.82346
Fold: 1/3 score = 6.93742
[0]	validation_0-mae:17.89809	validation_1-mae:17.94155
[899]	validation_0-mae:1.51330	validation_1-mae:1.84325
Fold: 2/3 score = 6.99532
[0]	validation_0-mae:17.90223	validation_1-mae:17.93882
[899]	validation_0-mae:1.50529	validation_1-mae:1.83362


[I 2023-10-08 23:42:21,486] Trial 44 finished with value: 6.976640563911171 and parameters: {'booster': 'gbtree', 'lambda': 0.009721389087072712, 'alpha': 0.4393086338618835, 'subsample': 0.547354304345077, 'colsample_bytree': 0.8017196276324279, 'n_estimators': 900, 'max_depth': 8, 'min_child_weight': 4, 'eta': 0.04844255184817886, 'gamma': 0.02945050002659535, 'grow_policy': 'depthwise'}. Best is trial 41 with value: 6.758899281532664.


Fold: 3/3 score = 6.99718
[0]	validation_0-mae:18.12413	validation_1-mae:18.04451
[999]	validation_0-mae:1.32980	validation_1-mae:1.74364
Fold: 1/3 score = 6.50055
[0]	validation_0-mae:18.08292	validation_1-mae:18.12676
[999]	validation_0-mae:1.33361	validation_1-mae:1.76496
Fold: 2/3 score = 6.57057
[0]	validation_0-mae:18.08330	validation_1-mae:18.12001
[999]	validation_0-mae:1.32763	validation_1-mae:1.75673


[I 2023-10-09 00:25:33,235] Trial 45 finished with value: 6.551550399391943 and parameters: {'booster': 'dart', 'lambda': 0.00965012888749584, 'alpha': 0.9842762221618983, 'subsample': 0.6858379830298229, 'colsample_bytree': 0.8913163153161074, 'n_estimators': 1000, 'max_depth': 9, 'min_child_weight': 3, 'eta': 0.03792582206301568, 'gamma': 0.2443561878178881, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 9.776329107921942e-05, 'skip_drop': 4.429240963896771e-06}. Best is trial 45 with value: 6.551550399391943.


Fold: 3/3 score = 6.58353
[0]	validation_0-mae:18.49724	validation_1-mae:18.41633
[999]	validation_0-mae:1.68708	validation_1-mae:1.90929
Fold: 1/3 score = 7.64231
[0]	validation_0-mae:18.45561	validation_1-mae:18.49962
[999]	validation_0-mae:1.70237	validation_1-mae:1.93570
Fold: 2/3 score = 7.75417
[0]	validation_0-mae:18.45789	validation_1-mae:18.49512
[999]	validation_0-mae:1.68174	validation_1-mae:1.91725


[I 2023-10-09 01:08:21,527] Trial 46 finished with value: 7.701888602148912 and parameters: {'booster': 'dart', 'lambda': 0.04413839407926208, 'alpha': 0.7373083613999583, 'subsample': 0.748628628306494, 'colsample_bytree': 0.9248442110303606, 'n_estimators': 1000, 'max_depth': 9, 'min_child_weight': 2, 'eta': 0.017298230352204198, 'gamma': 0.2584901541043415, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 0.0002460530034252267, 'skip_drop': 5.464381519276375e-06}. Best is trial 45 with value: 6.551550399391943.


Fold: 3/3 score = 7.70918
[0]	validation_0-mae:18.80294	validation_1-mae:18.72087
[899]	validation_0-mae:13.19428	validation_1-mae:13.13814
Fold: 1/3 score = 215.24102
[0]	validation_0-mae:18.76088	validation_1-mae:18.80500
[899]	validation_0-mae:13.16635	validation_1-mae:13.20913
Fold: 2/3 score = 217.05133
[0]	validation_0-mae:18.76291	validation_1-mae:18.80087
[899]	validation_0-mae:13.17120	validation_1-mae:13.20015


[I 2023-10-09 01:42:00,252] Trial 47 finished with value: 216.38769589082656 and parameters: {'booster': 'dart', 'lambda': 0.2785945351004937, 'alpha': 0.08448520136305139, 'subsample': 0.6800948562898947, 'colsample_bytree': 0.8807011281586038, 'n_estimators': 900, 'max_depth': 9, 'min_child_weight': 3, 'eta': 0.00041682620943241767, 'gamma': 0.04423465520351344, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 1.5383107456359398e-05, 'skip_drop': 1.7169264477666368e-08}. Best is trial 45 with value: 6.551550399391943.


Fold: 3/3 score = 216.87074
[0]	validation_0-mae:18.74611	validation_1-mae:18.66424
[899]	validation_0-mae:2.79912	validation_1-mae:2.84490
Fold: 1/3 score = 14.23382
[0]	validation_0-mae:18.70411	validation_1-mae:18.74819
[899]	validation_0-mae:2.79309	validation_1-mae:2.86954
Fold: 2/3 score = 14.44703
[0]	validation_0-mae:18.70603	validation_1-mae:18.74390
[899]	validation_0-mae:2.79531	validation_1-mae:2.86247


[I 2023-10-09 02:16:11,960] Trial 48 finished with value: 14.376824903202321 and parameters: {'booster': 'dart', 'lambda': 0.014454633963208929, 'alpha': 0.8860111093915045, 'subsample': 0.5699838097442007, 'colsample_bytree': 0.8912614777575536, 'n_estimators': 900, 'max_depth': 9, 'min_child_weight': 2, 'eta': 0.0035555220270102952, 'gamma': 0.39886007158279546, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 0.001387395227365699, 'skip_drop': 5.938280298581349e-07}. Best is trial 45 with value: 6.551550399391943.


Fold: 3/3 score = 14.44962
[0]	validation_0-mae:18.80897	validation_1-mae:18.72687
[999]	validation_0-mae:17.36849	validation_1-mae:17.29173
Fold: 1/3 score = 368.35473
[0]	validation_0-mae:18.76690	validation_1-mae:18.81102
[999]	validation_0-mae:17.32997	validation_1-mae:17.37349


[I 2023-10-09 02:42:51,684] Trial 49 pruned. 


Fold: 2/3 score = 370.77628


In [16]:
# Keep a handle on the best trial recorded by the Optuna study so its
# parameters can be inspected in the following cell.
trial = study.best_trial

In [17]:
# Show the hyperparameter values sampled for the best trial.
print(trial.params)

{'booster': 'dart', 'lambda': 0.00965012888749584, 'alpha': 0.9842762221618983, 'subsample': 0.6858379830298229, 'colsample_bytree': 0.8913163153161074, 'n_estimators': 1000, 'max_depth': 9, 'min_child_weight': 3, 'eta': 0.03792582206301568, 'gamma': 0.2443561878178881, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 9.776329107921942e-05, 'skip_drop': 4.429240963896771e-06}


In [23]:
# Hyperparameters of the best Optuna trial (trial 45 in the log above),
# pinned as literals copied from the printed `trial.params`.
best_param = {
    # booster selection
    "booster": "dart",
    # L2 / L1 regularisation
    "lambda": 0.00965012888749584,
    "alpha": 0.9842762221618983,
    # row / column subsampling
    "subsample": 0.6858379830298229,
    "colsample_bytree": 0.8913163153161074,
    # ensemble size and tree shape
    "n_estimators": 1000,
    "max_depth": 9,
    "min_child_weight": 3,
    # learning rate and split penalty
    "eta": 0.03792582206301568,
    "gamma": 0.2443561878178881,
    "grow_policy": "depthwise",
    # DART-specific dropout settings
    "sample_type": "weighted",
    "normalize_type": "tree",
    "rate_drop": 9.776329107921942e-05,
    "skip_drop": 4.429240963896771e-06,
}

# Refit with the pinned parameters. The call returns the validation scores,
# a collection of predictions (`y_tests`) and the fitted models.
# NOTE(review): presumably `test=True` makes train_model also predict on the
# fourth argument (train_null_df) once per fold -- confirm against train_model.
val_scores, y_tests, models = train_model(
    best_param, train_x, train_y, train_null_df, test=True
)

[0]	validation_0-mae:18.12413	validation_1-mae:18.04451
[999]	validation_0-mae:1.32980	validation_1-mae:1.74364
Fold: 1/3 score = 6.50055
[0]	validation_0-mae:18.08292	validation_1-mae:18.12676
[999]	validation_0-mae:1.33361	validation_1-mae:1.76496
Fold: 2/3 score = 6.57057
[0]	validation_0-mae:18.08330	validation_1-mae:18.12001
[999]	validation_0-mae:1.32763	validation_1-mae:1.75673
Fold: 3/3 score = 6.58353


In [25]:
# save model
# Write every fitted model in `models` to its own file under ./models.
import os

# save_model does not create missing parent directories, so make sure the
# output folder exists first (a no-op when it is already there). Without this
# the cell fails on a fresh checkout that has no ./models directory.
os.makedirs("./models", exist_ok=True)

for idx, model in enumerate(models):
  # One file per model, numbered by its position in `models`.
  file_name = f"./models/temp_model_{idx}.model"
  model.save_model(file_name)

In [27]:
# Average the predictions collected in `y_tests` along axis 0 and store the
# result as the AIR_TEMPERATURE column, i.e. the column that was dropped from
# train_null_df when it was loaded.
imputed_temperature = np.mean(y_tests, axis=0)
train_null_df["AIR_TEMPERATURE"] = imputed_temperature

In [28]:
# Preview the first rows to eyeball the newly filled AIR_TEMPERATURE column.
train_null_df.head()

Unnamed: 0,ARI_CO_BR,ARI_CO_CA,ARI_CO_CL,ARI_CO_CN,ARI_CO_FI,ARI_CO_ID,ARI_CO_IN,ARI_CO_JP,ARI_CO_KR,ARI_CO_LV,...,SHIP_TYPE_CATEGORY_Tanker,WITlt0,WTI,day,hour,minute,month,weekday,year,AIR_TEMPERATURE
5,0,0,0,1,0,0,0,0,0,0,...,0,0,-0.84067,8,14,24,9,1,2015,26.11245
8,0,0,0,0,0,0,0,1,0,0,...,0,0,-0.488404,27,6,23,1,4,2017,11.093024
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0.008168,3,7,8,2,5,2018,29.269526
10,0,0,0,1,0,0,0,0,0,0,...,0,0,0.086881,14,3,33,8,1,2018,25.843529
11,0,0,0,0,0,0,0,0,0,0,...,0,0,-1.714288,8,22,51,2,0,2016,29.435951


In [29]:
# Sanity check before stacking: both frames must carry exactly the same
# column names (order-insensitive, duplicates included).
observed_cols = sorted(train_df.columns)
imputed_cols = sorted(train_null_df.columns)
assert observed_cols == imputed_cols

In [30]:
# Stack the rows with observed temperatures on top of the rows with imputed
# temperatures, then sort by index label so rows from both frames interleave
# in index order.
stacked = pd.concat([train_df, train_null_df], axis=0)
result = stacked.sort_index()

In [31]:
# Preview the first ten rows of the combined, index-sorted frame.
result.head(10)

Unnamed: 0,AIR_TEMPERATURE,ARI_CO_BR,ARI_CO_CA,ARI_CO_CL,ARI_CO_CN,ARI_CO_FI,ARI_CO_ID,ARI_CO_IN,ARI_CO_JP,ARI_CO_KR,...,SHIP_TYPE_CATEGORY_Container,SHIP_TYPE_CATEGORY_Tanker,WITlt0,WTI,day,hour,minute,month,weekday,year
0,15.9,0,0,0,1,0,0,0,0,0,...,0,0,0,-1.081668,15,4,3,10,3,2020
1,24.5,0,0,0,1,0,0,0,0,0,...,1,0,0,-0.188614,17,2,55,9,1,2019
2,9.4,0,0,0,1,0,0,0,0,0,...,1,0,0,-0.305226,23,6,43,2,5,2019
3,22.1,0,0,0,0,0,0,0,1,0,...,0,0,0,-1.07438,18,22,6,9,4,2020
4,22.8,0,0,0,0,0,0,0,0,0,...,1,0,0,1.209272,13,12,57,8,5,2022
5,26.11245,0,0,0,1,0,0,0,0,0,...,1,0,0,-0.84067,8,14,24,9,1,2015
6,4.4,0,0,0,0,0,0,0,1,0,...,1,0,0,-0.50784,25,21,34,1,0,2021
7,21.8,0,0,0,0,0,0,0,1,0,...,0,0,0,-0.459737,18,3,48,6,1,2019
8,11.093024,0,0,0,0,0,0,0,1,0,...,1,0,0,-0.488404,27,6,23,1,4,2017
9,29.269526,0,0,0,0,0,0,0,0,0,...,0,0,0,0.008168,3,7,8,2,5,2018


In [32]:
# Persist the combined training frame (observed + imputed AIR_TEMPERATURE).
result.to_parquet("./data/train_v2_1.parquet")

In [33]:
# Predict AIR_TEMPERATURE for the test set with every fitted model.
# A later cell writes the averaged prediction back into test_df as an
# AIR_TEMPERATURE column; drop that column here if it is already present so
# re-running this cell still feeds the models the same feature columns
# instead of failing on an extra feature.
test_features = test_df.drop(columns=["AIR_TEMPERATURE"], errors="ignore")
test_temps = [model.predict(test_features) for model in models]

In [34]:
# Average the per-model test predictions along axis 0 and store the result as
# the AIR_TEMPERATURE column of test_df.
mean_test_temperature = np.mean(test_temps, axis=0)
test_df["AIR_TEMPERATURE"] = mean_test_temperature

In [35]:
# Sanity check before saving: the test frame must expose exactly the same
# column names as the training frame (order-insensitive).
train_cols = sorted(train_df.columns)
test_cols = sorted(test_df.columns)
assert train_cols == test_cols

In [36]:
# Persist the test frame with the predicted AIR_TEMPERATURE column.
# NOTE(review): this is the same path test_df was read from at the top of the
# notebook, so the input file is overwritten in place -- confirm this is intended.
test_df.to_parquet("./data/test_v2_1.parquet")