In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import random
import os
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error

import lightgbm as lgb
#import xgboost as xgb
#import catboost as ctb

import warnings
warnings.simplefilter('ignore')

# Parameters

In [2]:
# Name of the column the models are trained to predict.
target = 'loss'

# Flip to True for a quick smoke run with tiny training budgets.
DEBUG = False

# Seeds are identical in both modes, so they are set once here.
SEED = 17      # model / global RNG seed
CVSEED = 17    # seed for the data split

# (N_SPLITS, N_ESTIMATORS, EARLY_STOPPING_ROUNDS, VERBOSE) per run mode.
if DEBUG:
    N_SPLITS, N_ESTIMATORS, EARLY_STOPPING_ROUNDS, VERBOSE = 2, 1, 1, 100
else:
    N_SPLITS, N_ESTIMATORS, EARLY_STOPPING_ROUNDS, VERBOSE = 10, 10000, 200, 1000

In [3]:
def set_seed(seed=17):
    """Seed Python's ``random`` module and NumPy's global RNG with ``seed``.

    Also exports ``PYTHONHASHSEED`` with the same value. Note that the running
    interpreter reads that variable at startup, so setting it here can only
    influence processes started afterwards.

    Args:
        seed: Integer seed applied to every generator (default 17).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)
    
# Seed the global RNGs once, up front, with the configured SEED.
set_seed(SEED)

# Load data

In [4]:
# Competition data directory; all three CSVs are read from it.
INPUT = Path("../input/tabular-playground-series-aug-2021")

# Read train, test and the sample submission in one pass, in that order.
train, test, submission = (
    pd.read_csv(INPUT / csv_name)
    for csv_name in ("train.csv", "test.csv", "sample_submission.csv")
)

In [5]:
# Attach first-level model outputs as extra columns: out-of-fold (oof) arrays go on
# train, prediction arrays go on test.
# NOTE(review): every test array is divided by 10 — presumably the saved files hold
# sums over 10 fold models; confirm against the notebooks that produced them.
# NOTE(review): the xgb/ctb oof arrays are summed without dividing — presumably each
# file already holds a partial contribution; verify.

# LightGBM
lgb_oof = np.load("../input/tps-aug-2021-lgb-no-pseudo/lgb_oof.npy")
train['lgb_pred'] = lgb_oof
lgb_test_raw = np.load("../input/tps-aug-2021-lgb-no-pseudo/lgb_pred.npy")
test['lgb_pred'] = lgb_test_raw / 10

# XGBoost (two saved runs, added together)
xgb_oof_a = np.load("../input/tps-aug-2021-xgb-no-pseudo-pred/3xgb_oof.npy")
xgb_oof_b = np.load("../input/tps-aug-2021-xgb-no-pseudo-pred/4xgb_oof.npy")
train['xgb_pred'] = xgb_oof_a + xgb_oof_b
xgb_test_a = np.load("../input/tps-aug-2021-xgb-no-pseudo-pred/3xgb_pred.npy")
xgb_test_b = np.load("../input/tps-aug-2021-xgb-no-pseudo-pred/4xgb_pred.npy")
test['xgb_pred'] = (xgb_test_a + xgb_test_b) / 10

# CatBoost (two saved runs, added together)
ctb_oof_a = np.load("../input/tps-aug-catb-no-pseudo/2ctb_oof.npy")
ctb_oof_b = np.load("../input/tps-aug-catb-no-pseudo/3ctb_oof.npy")
train['ctb_pred'] = ctb_oof_a + ctb_oof_b
ctb_test_a = np.load("../input/tps-aug-catb-no-pseudo/2ctb_pred.npy")
ctb_test_b = np.load("../input/tps-aug-catb-no-pseudo/3ctb_pred.npy")
test['ctb_pred'] = (ctb_test_a + ctb_test_b) / 10


In [6]:
# Preview the first rows of test, now including the lgb/xgb/ctb prediction columns.
test.head()

Unnamed: 0,id,f0,f1,f2,f3,f4,f5,f6,f7,f8,...,f93,f94,f95,f96,f97,f98,f99,lgb_pred,xgb_pred,ctb_pred
0,250000,0.812665,15,-1.23912,-0.893251,295.577,15.8712,23.0436,0.942256,29.898,...,1.69075,1.0593,-3.01057,1.94664,0.52947,1.38695,8.78767,8.120738,7.732295,8.092161
1,250001,0.190344,131,-0.501361,0.801921,64.8866,3.09703,344.805,0.807194,38.4219,...,1.84351,0.251895,4.44057,1.90309,0.248534,0.863881,11.7939,4.629908,4.877947,4.447047
2,250002,0.919671,19,-0.057382,0.901419,11961.2,16.3965,273.24,-0.0033,37.94,...,1.551,-0.559157,17.8386,1.83385,0.931796,2.33687,9.054,8.57212,8.540636,8.321396
3,250003,0.860985,19,-0.549509,0.471799,7501.6,2.80698,71.0817,0.792136,0.395235,...,1.4139,0.329272,0.802437,2.23251,0.893348,1.35947,4.84833,7.496467,7.472929,7.393689
4,250004,0.313229,89,0.588509,0.167705,2931.26,4.34986,1.57187,1.1183,7.75463,...,1.5802,-0.191021,26.253,2.68238,0.361923,1.5328,3.7066,7.207754,6.996287,7.29774


# Preprocessing

In [7]:
# The raw features are named f0, f1, ...; match that pattern exactly instead of
# "any column containing the letter f". A substring test would also pick up a
# stacked-prediction column such as 'rf_pred', which would then be scaled by accident
# and listed twice in the model's feature list.
scale_features = [
    col for col in test.columns
    if col.startswith('f') and col[1:].isdigit()
]

# Fit the scaler on train only and reuse the same statistics for test.
ss = StandardScaler()
train[scale_features] = ss.fit_transform(train[scale_features])
test[scale_features] = ss.transform(test[scale_features])

In [8]:
# Swap noise

# Random
def apply_noise_rn(df, p=.75):
    """Apply cell-wise swap noise to ``df``.

    Each cell is kept with probability ``p``; otherwise it is replaced by the
    value at the same position in a row-shuffled copy of ``df`` (one shuffle of
    the rows, shared by all columns).

    Args:
        df: Input DataFrame.
        p: Probability that an individual cell keeps its original value.

    Returns:
        A new DataFrame with the same shape as ``df``.
    """
    # Draw order matters for reproducibility: mask first, then the row shuffle.
    keep_mask = np.random.binomial(1, p, df.shape) == 1
    donor_values = np.random.permutation(df)
    return df.where(keep_mask, donor_values)

# Row-wise
def apply_noise_row(df, p=.75):
    """Apply swap noise with a fixed number of kept cells per row.

    In every row exactly ``int(p * n_columns)`` randomly chosen columns keep
    their value; the remaining cells are replaced by the value at the same
    position in a row-shuffled copy of ``df``.

    Args:
        df: Input DataFrame.
        p: Fraction of columns per row that keep their original value.

    Returns:
        A new DataFrame with the same shape as ``df``.
    """
    n_rows = df.shape[0]
    n_cols = df.shape[1]
    n_keep = int(p * n_cols)

    keep_mask = np.zeros(df.shape)
    for row in range(n_rows):
        # One draw per row, without replacement, marks the columns left untouched.
        kept_cols = np.random.choice(n_cols, n_keep, replace=False)
        keep_mask[row, kept_cols] = 1

    donor_values = np.random.permutation(df)
    return df.where(keep_mask == 1, donor_values)

In [9]:
# Model inputs: the scaled feature columns plus the three first-level prediction columns.
useful_features = scale_features + ['lgb_pred', 'xgb_pred','ctb_pred']

# Optuna

In [10]:
# Optuna for parameter search
!pip install -q optuna

import optuna
import pickle



In [11]:
# Tune the learning rate together with the tree hyperparameters; the number of boosting rounds is chosen by early stopping.
def objective(trial, X=train[useful_features], y=train[target]):
    """Optuna objective: hold-out RMSE of a seed-averaged LightGBM regressor.

    A single 80/20 train/validation split (fixed by ``CVSEED``) is made. One
    ``LGBMRegressor`` is fitted per seed in ``[SEED, SEED + 1]`` with early
    stopping on the validation part, and the validation predictions of the
    models are averaged before scoring.

    NOTE: the same validation split drives early stopping and produces the
    returned score, so the value is an optimistic estimate.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X: Feature frame; defaults to ``train[useful_features]`` (bound when
            the function is defined).
        y: Target values; defaults to ``train[target]``.

    Returns:
        RMSE of the seed-averaged predictions on the validation split.
    """
    # Keep the suggest_* calls in this order: it fixes the sampling sequence.
    param_space = {
        'objective': 'regression',
        'metric': 'rmse',
        'n_jobs': -1,
        # Optional GPU acceleration:
        # 'device': 'gpu', 'gpu_platform_id': 0, 'gpu_device_id': 0,
        'n_estimators': N_ESTIMATORS,
        'learning_rate': trial.suggest_float('learning_rate', 1e-2, 1e-1),
        'subsample': trial.suggest_float('subsample', 0.67, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.356, 0.756),
        'subsample_freq': trial.suggest_int('subsample_freq', 5, 9),
        'reg_lambda': trial.suggest_float('reg_lambda', 8.55, 32.55),
        'reg_alpha': trial.suggest_float('reg_alpha', 30.5, 54.5),
        'min_child_weight': trial.suggest_float('min_child_weight', 8, 208),
        'min_child_samples': trial.suggest_int('min_child_samples', 164, 284),
        'importance_type': 'gain',
    }

    seed_list = [SEED, SEED + 1]

    X_tr, X_va, y_tr, y_va = train_test_split(
        X, y, test_size=.2, random_state=CVSEED
    )

    # Experiment toggle: enable to train on swap-noised features.
    # X_tr = apply_noise_rn(X_tr)

    pred = np.zeros(y_va.shape[0])

    for inseed in seed_list:
        model = lgb.LGBMRegressor(**param_space, random_state=inseed)
        # NOTE: `early_stopping_rounds` / `verbose` are fit() keyword arguments of
        # the LightGBM release this notebook was run with; LightGBM >= 4.0 expects
        # callbacks (lgb.early_stopping, lgb.log_evaluation) instead.
        model.fit(
            X_tr,
            y_tr,
            eval_set=[(X_va, y_va)],
            eval_metric='rmse',
            early_stopping_rounds=EARLY_STOPPING_ROUNDS,
            verbose=VERBOSE,
        )

        # Equal-weight average across seeds.
        pred += model.predict(X_va) / len(seed_list)

    # sqrt(MSE) instead of the deprecated `squared=False` flag, so the metric
    # works on both old and new scikit-learn releases.
    rmse = float(np.sqrt(mean_squared_error(y_va, pred)))

    return rmse

In [12]:
# Seed the sampler explicitly: seeding `random` / NumPy does not make Optuna's
# sampler deterministic, so without this every run explores different trials.
# TPE is the sampler create_study uses by default, so only the seeding changes.
sampler = optuna.samplers.TPESampler(seed=SEED)
study = optuna.create_study(direction='minimize', sampler=sampler)

# 30 trials of the hold-out objective.
study.optimize(objective, n_trials=30)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-08-28 23:16:51,873][0m A new study created in memory with name: no-name-6c66f46f-643c-4671-8e53-e016de8183a8[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[86]	valid_0's rmse: 7.75296
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 23:17:13,684][0m Trial 0 finished with value: 7.751935784723151 and parameters: {'learning_rate': 0.03876032831466433, 'subsample': 0.9341731704447493, 'colsample_bytree': 0.5541106228231737, 'subsample_freq': 9, 'reg_lambda': 17.551398149476483, 'reg_alpha': 44.64636575856518, 'min_child_weight': 83.67502043238217, 'min_child_samples': 176}. Best is trial 0 with value: 7.751935784723151.[0m


Early stopping, best iteration is:
[96]	valid_0's rmse: 7.75188
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[243]	valid_0's rmse: 7.75144
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[231]	valid_0's rmse: 7.75216


[32m[I 2021-08-28 23:17:46,019][0m Trial 1 finished with value: 7.751588257735942 and parameters: {'learning_rate': 0.014576588483730888, 'subsample': 0.8313906708649433, 'colsample_bytree': 0.5719171102931042, 'subsample_freq': 8, 'reg_lambda': 29.088531487231364, 'reg_alpha': 49.36011218140722, 'min_child_weight': 137.51081379787257, 'min_child_samples': 208}. Best is trial 1 with value: 7.751588257735942.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[43]	valid_0's rmse: 7.75431
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 23:18:03,114][0m Trial 2 finished with value: 7.752907106626557 and parameters: {'learning_rate': 0.07101278915123728, 'subsample': 0.84561630776699, 'colsample_bytree': 0.4927637047397653, 'subsample_freq': 8, 'reg_lambda': 9.41805992446753, 'reg_alpha': 43.568566088315336, 'min_child_weight': 33.413948254592846, 'min_child_samples': 254}. Best is trial 1 with value: 7.751588257735942.[0m


Early stopping, best iteration is:
[41]	valid_0's rmse: 7.7532
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[95]	valid_0's rmse: 7.75339
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 23:18:23,725][0m Trial 3 finished with value: 7.752954087435198 and parameters: {'learning_rate': 0.02850482954105315, 'subsample': 0.6867996729575517, 'colsample_bytree': 0.5187563341870519, 'subsample_freq': 8, 'reg_lambda': 8.595589505392201, 'reg_alpha': 33.679022800278375, 'min_child_weight': 180.28306415059683, 'min_child_samples': 179}. Best is trial 1 with value: 7.751588257735942.[0m


Early stopping, best iteration is:
[99]	valid_0's rmse: 7.75343
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[43]	valid_0's rmse: 7.75263
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 23:18:41,151][0m Trial 4 finished with value: 7.752182320567776 and parameters: {'learning_rate': 0.08359606567378186, 'subsample': 0.926019146542073, 'colsample_bytree': 0.5297743166431925, 'subsample_freq': 6, 'reg_lambda': 22.67126012740681, 'reg_alpha': 38.23437265834269, 'min_child_weight': 92.83586518588169, 'min_child_samples': 180}. Best is trial 1 with value: 7.751588257735942.[0m


Early stopping, best iteration is:
[41]	valid_0's rmse: 7.75363
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[37]	valid_0's rmse: 7.75383
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 23:19:02,435][0m Trial 5 finished with value: 7.7527204204672095 and parameters: {'learning_rate': 0.07637346186949644, 'subsample': 0.7973414921741953, 'colsample_bytree': 0.74743325323928, 'subsample_freq': 9, 'reg_lambda': 23.804310646184575, 'reg_alpha': 41.56554835214994, 'min_child_weight': 114.68208104817975, 'min_child_samples': 270}. Best is trial 1 with value: 7.751588257735942.[0m


Early stopping, best iteration is:
[33]	valid_0's rmse: 7.75344
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[217]	valid_0's rmse: 7.75232
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[255]	valid_0's rmse: 7.75178


[32m[I 2021-08-28 23:19:37,798][0m Trial 6 finished with value: 7.75174524954131 and parameters: {'learning_rate': 0.014537386641364593, 'subsample': 0.7514059048013867, 'colsample_bytree': 0.6999675719787736, 'subsample_freq': 7, 'reg_lambda': 18.688711022850537, 'reg_alpha': 32.84816626303379, 'min_child_weight': 145.7616963933344, 'min_child_samples': 208}. Best is trial 1 with value: 7.751588257735942.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[136]	valid_0's rmse: 7.75261
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[123]	valid_0's rmse: 7.75234


[32m[I 2021-08-28 23:20:01,481][0m Trial 7 finished with value: 7.752164372404876 and parameters: {'learning_rate': 0.029447569206850342, 'subsample': 0.9918981539400733, 'colsample_bytree': 0.47390071054750055, 'subsample_freq': 9, 'reg_lambda': 29.945370751178373, 'reg_alpha': 43.65437306590776, 'min_child_weight': 118.13833420540526, 'min_child_samples': 266}. Best is trial 1 with value: 7.751588257735942.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[30]	valid_0's rmse: 7.75532
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 23:20:17,266][0m Trial 8 finished with value: 7.754335840093565 and parameters: {'learning_rate': 0.09405259315146094, 'subsample': 0.9721829792890557, 'colsample_bytree': 0.4185665626984949, 'subsample_freq': 9, 'reg_lambda': 12.817603525253148, 'reg_alpha': 45.958478981070826, 'min_child_weight': 28.31300951992212, 'min_child_samples': 184}. Best is trial 1 with value: 7.751588257735942.[0m


Early stopping, best iteration is:
[36]	valid_0's rmse: 7.75507
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[177]	valid_0's rmse: 7.75303
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[156]	valid_0's rmse: 7.75295


[32m[I 2021-08-28 23:20:46,367][0m Trial 9 finished with value: 7.752587450958584 and parameters: {'learning_rate': 0.02020254039147458, 'subsample': 0.6943924045551781, 'colsample_bytree': 0.6854815511413586, 'subsample_freq': 7, 'reg_lambda': 21.491501417919373, 'reg_alpha': 46.15842902929513, 'min_child_weight': 49.345344018419226, 'min_child_samples': 168}. Best is trial 1 with value: 7.751588257735942.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[72]	valid_0's rmse: 7.75479
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 23:21:08,045][0m Trial 10 finished with value: 7.752975806189387 and parameters: {'learning_rate': 0.05177265034118628, 'subsample': 0.8587498472955256, 'colsample_bytree': 0.6207081014797484, 'subsample_freq': 5, 'reg_lambda': 32.20224078619408, 'reg_alpha': 53.98523332691428, 'min_child_weight': 194.22459814376623, 'min_child_samples': 221}. Best is trial 1 with value: 7.751588257735942.[0m


Early stopping, best iteration is:
[59]	valid_0's rmse: 7.75247
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[290]	valid_0's rmse: 7.75242
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[311]	valid_0's rmse: 7.75184


[32m[I 2021-08-28 23:21:47,490][0m Trial 11 finished with value: 7.7519157875005344 and parameters: {'learning_rate': 0.01141740934574591, 'subsample': 0.7579124228028785, 'colsample_bytree': 0.6492695256587095, 'subsample_freq': 7, 'reg_lambda': 28.174815762987294, 'reg_alpha': 53.26093214995501, 'min_child_weight': 154.47807487266425, 'min_child_samples': 211}. Best is trial 1 with value: 7.751588257735942.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[306]	valid_0's rmse: 7.75203
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[241]	valid_0's rmse: 7.75266


[32m[I 2021-08-28 23:22:27,434][0m Trial 12 finished with value: 7.752060637376715 and parameters: {'learning_rate': 0.012399975624671544, 'subsample': 0.7562136721073791, 'colsample_bytree': 0.7480227668473356, 'subsample_freq': 6, 'reg_lambda': 17.360837882428214, 'reg_alpha': 31.201850795148758, 'min_child_weight': 151.4969359152438, 'min_child_samples': 203}. Best is trial 1 with value: 7.751588257735942.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[56]	valid_0's rmse: 7.75288
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 23:22:48,535][0m Trial 13 finished with value: 7.7523311302493685 and parameters: {'learning_rate': 0.04704917002352326, 'subsample': 0.7927893416833351, 'colsample_bytree': 0.6149026789208611, 'subsample_freq': 8, 'reg_lambda': 27.39731800018658, 'reg_alpha': 49.80335596070402, 'min_child_weight': 151.40097545965114, 'min_child_samples': 238}. Best is trial 1 with value: 7.751588257735942.[0m


Early stopping, best iteration is:
[68]	valid_0's rmse: 7.75313
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[304]	valid_0's rmse: 7.75205
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[288]	valid_0's rmse: 7.75128


[32m[I 2021-08-28 23:23:30,671][0m Trial 14 finished with value: 7.751478358030661 and parameters: {'learning_rate': 0.012160304822817415, 'subsample': 0.8790302032736914, 'colsample_bytree': 0.6937682134174559, 'subsample_freq': 6, 'reg_lambda': 14.649230949175333, 'reg_alpha': 49.72020510829387, 'min_child_weight': 130.41541516260526, 'min_child_samples': 198}. Best is trial 14 with value: 7.751478358030661.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[137]	valid_0's rmse: 7.75181
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[125]	valid_0's rmse: 7.75241


[32m[I 2021-08-28 23:23:56,128][0m Trial 15 finished with value: 7.751821293525129 and parameters: {'learning_rate': 0.024713279273176764, 'subsample': 0.8951379266034244, 'colsample_bytree': 0.5856424207756324, 'subsample_freq': 5, 'reg_lambda': 14.523162825537511, 'reg_alpha': 49.788671277651474, 'min_child_weight': 69.80687554851974, 'min_child_samples': 195}. Best is trial 14 with value: 7.751478358030661.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[105]	valid_0's rmse: 7.75467
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[113]	valid_0's rmse: 7.75266


[32m[I 2021-08-28 23:24:14,143][0m Trial 16 finished with value: 7.753043116133106 and parameters: {'learning_rate': 0.04038494401706518, 'subsample': 0.8779812705144844, 'colsample_bytree': 0.37791037079330203, 'subsample_freq': 6, 'reg_lambda': 13.787331535194744, 'reg_alpha': 50.474525302941586, 'min_child_weight': 128.42696881058887, 'min_child_samples': 236}. Best is trial 14 with value: 7.751478358030661.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[57]	valid_0's rmse: 7.75273
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 23:24:36,170][0m Trial 17 finished with value: 7.7524656293616125 and parameters: {'learning_rate': 0.06596567503195679, 'subsample': 0.8146765466205443, 'colsample_bytree': 0.673747590096067, 'subsample_freq': 6, 'reg_lambda': 26.22827755799998, 'reg_alpha': 48.01228373033454, 'min_child_weight': 172.96075733850725, 'min_child_samples': 225}. Best is trial 14 with value: 7.751478358030661.[0m


Early stopping, best iteration is:
[46]	valid_0's rmse: 7.75406
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[99]	valid_0's rmse: 7.75306
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 23:24:56,263][0m Trial 18 finished with value: 7.7525021524183435 and parameters: {'learning_rate': 0.035716281524891064, 'subsample': 0.9104669561578124, 'colsample_bytree': 0.45101150501430953, 'subsample_freq': 8, 'reg_lambda': 11.708957596242655, 'reg_alpha': 52.6489279364797, 'min_child_weight': 97.35948395172782, 'min_child_samples': 193}. Best is trial 14 with value: 7.751478358030661.[0m


Early stopping, best iteration is:
[104]	valid_0's rmse: 7.75282
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[59]	valid_0's rmse: 7.75356
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 23:25:15,391][0m Trial 19 finished with value: 7.753067302014412 and parameters: {'learning_rate': 0.058559725840721694, 'subsample': 0.8231084779187535, 'colsample_bytree': 0.5752207305033694, 'subsample_freq': 5, 'reg_lambda': 32.43777739391998, 'reg_alpha': 40.40653647292761, 'min_child_weight': 130.41824885890617, 'min_child_samples': 222}. Best is trial 14 with value: 7.751478358030661.[0m


Early stopping, best iteration is:
[52]	valid_0's rmse: 7.75405
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[156]	valid_0's rmse: 7.75224
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[178]	valid_0's rmse: 7.75209


[32m[I 2021-08-28 23:25:49,212][0m Trial 20 finished with value: 7.751933891552551 and parameters: {'learning_rate': 0.019653898301540525, 'subsample': 0.956276616032565, 'colsample_bytree': 0.740211533665825, 'subsample_freq': 7, 'reg_lambda': 15.760251045281631, 'reg_alpha': 47.49352918808, 'min_child_weight': 71.81080087341968, 'min_child_samples': 164}. Best is trial 14 with value: 7.751478358030661.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[306]	valid_0's rmse: 7.75226
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[290]	valid_0's rmse: 7.75245


[32m[I 2021-08-28 23:26:28,889][0m Trial 21 finished with value: 7.752130371336619 and parameters: {'learning_rate': 0.010944410100646076, 'subsample': 0.7376189301724999, 'colsample_bytree': 0.6951325073983015, 'subsample_freq': 7, 'reg_lambda': 19.365480931673936, 'reg_alpha': 36.73149587593763, 'min_child_weight': 142.71827424039708, 'min_child_samples': 209}. Best is trial 14 with value: 7.751478358030661.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[174]	valid_0's rmse: 7.75356
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[195]	valid_0's rmse: 7.75274


[32m[I 2021-08-28 23:27:00,422][0m Trial 22 finished with value: 7.752804027296953 and parameters: {'learning_rate': 0.017299335390377597, 'subsample': 0.7144162555784495, 'colsample_bytree': 0.7135602571691021, 'subsample_freq': 6, 'reg_lambda': 19.417653349272335, 'reg_alpha': 31.174695499404276, 'min_child_weight': 169.40131261824743, 'min_child_samples': 196}. Best is trial 14 with value: 7.751478358030661.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[367]	valid_0's rmse: 7.75201
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[336]	valid_0's rmse: 7.75149


[32m[I 2021-08-28 23:27:46,050][0m Trial 23 finished with value: 7.751594003213146 and parameters: {'learning_rate': 0.010015371773142233, 'subsample': 0.8633840772701261, 'colsample_bytree': 0.6360260441600412, 'subsample_freq': 8, 'reg_lambda': 24.66379270242778, 'reg_alpha': 51.50161390739713, 'min_child_weight': 207.8353982938331, 'min_child_samples': 211}. Best is trial 14 with value: 7.751478358030661.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[397]	valid_0's rmse: 7.75233
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[339]	valid_0's rmse: 7.75208


[32m[I 2021-08-28 23:28:30,642][0m Trial 24 finished with value: 7.752027756815544 and parameters: {'learning_rate': 0.010610122220915404, 'subsample': 0.8677894396066329, 'colsample_bytree': 0.6404100085693955, 'subsample_freq': 8, 'reg_lambda': 25.000750582618455, 'reg_alpha': 52.18824816465499, 'min_child_weight': 201.58645815325195, 'min_child_samples': 218}. Best is trial 14 with value: 7.751478358030661.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[137]	valid_0's rmse: 7.75163
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[155]	valid_0's rmse: 7.75191


[32m[I 2021-08-28 23:28:58,425][0m Trial 25 finished with value: 7.751445395722502 and parameters: {'learning_rate': 0.02353337642138889, 'subsample': 0.8927518164857942, 'colsample_bytree': 0.6082521012761244, 'subsample_freq': 8, 'reg_lambda': 30.353719041677138, 'reg_alpha': 51.51192793124132, 'min_child_weight': 191.56782210566885, 'min_child_samples': 237}. Best is trial 25 with value: 7.751445395722502.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[151]	valid_0's rmse: 7.75213
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[150]	valid_0's rmse: 7.75229


[32m[I 2021-08-28 23:29:26,153][0m Trial 26 finished with value: 7.751866645759807 and parameters: {'learning_rate': 0.025075274130332545, 'subsample': 0.8935170722900027, 'colsample_bytree': 0.5880349581631927, 'subsample_freq': 8, 'reg_lambda': 29.55050077304464, 'reg_alpha': 48.48869594574464, 'min_child_weight': 187.34335854299854, 'min_child_samples': 236}. Best is trial 25 with value: 7.751445395722502.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[94]	valid_0's rmse: 7.75369
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[104]	valid_0's rmse: 7.75251


[32m[I 2021-08-28 23:29:47,619][0m Trial 27 finished with value: 7.752607729780717 and parameters: {'learning_rate': 0.03517758916497572, 'subsample': 0.840279913646172, 'colsample_bytree': 0.54203643455489, 'subsample_freq': 7, 'reg_lambda': 30.776649421094703, 'reg_alpha': 54.14253299348935, 'min_child_weight': 166.517897591679, 'min_child_samples': 249}. Best is trial 25 with value: 7.751445395722502.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[120]	valid_0's rmse: 7.75327
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[124]	valid_0's rmse: 7.7528


[32m[I 2021-08-28 23:30:12,294][0m Trial 28 finished with value: 7.75267318471845 and parameters: {'learning_rate': 0.024397805434891028, 'subsample': 0.788090407321264, 'colsample_bytree': 0.6005880126907872, 'subsample_freq': 9, 'reg_lambda': 31.639933753896077, 'reg_alpha': 51.02497908675723, 'min_child_weight': 129.37139229937793, 'min_child_samples': 247}. Best is trial 25 with value: 7.751445395722502.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[99]	valid_0's rmse: 7.75222
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 23:30:35,420][0m Trial 29 finished with value: 7.752096571114211 and parameters: {'learning_rate': 0.04355472003164232, 'subsample': 0.9431199407960068, 'colsample_bytree': 0.5699524094431566, 'subsample_freq': 8, 'reg_lambda': 28.11373286833887, 'reg_alpha': 45.950880697597036, 'min_child_weight': 103.79088212196048, 'min_child_samples': 230}. Best is trial 25 with value: 7.751445395722502.[0m


Early stopping, best iteration is:
[86]	valid_0's rmse: 7.7531
Number of finished trials: 30
Best trial: {'learning_rate': 0.02353337642138889, 'subsample': 0.8927518164857942, 'colsample_bytree': 0.6082521012761244, 'subsample_freq': 8, 'reg_lambda': 30.353719041677138, 'reg_alpha': 51.51192793124132, 'min_child_weight': 191.56782210566885, 'min_child_samples': 237}


In [13]:
# Hyperparameters of the best trial found by the study.
study.best_params

{'learning_rate': 0.02353337642138889,
 'subsample': 0.8927518164857942,
 'colsample_bytree': 0.6082521012761244,
 'subsample_freq': 8,
 'reg_lambda': 30.353719041677138,
 'reg_alpha': 51.51192793124132,
 'min_child_weight': 191.56782210566885,
 'min_child_samples': 237}

# Log

7.751556608663328 no noise ver1