In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import random
import os
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error

import lightgbm as lgb
#import xgboost as xgb
#import catboost as ctb

import warnings
warnings.simplefilter('ignore')

# Parameters

In [2]:
target = 'loss'

DEBUG = False

if DEBUG:
    N_ESTIMATORS = 1
    N_SPLITS = 2
    SEED = 17
    CVSEED = 17
    EARLY_STOPPING_ROUNDS = 1
    VERBOSE = 100
    #N_ITERS = 2
else:
    N_SPLITS = 10
    N_ESTIMATORS = 10000
    EARLY_STOPPING_ROUNDS = 200
    VERBOSE = 1000
    SEED = 17
    CVSEED = 17
    #N_ITERS = 10

In [3]:
def set_seed(seed=17):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
set_seed(SEED)

# Load data

In [4]:
INPUT = Path("../input/tabular-playground-series-aug-2021")

train = pd.read_csv(INPUT / "train.csv")
test = pd.read_csv(INPUT / "test.csv")
submission = pd.read_csv(INPUT / "sample_submission.csv")

# Preprocessing

In [5]:
scale_features = [col for col in test.columns if 'f' in col]

ss = StandardScaler()
train[scale_features] = ss.fit_transform(train[scale_features])
test[scale_features] = ss.transform(test[scale_features])

In [6]:
# Swap noise

# Random
def apply_noise_rn(df, p=.75):
    should_not_swap = np.random.binomial(1, p, df.shape)
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Row-wise
def apply_noise_row(df, p=.75):
    should_not_swap = np.zeros(df.shape)
    for i in range(df.shape[0]):
        for j in np.random.choice(df.shape[1],int(p*df.shape[1]),replace=False):
            should_not_swap[i,j]=1 
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Optuna

In [7]:
# Optuna for parameter search
!pip install -q optuna

import optuna
import pickle



In [8]:
# for the fixed learning rate, use the opt n iterations and tune the tree hyperparameters
def objective(trial, X=train[scale_features], y=train[target]):
  """
  """
  param_space = {
                    'objective': 'regression',
              'metric':'rmse',
               #'device':'gpu',  # Use GPU acceleration
               # 'gpu_platform_id': 0,
                #    'gpu_device_id': 0,
               'n_estimators':N_ESTIMATORS,
              'learning_rate':trial.suggest_uniform('learning_rate', 1e-3, 5e-2),
               'subsample': trial.suggest_uniform('subsample', 0.56, 0.76),
              'colsample_bytree':trial.suggest_uniform('colsample_bytree', 0.386, 0.586),
                'subsample_freq': trial.suggest_int('subsample_freq', 4, 6),
             #'boosting_type': 'gbdt',
               'reg_lambda':trial.suggest_uniform('reg_lambda', 34, 46),
              'reg_alpha':trial.suggest_uniform('reg_alpha', 20, 31),
            'min_child_weight':trial.suggest_uniform('min_child_weight', 400, 500),
                 'min_child_samples':trial.suggest_int('min_child_samples', 94, 118),
          'importance_type': 'gain'
             # 'num_leaves':trial.suggest_int('num_leaves', 10, 200),
              #'max_depth':trial.suggest_int('max_depth', 5, 50),
              #'min_split_gain': trial.suggest_float('min_split_gain', 0.0, 0.005),
              #'class_weight':trial.suggest_categorical('class_weight',['balanced',None]),
              # 'n_jobs' : -1,
                #'max_bin':trial.suggest_int('max_bin', 300, 1000),
              #'cat_smooth':trial.suggest_int('cat_smooth', 5, 100),
              #'cat_l2':trial.suggest_loguniform('cat_l2', 1e-3, 100)
                }
            

  seed_list=[SEED, SEED+1]
  #kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=CVSEED)
  #lgb_oof = np.zeros(train.shape[0])
  #lgb_pred = np.zeros(test.shape[0])
  
  X_tr, X_va, y_tr, y_va = train_test_split(X,y,test_size=.2,random_state=CVSEED)
  
  pred=np.zeros(y_va.shape[0])
  
  #for (trn_idx, val_idx) in enumerate(kf.split(X, y)):
  #print(f"===== fold {fold} =====")
  
  #X_tr = apply_noise_rn(X_tr)

  
  #start = time.time()
  for inseed in seed_list:
    param_space['random_state'] = inseed

    model = lgb.LGBMRegressor(**param_space)
    model.fit(
        X_tr, 
        y_tr,
        eval_set=[(X_va, y_va)],
        eval_metric='rmse',
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        verbose=VERBOSE,
    )


    pred += model.predict(X_va) / len(seed_list)
    
  #elapsed = time.time() - start
  rmse = mean_squared_error(y_va, pred, squared=False)
  #print(f"fold {fold} - lgb rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")            
  
  #print(f"oof lgb_rmse = {mean_squared_error(train[target], lgb_oof, squared=False)}")

  
  return rmse

In [9]:
study = optuna.create_study(direction='minimize')
study.optimize(objective,n_trials= 30)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-08-21 04:13:06,749][0m A new study created in memory with name: no-name-cf6f382e-3977-4728-9d84-0ac2baadb394[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76623
[2000]	valid_0's rmse: 7.75951
[3000]	valid_0's rmse: 7.75829
Early stopping, best iteration is:
[3130]	valid_0's rmse: 7.75782
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76699
[2000]	valid_0's rmse: 7.75942
[3000]	valid_0's rmse: 7.75654
Early stopping, best iteration is:
[3043]	valid_0's rmse: 7.75627


[32m[I 2021-08-21 04:16:37,011][0m Trial 0 finished with value: 7.75486952004987 and parameters: {'learning_rate': 0.01376508999204714, 'subsample': 0.6948557461507425, 'colsample_bytree': 0.5127040908887421, 'subsample_freq': 4, 'reg_lambda': 42.010560753005905, 'reg_alpha': 27.268524196259943, 'min_child_weight': 455.1441524357472, 'min_child_samples': 103}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[376]	valid_0's rmse: 7.76911
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[496]	valid_0's rmse: 7.76599


[32m[I 2021-08-21 04:17:24,337][0m Trial 1 finished with value: 7.763225243805257 and parameters: {'learning_rate': 0.048983512087310374, 'subsample': 0.7381746742428232, 'colsample_bytree': 0.5458766566819864, 'subsample_freq': 4, 'reg_lambda': 39.32802748306938, 'reg_alpha': 23.62970914203234, 'min_child_weight': 432.6772664383597, 'min_child_samples': 97}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76479
Early stopping, best iteration is:
[862]	valid_0's rmse: 7.76426
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76236
Early stopping, best iteration is:
[1000]	valid_0's rmse: 7.76236


[32m[I 2021-08-21 04:18:36,822][0m Trial 2 finished with value: 7.759611822811327 and parameters: {'learning_rate': 0.03211810742643947, 'subsample': 0.7224522648844802, 'colsample_bytree': 0.4780275329384099, 'subsample_freq': 6, 'reg_lambda': 40.89628979622917, 'reg_alpha': 29.192861679746592, 'min_child_weight': 443.7370419025346, 'min_child_samples': 107}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[738]	valid_0's rmse: 7.76431
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[674]	valid_0's rmse: 7.76358


[32m[I 2021-08-21 04:19:29,007][0m Trial 3 finished with value: 7.759103283652351 and parameters: {'learning_rate': 0.04266290028246492, 'subsample': 0.7107985122977041, 'colsample_bytree': 0.40006849275129797, 'subsample_freq': 5, 'reg_lambda': 41.88404455946339, 'reg_alpha': 22.016680058245832, 'min_child_weight': 429.95027079207455, 'min_child_samples': 98}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76432
[2000]	valid_0's rmse: 7.76045
Early stopping, best iteration is:
[1980]	valid_0's rmse: 7.76024
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76536
[2000]	valid_0's rmse: 7.76092
Early stopping, best iteration is:
[2211]	valid_0's rmse: 7.76034


[32m[I 2021-08-21 04:21:34,351][0m Trial 4 finished with value: 7.75755241641818 and parameters: {'learning_rate': 0.017290447396450914, 'subsample': 0.5911043416782338, 'colsample_bytree': 0.4137819741956466, 'subsample_freq': 5, 'reg_lambda': 43.556032870106186, 'reg_alpha': 24.627846358884195, 'min_child_weight': 495.0350717782366, 'min_child_samples': 108}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76231
Early stopping, best iteration is:
[1345]	valid_0's rmse: 7.75934
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76175
Early stopping, best iteration is:
[1340]	valid_0's rmse: 7.75995


[32m[I 2021-08-21 04:23:16,209][0m Trial 5 finished with value: 7.757207940709281 and parameters: {'learning_rate': 0.02092272122532626, 'subsample': 0.711578082605729, 'colsample_bytree': 0.5140251982307831, 'subsample_freq': 6, 'reg_lambda': 34.28115918170003, 'reg_alpha': 22.925869610027892, 'min_child_weight': 488.29642910594646, 'min_child_samples': 109}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[591]	valid_0's rmse: 7.76784
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[504]	valid_0's rmse: 7.76889


[32m[I 2021-08-21 04:23:59,420][0m Trial 6 finished with value: 7.762804153064836 and parameters: {'learning_rate': 0.04780517982516687, 'subsample': 0.6150523076459704, 'colsample_bytree': 0.41160716275067794, 'subsample_freq': 6, 'reg_lambda': 45.0194849233784, 'reg_alpha': 24.192243788531684, 'min_child_weight': 412.82449766366943, 'min_child_samples': 98}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.77892
[2000]	valid_0's rmse: 7.76383
[3000]	valid_0's rmse: 7.75893
[4000]	valid_0's rmse: 7.75737
Early stopping, best iteration is:
[4698]	valid_0's rmse: 7.75635
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.77927
[2000]	valid_0's rmse: 7.76409
[3000]	valid_0's rmse: 7.75998
Early stopping, best iteration is:
[3610]	valid_0's rmse: 7.75848


[32m[I 2021-08-21 04:28:11,723][0m Trial 7 finished with value: 7.756402547048875 and parameters: {'learning_rate': 0.007781972831352063, 'subsample': 0.7359082913064419, 'colsample_bytree': 0.4229940369444981, 'subsample_freq': 6, 'reg_lambda': 41.474963108937565, 'reg_alpha': 24.926691053120344, 'min_child_weight': 460.64889755124887, 'min_child_samples': 106}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[427]	valid_0's rmse: 7.76629
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[507]	valid_0's rmse: 7.76776


[32m[I 2021-08-21 04:28:59,945][0m Trial 8 finished with value: 7.762125654696452 and parameters: {'learning_rate': 0.04658013447300568, 'subsample': 0.6219323510803889, 'colsample_bytree': 0.5474789781311051, 'subsample_freq': 5, 'reg_lambda': 43.6567427226896, 'reg_alpha': 24.902670850625256, 'min_child_weight': 451.8652627850859, 'min_child_samples': 102}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.80613
[2000]	valid_0's rmse: 7.78577
[3000]	valid_0's rmse: 7.77454
[4000]	valid_0's rmse: 7.76818
[5000]	valid_0's rmse: 7.7646
[6000]	valid_0's rmse: 7.76208
[7000]	valid_0's rmse: 7.76048
[8000]	valid_0's rmse: 7.75937
[9000]	valid_0's rmse: 7.75849
[10000]	valid_0's rmse: 7.7575
Did not meet early stopping. Best iteration is:
[10000]	valid_0's rmse: 7.7575
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.80634
[2000]	valid_0's rmse: 7.78587
[3000]	valid_0's rmse: 7.77516
[4000]	valid_0's rmse: 7.76858
[5000]	valid_0's rmse: 7.76453
[6000]	valid_0's rmse: 7.76217
[7000]	valid_0's rmse: 7.76056
[8000]	valid_0's rmse: 7.75935
[9000]	valid_0's rmse: 7.7585
[10000]	valid_0's rmse: 7.75786
Did not meet early stopping. Best iteration is:
[9910]	valid_0's rmse: 7.75782


[32m[I 2021-08-21 04:39:52,696][0m Trial 9 finished with value: 7.757252327622047 and parameters: {'learning_rate': 0.0029373204857207043, 'subsample': 0.5897542830437769, 'colsample_bytree': 0.5076099069104143, 'subsample_freq': 4, 'reg_lambda': 39.97494691209546, 'reg_alpha': 25.30024646857647, 'min_child_weight': 481.9415995242833, 'min_child_samples': 99}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76859
[2000]	valid_0's rmse: 7.76053
Early stopping, best iteration is:
[2258]	valid_0's rmse: 7.75976
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76865
[2000]	valid_0's rmse: 7.7612
[3000]	valid_0's rmse: 7.75993
Early stopping, best iteration is:
[2839]	valid_0's rmse: 7.75953


[32m[I 2021-08-21 04:43:02,134][0m Trial 10 finished with value: 7.757952956655497 and parameters: {'learning_rate': 0.012070105203891169, 'subsample': 0.6692411837668187, 'colsample_bytree': 0.584907104620968, 'subsample_freq': 4, 'reg_lambda': 36.84445931127324, 'reg_alpha': 28.619329745934465, 'min_child_weight': 470.90752991433817, 'min_child_samples': 117}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.80382
[2000]	valid_0's rmse: 7.78274
[3000]	valid_0's rmse: 7.77171
[4000]	valid_0's rmse: 7.76619
[5000]	valid_0's rmse: 7.76285
[6000]	valid_0's rmse: 7.76068
[7000]	valid_0's rmse: 7.75916
[8000]	valid_0's rmse: 7.75799
[9000]	valid_0's rmse: 7.75721
[10000]	valid_0's rmse: 7.75677
Did not meet early stopping. Best iteration is:
[9884]	valid_0's rmse: 7.75673
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.80349
[2000]	valid_0's rmse: 7.78277
[3000]	valid_0's rmse: 7.77185
[4000]	valid_0's rmse: 7.76599
[5000]	valid_0's rmse: 7.76266
[6000]	valid_0's rmse: 7.76051
[7000]	valid_0's rmse: 7.75869
[8000]	valid_0's rmse: 7.75778
[9000]	valid_0's rmse: 7.75669
[10000]	valid_0's rmse: 7.75614
Did not meet early stopping. Best iteration is:
[9981]	valid_0's rmse: 7.75612


[32m[I 2021-08-21 04:53:22,996][0m Trial 11 finished with value: 7.75599032859216 and parameters: {'learning_rate': 0.0032717620334188225, 'subsample': 0.6865893645440614, 'colsample_bytree': 0.45883718424238473, 'subsample_freq': 6, 'reg_lambda': 37.719559530618675, 'reg_alpha': 27.805974939334824, 'min_child_weight': 465.0414811192121, 'min_child_samples': 114}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.80957
[2000]	valid_0's rmse: 7.78859
[3000]	valid_0's rmse: 7.77698
[4000]	valid_0's rmse: 7.77017
[5000]	valid_0's rmse: 7.76615
[6000]	valid_0's rmse: 7.76315
[7000]	valid_0's rmse: 7.76138
[8000]	valid_0's rmse: 7.75979
[9000]	valid_0's rmse: 7.75869
Early stopping, best iteration is:
[9361]	valid_0's rmse: 7.75845
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.80964
[2000]	valid_0's rmse: 7.78876
[3000]	valid_0's rmse: 7.77759
[4000]	valid_0's rmse: 7.77055
[5000]	valid_0's rmse: 7.76615
[6000]	valid_0's rmse: 7.76308
[7000]	valid_0's rmse: 7.76104
[8000]	valid_0's rmse: 7.75974
[9000]	valid_0's rmse: 7.75849
[10000]	valid_0's rmse: 7.75787
Did not meet early stopping. Best iteration is:
[9939]	valid_0's rmse: 7.75784


[32m[I 2021-08-21 05:03:59,641][0m Trial 12 finished with value: 7.757850459003973 and parameters: {'learning_rate': 0.002675759057814543, 'subsample': 0.6725365896624663, 'colsample_bytree': 0.4550633468481581, 'subsample_freq': 4, 'reg_lambda': 37.2591958202236, 'reg_alpha': 27.58497456503371, 'min_child_weight': 466.96087004792037, 'min_child_samples': 115}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76435
Early stopping, best iteration is:
[1025]	valid_0's rmse: 7.76423
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76085
Early stopping, best iteration is:
[1031]	valid_0's rmse: 7.76061


[32m[I 2021-08-21 05:05:18,051][0m Trial 13 finished with value: 7.759367011500265 and parameters: {'learning_rate': 0.02722433661517163, 'subsample': 0.6872114289243363, 'colsample_bytree': 0.45782436706157803, 'subsample_freq': 5, 'reg_lambda': 37.77660927257911, 'reg_alpha': 30.982352297857663, 'min_child_weight': 448.20422137641356, 'min_child_samples': 113}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76889
[2000]	valid_0's rmse: 7.76023
Early stopping, best iteration is:
[2089]	valid_0's rmse: 7.75999
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76859
[2000]	valid_0's rmse: 7.75992
Early stopping, best iteration is:
[2569]	valid_0's rmse: 7.75834


[32m[I 2021-08-21 05:08:01,458][0m Trial 14 finished with value: 7.757616340844274 and parameters: {'learning_rate': 0.011838236288386336, 'subsample': 0.6408274006818689, 'colsample_bytree': 0.49845627604287535, 'subsample_freq': 5, 'reg_lambda': 34.63935557780383, 'reg_alpha': 27.012331674374046, 'min_child_weight': 480.7543550408765, 'min_child_samples': 103}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.82509
[2000]	valid_0's rmse: 7.80622
[3000]	valid_0's rmse: 7.79401
[4000]	valid_0's rmse: 7.78544
[5000]	valid_0's rmse: 7.77904
[6000]	valid_0's rmse: 7.77418
[7000]	valid_0's rmse: 7.77044
[8000]	valid_0's rmse: 7.76792
[9000]	valid_0's rmse: 7.76577
[10000]	valid_0's rmse: 7.76393
Did not meet early stopping. Best iteration is:
[9999]	valid_0's rmse: 7.76393
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.82504
[2000]	valid_0's rmse: 7.80618
[3000]	valid_0's rmse: 7.7941
[4000]	valid_0's rmse: 7.78543
[5000]	valid_0's rmse: 7.77896
[6000]	valid_0's rmse: 7.77411
[7000]	valid_0's rmse: 7.7707
[8000]	valid_0's rmse: 7.76806
[9000]	valid_0's rmse: 7.76582
[10000]	valid_0's rmse: 7.76413
Did not meet early stopping. Best iteration is:
[9999]	valid_0's rmse: 7.76413


[32m[I 2021-08-21 05:19:35,398][0m Trial 15 finished with value: 7.763931906966958 and parameters: {'learning_rate': 0.0015000012936103556, 'subsample': 0.6925856722791045, 'colsample_bytree': 0.4484370866583729, 'subsample_freq': 4, 'reg_lambda': 38.69485609679588, 'reg_alpha': 30.825613800065433, 'min_child_weight': 458.925384219028, 'min_child_samples': 112}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.77559
[2000]	valid_0's rmse: 7.76323
[3000]	valid_0's rmse: 7.75949
Early stopping, best iteration is:
[3616]	valid_0's rmse: 7.75819
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.77655
[2000]	valid_0's rmse: 7.76363
[3000]	valid_0's rmse: 7.76019
[4000]	valid_0's rmse: 7.75885
Early stopping, best iteration is:
[4379]	valid_0's rmse: 7.75808


[32m[I 2021-08-21 05:24:08,265][0m Trial 16 finished with value: 7.756755913073193 and parameters: {'learning_rate': 0.00853641987255329, 'subsample': 0.6500794349773636, 'colsample_bytree': 0.531517501477175, 'subsample_freq': 6, 'reg_lambda': 36.19505613518561, 'reg_alpha': 26.92965224252249, 'min_child_weight': 433.24615721757516, 'min_child_samples': 103}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.7647
[2000]	valid_0's rmse: 7.76132
Early stopping, best iteration is:
[1987]	valid_0's rmse: 7.76109
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76471
[2000]	valid_0's rmse: 7.7599
Early stopping, best iteration is:
[1835]	valid_0's rmse: 7.75936


[32m[I 2021-08-21 05:26:20,701][0m Trial 17 finished with value: 7.757915382594838 and parameters: {'learning_rate': 0.01836444995576427, 'subsample': 0.7568069720038744, 'colsample_bytree': 0.4786945863915079, 'subsample_freq': 5, 'reg_lambda': 42.91827745388838, 'reg_alpha': 20.425004103989103, 'min_child_weight': 404.9933161742756, 'min_child_samples': 118}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76098
Early stopping, best iteration is:
[1327]	valid_0's rmse: 7.75957
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76273
Early stopping, best iteration is:
[1057]	valid_0's rmse: 7.76204


[32m[I 2021-08-21 05:28:01,643][0m Trial 18 finished with value: 7.7573049306772575 and parameters: {'learning_rate': 0.026499738121663945, 'subsample': 0.6902634052119405, 'colsample_bytree': 0.5800437268058233, 'subsample_freq': 4, 'reg_lambda': 35.64935891999333, 'reg_alpha': 29.962305908090915, 'min_child_weight': 472.9673205781396, 'min_child_samples': 111}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.7655
Early stopping, best iteration is:
[871]	valid_0's rmse: 7.76393
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.7637
Early stopping, best iteration is:
[864]	valid_0's rmse: 7.76317


[32m[I 2021-08-21 05:29:17,713][0m Trial 19 finished with value: 7.75970456712699 and parameters: {'learning_rate': 0.034408811259850486, 'subsample': 0.7572891445931575, 'colsample_bytree': 0.5641948660200881, 'subsample_freq': 6, 'reg_lambda': 45.80696990483456, 'reg_alpha': 26.6025463206831, 'min_child_weight': 441.41441150193907, 'min_child_samples': 94}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.78126
[2000]	valid_0's rmse: 7.76575
[3000]	valid_0's rmse: 7.76107
[4000]	valid_0's rmse: 7.75941
Early stopping, best iteration is:
[4187]	valid_0's rmse: 7.75901
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.78118
[2000]	valid_0's rmse: 7.76607
[3000]	valid_0's rmse: 7.76148
[4000]	valid_0's rmse: 7.75934
Early stopping, best iteration is:
[4455]	valid_0's rmse: 7.75805


[32m[I 2021-08-21 05:33:33,512][0m Trial 20 finished with value: 7.7574331134174725 and parameters: {'learning_rate': 0.007211168797456119, 'subsample': 0.5612786502904605, 'colsample_bytree': 0.43837908905374523, 'subsample_freq': 5, 'reg_lambda': 38.5803307603097, 'reg_alpha': 28.243443029344252, 'min_child_weight': 460.95594713331764, 'min_child_samples': 101}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.78526
[2000]	valid_0's rmse: 7.76773
[3000]	valid_0's rmse: 7.7618
[4000]	valid_0's rmse: 7.75944
[5000]	valid_0's rmse: 7.75844
Early stopping, best iteration is:
[5534]	valid_0's rmse: 7.75775
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.78573
[2000]	valid_0's rmse: 7.76843
[3000]	valid_0's rmse: 7.76233
[4000]	valid_0's rmse: 7.75966
[5000]	valid_0's rmse: 7.75795
[6000]	valid_0's rmse: 7.75638
[7000]	valid_0's rmse: 7.75574
Early stopping, best iteration is:
[6829]	valid_0's rmse: 7.75554


[32m[I 2021-08-21 05:39:43,723][0m Trial 21 finished with value: 7.75575855101279 and parameters: {'learning_rate': 0.0061732474003399936, 'subsample': 0.7404285165783486, 'colsample_bytree': 0.4220405509378587, 'subsample_freq': 6, 'reg_lambda': 41.13830958848551, 'reg_alpha': 26.049755987463147, 'min_child_weight': 456.8798959946385, 'min_child_samples': 105}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76685
[2000]	valid_0's rmse: 7.75971
[3000]	valid_0's rmse: 7.75912
Early stopping, best iteration is:
[2815]	valid_0's rmse: 7.75878
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76647
[2000]	valid_0's rmse: 7.76064
Early stopping, best iteration is:
[1944]	valid_0's rmse: 7.76034


[32m[I 2021-08-21 05:42:24,761][0m Trial 22 finished with value: 7.757473524845453 and parameters: {'learning_rate': 0.01464206501006796, 'subsample': 0.7359701536200625, 'colsample_bytree': 0.4689746570615483, 'subsample_freq': 6, 'reg_lambda': 40.5856107909693, 'reg_alpha': 25.93858763261388, 'min_child_weight': 454.0976450125016, 'min_child_samples': 105}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.7848
[2000]	valid_0's rmse: 7.76728
[3000]	valid_0's rmse: 7.76127
[4000]	valid_0's rmse: 7.75865
[5000]	valid_0's rmse: 7.75689
Early stopping, best iteration is:
[5385]	valid_0's rmse: 7.7566
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.78503
[2000]	valid_0's rmse: 7.76787
[3000]	valid_0's rmse: 7.76194
[4000]	valid_0's rmse: 7.75944
Early stopping, best iteration is:
[4392]	valid_0's rmse: 7.75822


[32m[I 2021-08-21 05:47:27,108][0m Trial 23 finished with value: 7.756633214029168 and parameters: {'learning_rate': 0.006170958053378423, 'subsample': 0.7131765677663389, 'colsample_bytree': 0.4352476429261882, 'subsample_freq': 6, 'reg_lambda': 42.3245441759586, 'reg_alpha': 25.996928126896407, 'min_child_weight': 475.0327637011139, 'min_child_samples': 105}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.82562
[2000]	valid_0's rmse: 7.8075
[3000]	valid_0's rmse: 7.79525
[4000]	valid_0's rmse: 7.78654
[5000]	valid_0's rmse: 7.78039
[6000]	valid_0's rmse: 7.77549
[7000]	valid_0's rmse: 7.77174
[8000]	valid_0's rmse: 7.76896
[9000]	valid_0's rmse: 7.76665
[10000]	valid_0's rmse: 7.76487
Did not meet early stopping. Best iteration is:
[10000]	valid_0's rmse: 7.76487
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.82583
[2000]	valid_0's rmse: 7.80738
[3000]	valid_0's rmse: 7.79522
[4000]	valid_0's rmse: 7.78652
[5000]	valid_0's rmse: 7.78019
[6000]	valid_0's rmse: 7.77535
[7000]	valid_0's rmse: 7.77164
[8000]	valid_0's rmse: 7.76869
[9000]	valid_0's rmse: 7.7664
[10000]	valid_0's rmse: 7.76476
Did not meet early stopping. Best iteration is:
[9999]	valid_0's rmse: 7.76476


[32m[I 2021-08-21 05:59:27,785][0m Trial 24 finished with value: 7.764713409478993 and parameters: {'learning_rate': 0.001423449177061188, 'subsample': 0.696322387061556, 'colsample_bytree': 0.49092318264359674, 'subsample_freq': 6, 'reg_lambda': 40.03250681820499, 'reg_alpha': 27.845288301020883, 'min_child_weight': 421.42043983941005, 'min_child_samples': 110}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.77022
[2000]	valid_0's rmse: 7.76229
[3000]	valid_0's rmse: 7.75925
Early stopping, best iteration is:
[3114]	valid_0's rmse: 7.75882
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76891
[2000]	valid_0's rmse: 7.76092
Early stopping, best iteration is:
[2767]	valid_0's rmse: 7.75923


[32m[I 2021-08-21 06:02:54,423][0m Trial 25 finished with value: 7.757234477367284 and parameters: {'learning_rate': 0.011837909609116943, 'subsample': 0.670545711763256, 'colsample_bytree': 0.5252537649787961, 'subsample_freq': 6, 'reg_lambda': 43.93525712923854, 'reg_alpha': 29.35098542133813, 'min_child_weight': 466.72787555705963, 'min_child_samples': 100}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.7631
Early stopping, best iteration is:
[1680]	valid_0's rmse: 7.76018
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76208
Early stopping, best iteration is:
[1599]	valid_0's rmse: 7.75978


[32m[I 2021-08-21 06:04:36,682][0m Trial 26 finished with value: 7.757496157673964 and parameters: {'learning_rate': 0.020996575017074286, 'subsample': 0.7469552582735424, 'colsample_bytree': 0.38920131732177454, 'subsample_freq': 6, 'reg_lambda': 41.004687635493724, 'reg_alpha': 26.471048999860184, 'min_child_weight': 442.20222118196426, 'min_child_samples': 95}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.79014
[2000]	valid_0's rmse: 7.7717
[3000]	valid_0's rmse: 7.76465
[4000]	valid_0's rmse: 7.76122
[5000]	valid_0's rmse: 7.75926
[6000]	valid_0's rmse: 7.75827
Early stopping, best iteration is:
[6361]	valid_0's rmse: 7.75785
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.79069
[2000]	valid_0's rmse: 7.77166
[3000]	valid_0's rmse: 7.764
[4000]	valid_0's rmse: 7.76013
[5000]	valid_0's rmse: 7.75828
[6000]	valid_0's rmse: 7.75705
Early stopping, best iteration is:
[6110]	valid_0's rmse: 7.75686


[32m[I 2021-08-21 06:11:06,026][0m Trial 27 finished with value: 7.756738877249419 and parameters: {'learning_rate': 0.005088182635732667, 'subsample': 0.723463704425896, 'colsample_bytree': 0.46754530628595875, 'subsample_freq': 5, 'reg_lambda': 38.05761701352232, 'reg_alpha': 28.801005638230183, 'min_child_weight': 456.3123120285211, 'min_child_samples': 105}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.77144
[2000]	valid_0's rmse: 7.76187
[3000]	valid_0's rmse: 7.76004
Early stopping, best iteration is:
[3146]	valid_0's rmse: 7.75968
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.77147
[2000]	valid_0's rmse: 7.76292
Early stopping, best iteration is:
[2647]	valid_0's rmse: 7.76099


[32m[I 2021-08-21 06:14:00,541][0m Trial 28 finished with value: 7.7587944834151035 and parameters: {'learning_rate': 0.011148068184459192, 'subsample': 0.6465143129883181, 'colsample_bytree': 0.4313461882442464, 'subsample_freq': 6, 'reg_lambda': 42.442262146094826, 'reg_alpha': 30.04616278862581, 'min_child_weight': 448.90310479708484, 'min_child_samples': 114}. Best is trial 0 with value: 7.75486952004987.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76442
[2000]	valid_0's rmse: 7.75796
Early stopping, best iteration is:
[2223]	valid_0's rmse: 7.7575
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 7.76693
[2000]	valid_0's rmse: 7.76056
Early stopping, best iteration is:
[2624]	valid_0's rmse: 7.75913


[32m[I 2021-08-21 06:16:21,484][0m Trial 29 finished with value: 7.756282319048207 and parameters: {'learning_rate': 0.015162879758868255, 'subsample': 0.7014794041975789, 'colsample_bytree': 0.38999127317257914, 'subsample_freq': 4, 'reg_lambda': 39.37255344999815, 'reg_alpha': 23.591726418742564, 'min_child_weight': 434.5570113577248, 'min_child_samples': 103}. Best is trial 0 with value: 7.75486952004987.[0m


Number of finished trials: 30
Best trial: {'learning_rate': 0.01376508999204714, 'subsample': 0.6948557461507425, 'colsample_bytree': 0.5127040908887421, 'subsample_freq': 4, 'reg_lambda': 42.010560753005905, 'reg_alpha': 27.268524196259943, 'min_child_weight': 455.1441524357472, 'min_child_samples': 103}


In [10]:
study.best_params

{'learning_rate': 0.01376508999204714,
 'subsample': 0.6948557461507425,
 'colsample_bytree': 0.5127040908887421,
 'subsample_freq': 4,
 'reg_lambda': 42.010560753005905,
 'reg_alpha': 27.268524196259943,
 'min_child_weight': 455.1441524357472,
 'min_child_samples': 103}

# Log

7.770644795373356 row-wise noise ver1

7.756419562622152 no noise ver5

7.755642795995508 no noise ver7 (narrow space)

7.770199059325497 random noise ver6


kfold random noise ver3
7.846914982754461