In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import random
import os
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

import lightgbm as lgb
#import xgboost as xgb
#import catboost as ctb

import warnings
warnings.simplefilter('ignore')

# Parameters

In [2]:
target = 'target'

DEBUG = False

if DEBUG:
    N_ESTIMATORS = 1
    N_SPLITS = 2
    SEED = 17
    CVSEED = 17
    EARLY_STOPPING_ROUNDS = 1
    VERBOSE = 100
    #N_ITERS = 2
else:
    N_SPLITS = 10
    N_ESTIMATORS = 20000
    EARLY_STOPPING_ROUNDS = 200
    VERBOSE = 1000
    SEED = 17
    CVSEED = 17
    #N_ITERS = 10

In [3]:
def set_seed(seed=17):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
set_seed(SEED)

# Load data

In [4]:
# Load the training data
train = pd.read_csv("../input/30-days-of-ml/train.csv")
test = pd.read_csv("../input/30-days-of-ml/test.csv")

# Preview the data
train.head()

Unnamed: 0,id,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,C,B,B,A,E,C,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,A,A,B,D,A,F,A,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,A,C,B,D,A,D,A,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,A,C,B,D,A,E,C,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,A,C,B,D,A,E,A,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [5]:
# Separate target from features
y = train['target']
features = train.drop(['id','target'], axis=1)

# Preview features
features.head()

Unnamed: 0,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9,...,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,B,B,B,C,B,B,A,E,C,N,...,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985
1,B,B,A,A,B,D,A,F,A,O,...,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,A,A,A,C,B,D,A,D,A,F,...,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,B,B,A,C,B,D,A,E,C,K,...,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682
4,A,A,A,C,B,D,A,E,A,N,...,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823


# Preprocessing

In [6]:

# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# ordinal-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()
ordinal_encoder = OrdinalEncoder()
X[object_cols] = ordinal_encoder.fit_transform(features[object_cols])
X_test[object_cols] = ordinal_encoder.transform(test[object_cols])

# Preview the ordinal-encoded features
X.head()


Unnamed: 0,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9,...,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,1.0,1.0,1.0,2.0,1.0,1.0,0.0,4.0,2.0,13.0,...,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985
1,1.0,1.0,0.0,0.0,1.0,3.0,0.0,5.0,0.0,14.0,...,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,0.0,0.0,0.0,2.0,1.0,3.0,0.0,3.0,0.0,5.0,...,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,1.0,1.0,0.0,2.0,1.0,3.0,0.0,4.0,2.0,10.0,...,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682
4,0.0,0.0,0.0,2.0,1.0,3.0,0.0,4.0,0.0,13.0,...,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823


In [7]:
'''
# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# one-hot-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()

oh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")
X_ohe = oh_encoder.fit_transform(features[object_cols])
X_test_ohe = oh_encoder.transform(test[object_cols])

X_ohe = pd.DataFrame(X_ohe, columns=[f"ohe_{i}" for i in range(X_ohe.shape[1])])
X_test_ohe = pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])

X = pd.concat([X, X_ohe], axis=1)
X_test = pd.concat([X_test, X_test_ohe], axis=1)
X = X.drop(object_cols, axis=1)
X_test = X_test.drop(object_cols, axis=1)
    
# Preview the one-hot-encoded features
X.head()
'''

'\n# List of categorical columns\nobject_cols = [col for col in features.columns if \'cat\' in col]\n\n# one-hot-encode categorical columns\nX = features.copy()\nX_test = test.drop([\'id\'], axis=1).copy()\n\noh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")\nX_ohe = oh_encoder.fit_transform(features[object_cols])\nX_test_ohe = oh_encoder.transform(test[object_cols])\n\nX_ohe = pd.DataFrame(X_ohe, columns=[f"ohe_{i}" for i in range(X_ohe.shape[1])])\nX_test_ohe = pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])\n\nX = pd.concat([X, X_ohe], axis=1)\nX_test = pd.concat([X_test, X_test_ohe], axis=1)\nX = X.drop(object_cols, axis=1)\nX_test = X_test.drop(object_cols, axis=1)\n    \n# Preview the one-hot-encoded features\nX.head()\n'

In [8]:
scale_features = [col for col in features.columns if 'cont' in col]

ss = StandardScaler()
X[scale_features] = ss.fit_transform(features[scale_features])
X_test[scale_features] = ss.transform(test[scale_features])

In [9]:
# Swap noise

# Random
def apply_noise_rn(df, p=.75):
    should_not_swap = np.random.binomial(1, p, df.shape)
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Row-wise
def apply_noise_row(df, p=.75):
    should_not_swap = np.zeros(df.shape)
    for i in range(df.shape[0]):
        for j in np.random.choice(df.shape[1],int(p*df.shape[1]),replace=False):
            should_not_swap[i,j]=1 
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Optuna

In [10]:
# Optuna for parameter search
!pip install -q optuna

import optuna
import pickle



In [11]:
# for the fixed learning rate, use the opt n iterations and tune the tree hyperparameters
def objective(trial, X=X, y=y):
  """
  """
  param_space = {
               #'device':'gpu',  # Use GPU acceleration
               #'boosting_type': 'gbdt',
               'reg_lambda':trial.suggest_uniform('reg_lambda', 6.6, 7.8),
              'reg_alpha':trial.suggest_uniform('reg_alpha', 34, 38),
                'subsample': trial.suggest_uniform('subsample', 0.825, 0.875),
              'colsample_bytree':trial.suggest_uniform('colsample_bytree', 0.645, 0.695),
                #'subsample_freq': trial.suggest_int('subsample_freq', 1, 10),
               'learning_rate':trial.suggest_uniform('learning_rate', 5.5e-2, 7.5e-2),
                 'min_child_samples':trial.suggest_int('min_child_samples', 23, 27),
              'num_leaves':trial.suggest_int('num_leaves', 19, 29),
              'max_depth':trial.suggest_int('max_depth', 2, 3),
              #'min_split_gain': trial.suggest_float('min_split_gain', 0.0, 0.005),
              #'class_weight':trial.suggest_categorical('class_weight',['balanced',None]),
               'n_estimators':N_ESTIMATORS,
                'n_jobs' : -1,
              'metric':'rmse',
              'max_bin':trial.suggest_int('max_bin', 732, 762),
              'cat_smooth':trial.suggest_int('cat_smooth', 88, 92),
              'cat_l2':trial.suggest_loguniform('cat_l2', 3.7, 6.25)
                }
            
  seed_list=[SEED, SEED+1]
  #kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=CVSEED)
  #lgb_oof = np.zeros(train.shape[0])
  #lgb_pred = np.zeros(test.shape[0])
  X_tr, X_va, y_tr, y_va = train_test_split(X,y,test_size=.2,random_state=CVSEED)
  pred=np.zeros(y_va.shape[0])
    
  #for (trn_idx, val_idx) in enumerate(kf.split(X, y)):
  #print(f"===== fold {fold} =====")
    
  #X_tr = apply_noise_row(X_tr)

  
  #start = time.time()
  for inseed in seed_list:
    param_space['random_state'] = inseed

    model = lgb.LGBMRegressor(**param_space)
    model.fit(
        X_tr, 
        y_tr,
        eval_set=[(X_va, y_va)],
        eval_metric='rmse',
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        verbose=VERBOSE,
        categorical_feature=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        #callbacks=[optuna.integration.LightGBMPruningCallback(trial, metric='rmse')],
    )


    pred += model.predict(X_va) / len(seed_list)
    
  #elapsed = time.time() - start
  rmse = mean_squared_error(y_va, pred, squared=False)
  #print(f"fold {fold} - lgb rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")            
  
  #print(f"oof lgb_rmse = {mean_squared_error(train[target], lgb_oof, squared=False)}")

  
  return rmse

In [12]:
study = optuna.create_study(direction='minimize')
study.optimize(objective,n_trials= 30)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-08-21 02:36:49,215][0m A new study created in memory with name: no-name-128cd841-23a8-41c6-bab0-2a69f026ebdc[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.717089
[2000]	valid_0's rmse: 0.715067
[3000]	valid_0's rmse: 0.714583
Early stopping, best iteration is:
[3090]	valid_0's rmse: 0.714566
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.717119
[2000]	valid_0's rmse: 0.715086
[3000]	valid_0's rmse: 0.714727
Early stopping, best iteration is:
[3388]	valid_0's rmse: 0.714675


[32m[I 2021-08-21 02:38:08,318][0m Trial 0 finished with value: 0.7144695779946586 and parameters: {'reg_lambda': 7.685780275889322, 'reg_alpha': 35.06254186395001, 'subsample': 0.8673627928188785, 'colsample_bytree': 0.6699163816690578, 'learning_rate': 0.0683907765278896, 'min_child_samples': 24, 'num_leaves': 21, 'max_depth': 3, 'max_bin': 738, 'cat_smooth': 90, 'cat_l2': 5.569087556194927}. Best is trial 0 with value: 0.7144695779946586.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720394
[2000]	valid_0's rmse: 0.716788
[3000]	valid_0's rmse: 0.715269
[4000]	valid_0's rmse: 0.714592
[5000]	valid_0's rmse: 0.714362
[6000]	valid_0's rmse: 0.714273
Early stopping, best iteration is:
[5816]	valid_0's rmse: 0.714261
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720472
[2000]	valid_0's rmse: 0.716873
[3000]	valid_0's rmse: 0.715281
[4000]	valid_0's rmse: 0.714641
[5000]	valid_0's rmse: 0.714371
[6000]	valid_0's rmse: 0.714259
Early stopping, best iteration is:
[5988]	valid_0's rmse: 0.714258


[32m[I 2021-08-21 02:40:00,782][0m Trial 1 finished with value: 0.7141631430900747 and parameters: {'reg_lambda': 6.954468373460137, 'reg_alpha': 35.06911897318049, 'subsample': 0.8592515353056105, 'colsample_bytree': 0.6566066931274556, 'learning_rate': 0.07434827317454108, 'min_child_samples': 25, 'num_leaves': 24, 'max_depth': 2, 'max_bin': 760, 'cat_smooth': 92, 'cat_l2': 5.008394512233755}. Best is trial 1 with value: 0.7141631430900747.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.718036
[2000]	valid_0's rmse: 0.715522
[3000]	valid_0's rmse: 0.714877
[4000]	valid_0's rmse: 0.714608
Early stopping, best iteration is:
[4304]	valid_0's rmse: 0.714564
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.717999
[2000]	valid_0's rmse: 0.715523
[3000]	valid_0's rmse: 0.714908
[4000]	valid_0's rmse: 0.714721
Early stopping, best iteration is:
[3989]	valid_0's rmse: 0.714716


[32m[I 2021-08-21 02:41:41,558][0m Trial 2 finished with value: 0.7144914352936411 and parameters: {'reg_lambda': 7.2092727576153175, 'reg_alpha': 36.175894245301706, 'subsample': 0.8626410525807126, 'colsample_bytree': 0.656925661769379, 'learning_rate': 0.05739073798483632, 'min_child_samples': 25, 'num_leaves': 27, 'max_depth': 3, 'max_bin': 745, 'cat_smooth': 92, 'cat_l2': 4.1559055506504174}. Best is trial 1 with value: 0.7141631430900747.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.717129
[2000]	valid_0's rmse: 0.715022
[3000]	valid_0's rmse: 0.714602
Early stopping, best iteration is:
[3414]	valid_0's rmse: 0.714506
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.71714
[2000]	valid_0's rmse: 0.715183
Early stopping, best iteration is:
[2788]	valid_0's rmse: 0.714729


[32m[I 2021-08-21 02:42:58,795][0m Trial 3 finished with value: 0.7144495728916018 and parameters: {'reg_lambda': 7.016352238155574, 'reg_alpha': 35.63857591019697, 'subsample': 0.8659875350260541, 'colsample_bytree': 0.6554635010712909, 'learning_rate': 0.06718133773879573, 'min_child_samples': 24, 'num_leaves': 24, 'max_depth': 3, 'max_bin': 760, 'cat_smooth': 88, 'cat_l2': 5.624216783926828}. Best is trial 1 with value: 0.7141631430900747.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720969
[2000]	valid_0's rmse: 0.717299
[3000]	valid_0's rmse: 0.715581
[4000]	valid_0's rmse: 0.714764
[5000]	valid_0's rmse: 0.714423
[6000]	valid_0's rmse: 0.714262
[7000]	valid_0's rmse: 0.714193
Early stopping, best iteration is:
[7257]	valid_0's rmse: 0.714186
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721063
[2000]	valid_0's rmse: 0.717235
[3000]	valid_0's rmse: 0.71558
[4000]	valid_0's rmse: 0.714771
[5000]	valid_0's rmse: 0.714426
[6000]	valid_0's rmse: 0.714307
[7000]	valid_0's rmse: 0.714252
Early stopping, best iteration is:
[7633]	valid_0's rmse: 0.714229


[32m[I 2021-08-21 02:45:21,217][0m Trial 4 finished with value: 0.7141080551505147 and parameters: {'reg_lambda': 6.674644141507648, 'reg_alpha': 34.564481203078415, 'subsample': 0.8471442204424425, 'colsample_bytree': 0.6836092841645856, 'learning_rate': 0.06665943252736647, 'min_child_samples': 26, 'num_leaves': 19, 'max_depth': 2, 'max_bin': 755, 'cat_smooth': 89, 'cat_l2': 5.2034916672503755}. Best is trial 4 with value: 0.7141080551505147.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720429
[2000]	valid_0's rmse: 0.716858
[3000]	valid_0's rmse: 0.715413
[4000]	valid_0's rmse: 0.714777
[5000]	valid_0's rmse: 0.714472
[6000]	valid_0's rmse: 0.71435
Early stopping, best iteration is:
[6224]	valid_0's rmse: 0.714324
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720485
[2000]	valid_0's rmse: 0.716875
[3000]	valid_0's rmse: 0.715286
[4000]	valid_0's rmse: 0.714715
[5000]	valid_0's rmse: 0.714467
[6000]	valid_0's rmse: 0.714372
Early stopping, best iteration is:
[6345]	valid_0's rmse: 0.714339


[32m[I 2021-08-21 02:47:26,103][0m Trial 5 finished with value: 0.7142274855942211 and parameters: {'reg_lambda': 7.066459772561306, 'reg_alpha': 35.41142675987052, 'subsample': 0.8557021764974305, 'colsample_bytree': 0.6768217473880961, 'learning_rate': 0.0736511184713422, 'min_child_samples': 27, 'num_leaves': 28, 'max_depth': 2, 'max_bin': 736, 'cat_smooth': 88, 'cat_l2': 4.846236975004745}. Best is trial 4 with value: 0.7141080551505147.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.71742
[2000]	valid_0's rmse: 0.715176
[3000]	valid_0's rmse: 0.714738
Early stopping, best iteration is:
[3419]	valid_0's rmse: 0.714707
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.717526
[2000]	valid_0's rmse: 0.715287
[3000]	valid_0's rmse: 0.714779
Early stopping, best iteration is:
[3578]	valid_0's rmse: 0.71466


[32m[I 2021-08-21 02:48:51,051][0m Trial 6 finished with value: 0.7145346985429333 and parameters: {'reg_lambda': 6.938634599001583, 'reg_alpha': 34.68712567076545, 'subsample': 0.8600383932625957, 'colsample_bytree': 0.6871757191717716, 'learning_rate': 0.06351513491629736, 'min_child_samples': 25, 'num_leaves': 29, 'max_depth': 3, 'max_bin': 752, 'cat_smooth': 92, 'cat_l2': 3.9324304743048253}. Best is trial 4 with value: 0.7141080551505147.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.72172
[2000]	valid_0's rmse: 0.717918
[3000]	valid_0's rmse: 0.716087
[4000]	valid_0's rmse: 0.715162
[5000]	valid_0's rmse: 0.714709
[6000]	valid_0's rmse: 0.714481
[7000]	valid_0's rmse: 0.714373
[8000]	valid_0's rmse: 0.714308
Early stopping, best iteration is:
[7893]	valid_0's rmse: 0.714305
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721682
[2000]	valid_0's rmse: 0.717909
[3000]	valid_0's rmse: 0.716034
[4000]	valid_0's rmse: 0.715079
[5000]	valid_0's rmse: 0.714605
[6000]	valid_0's rmse: 0.714354
[7000]	valid_0's rmse: 0.714242
[8000]	valid_0's rmse: 0.714199
Early stopping, best iteration is:
[8005]	valid_0's rmse: 0.714199


[32m[I 2021-08-21 02:51:24,014][0m Trial 7 finished with value: 0.7141575312137134 and parameters: {'reg_lambda': 7.542968754257447, 'reg_alpha': 37.22387519111675, 'subsample': 0.854395788059009, 'colsample_bytree': 0.688697014845815, 'learning_rate': 0.05974530962117951, 'min_child_samples': 27, 'num_leaves': 22, 'max_depth': 2, 'max_bin': 744, 'cat_smooth': 89, 'cat_l2': 4.786312572308717}. Best is trial 4 with value: 0.7141080551505147.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721212
[2000]	valid_0's rmse: 0.717641
[3000]	valid_0's rmse: 0.715896
[4000]	valid_0's rmse: 0.715062
[5000]	valid_0's rmse: 0.71456
[6000]	valid_0's rmse: 0.714355
[7000]	valid_0's rmse: 0.714258
Early stopping, best iteration is:
[7091]	valid_0's rmse: 0.714252
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721425
[2000]	valid_0's rmse: 0.717503
[3000]	valid_0's rmse: 0.715891
[4000]	valid_0's rmse: 0.715013
[5000]	valid_0's rmse: 0.714581
[6000]	valid_0's rmse: 0.714377
[7000]	valid_0's rmse: 0.714257
Early stopping, best iteration is:
[7201]	valid_0's rmse: 0.714236


[32m[I 2021-08-21 02:53:39,208][0m Trial 8 finished with value: 0.7141544982873875 and parameters: {'reg_lambda': 7.086605212944113, 'reg_alpha': 37.96743865891143, 'subsample': 0.8307869762182328, 'colsample_bytree': 0.6486420109473578, 'learning_rate': 0.06303343103190809, 'min_child_samples': 26, 'num_leaves': 22, 'max_depth': 2, 'max_bin': 739, 'cat_smooth': 90, 'cat_l2': 4.8542973658215995}. Best is trial 4 with value: 0.7141080551505147.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.717523
[2000]	valid_0's rmse: 0.71521
[3000]	valid_0's rmse: 0.714662
Early stopping, best iteration is:
[3592]	valid_0's rmse: 0.714518
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.717552
[2000]	valid_0's rmse: 0.715248
[3000]	valid_0's rmse: 0.714781
Early stopping, best iteration is:
[3470]	valid_0's rmse: 0.714683


[32m[I 2021-08-21 02:55:06,455][0m Trial 9 finished with value: 0.7144547870122115 and parameters: {'reg_lambda': 7.270277159511425, 'reg_alpha': 34.212137620723695, 'subsample': 0.8316557218534195, 'colsample_bytree': 0.6892847559803866, 'learning_rate': 0.06383938051491508, 'min_child_samples': 27, 'num_leaves': 25, 'max_depth': 3, 'max_bin': 748, 'cat_smooth': 90, 'cat_l2': 4.768879627912868}. Best is trial 4 with value: 0.7141080551505147.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720777
[2000]	valid_0's rmse: 0.717126
[3000]	valid_0's rmse: 0.715518
[4000]	valid_0's rmse: 0.714754
[5000]	valid_0's rmse: 0.714405
[6000]	valid_0's rmse: 0.71427
[7000]	valid_0's rmse: 0.714225
Early stopping, best iteration is:
[7163]	valid_0's rmse: 0.71422
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720948
[2000]	valid_0's rmse: 0.716989
[3000]	valid_0's rmse: 0.715485
[4000]	valid_0's rmse: 0.714715
[5000]	valid_0's rmse: 0.71438
Early stopping, best iteration is:
[5373]	valid_0's rmse: 0.714297


[32m[I 2021-08-21 02:57:06,592][0m Trial 10 finished with value: 0.7141475091092794 and parameters: {'reg_lambda': 6.630018031951741, 'reg_alpha': 36.46194236681363, 'subsample': 0.8427304621099018, 'colsample_bytree': 0.6788684177653164, 'learning_rate': 0.06969854911341969, 'min_child_samples': 23, 'num_leaves': 19, 'max_depth': 2, 'max_bin': 755, 'cat_smooth': 89, 'cat_l2': 4.310451555240133}. Best is trial 4 with value: 0.7141080551505147.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720814
[2000]	valid_0's rmse: 0.717116
[3000]	valid_0's rmse: 0.715496
[4000]	valid_0's rmse: 0.714777
[5000]	valid_0's rmse: 0.714511
[6000]	valid_0's rmse: 0.714398
Early stopping, best iteration is:
[6058]	valid_0's rmse: 0.714389
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720695
[2000]	valid_0's rmse: 0.71713
[3000]	valid_0's rmse: 0.715472
[4000]	valid_0's rmse: 0.714825
[5000]	valid_0's rmse: 0.714544
[6000]	valid_0's rmse: 0.714435
Early stopping, best iteration is:
[6589]	valid_0's rmse: 0.714395


[32m[I 2021-08-21 02:59:05,801][0m Trial 11 finished with value: 0.7143050513992377 and parameters: {'reg_lambda': 6.640563421251799, 'reg_alpha': 36.51070567608584, 'subsample': 0.8422929704565661, 'colsample_bytree': 0.6770109520623483, 'learning_rate': 0.07077692837620767, 'min_child_samples': 23, 'num_leaves': 19, 'max_depth': 2, 'max_bin': 756, 'cat_smooth': 89, 'cat_l2': 4.224733761288754}. Best is trial 4 with value: 0.7141080551505147.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720627
[2000]	valid_0's rmse: 0.716935
[3000]	valid_0's rmse: 0.715371
[4000]	valid_0's rmse: 0.714629
[5000]	valid_0's rmse: 0.714343
[6000]	valid_0's rmse: 0.714211
[7000]	valid_0's rmse: 0.714142
Early stopping, best iteration is:
[7447]	valid_0's rmse: 0.714111
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720769
[2000]	valid_0's rmse: 0.717016
[3000]	valid_0's rmse: 0.715453
[4000]	valid_0's rmse: 0.714691
[5000]	valid_0's rmse: 0.714369
[6000]	valid_0's rmse: 0.714256
Early stopping, best iteration is:
[5991]	valid_0's rmse: 0.714256


[32m[I 2021-08-21 03:01:13,523][0m Trial 12 finished with value: 0.714081271380416 and parameters: {'reg_lambda': 6.616765040165289, 'reg_alpha': 36.805761204217944, 'subsample': 0.8431140870589577, 'colsample_bytree': 0.6763512706014263, 'learning_rate': 0.07080612651038082, 'min_child_samples': 26, 'num_leaves': 19, 'max_depth': 2, 'max_bin': 754, 'cat_smooth': 89, 'cat_l2': 6.026168441469418}. Best is trial 12 with value: 0.714081271380416.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720555
[2000]	valid_0's rmse: 0.716954
[3000]	valid_0's rmse: 0.715404
[4000]	valid_0's rmse: 0.714698
[5000]	valid_0's rmse: 0.714422
[6000]	valid_0's rmse: 0.71428
Early stopping, best iteration is:
[6441]	valid_0's rmse: 0.714243
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720556
[2000]	valid_0's rmse: 0.71693
[3000]	valid_0's rmse: 0.715317
[4000]	valid_0's rmse: 0.714628
[5000]	valid_0's rmse: 0.714315
[6000]	valid_0's rmse: 0.714188
[7000]	valid_0's rmse: 0.714141
Early stopping, best iteration is:
[7009]	valid_0's rmse: 0.714139


[32m[I 2021-08-21 03:03:22,811][0m Trial 13 finished with value: 0.7140914808283688 and parameters: {'reg_lambda': 6.7300372466714355, 'reg_alpha': 37.48862902588565, 'subsample': 0.8442663208667539, 'colsample_bytree': 0.6696899930461401, 'learning_rate': 0.07151347440667236, 'min_child_samples': 26, 'num_leaves': 20, 'max_depth': 2, 'max_bin': 751, 'cat_smooth': 89, 'cat_l2': 6.183199481691027}. Best is trial 12 with value: 0.714081271380416.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720635
[2000]	valid_0's rmse: 0.716952
[3000]	valid_0's rmse: 0.715391
[4000]	valid_0's rmse: 0.714702
[5000]	valid_0's rmse: 0.714381
[6000]	valid_0's rmse: 0.714267
Early stopping, best iteration is:
[6698]	valid_0's rmse: 0.714228
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720545
[2000]	valid_0's rmse: 0.716925
[3000]	valid_0's rmse: 0.715373
[4000]	valid_0's rmse: 0.714667
[5000]	valid_0's rmse: 0.714364
[6000]	valid_0's rmse: 0.714248
Early stopping, best iteration is:
[6172]	valid_0's rmse: 0.714232


[32m[I 2021-08-21 03:05:25,194][0m Trial 14 finished with value: 0.7141353080556251 and parameters: {'reg_lambda': 6.7802499116272825, 'reg_alpha': 37.556617334395526, 'subsample': 0.8359831325501461, 'colsample_bytree': 0.6658555891014544, 'learning_rate': 0.07199618608680182, 'min_child_samples': 26, 'num_leaves': 21, 'max_depth': 2, 'max_bin': 750, 'cat_smooth': 91, 'cat_l2': 6.182892292321872}. Best is trial 12 with value: 0.714081271380416.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.72032
[2000]	valid_0's rmse: 0.716703
[3000]	valid_0's rmse: 0.71514
[4000]	valid_0's rmse: 0.714564
[5000]	valid_0's rmse: 0.714286
[6000]	valid_0's rmse: 0.71418
[7000]	valid_0's rmse: 0.714149
Early stopping, best iteration is:
[6843]	valid_0's rmse: 0.714134
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720319
[2000]	valid_0's rmse: 0.716714
[3000]	valid_0's rmse: 0.715249
[4000]	valid_0's rmse: 0.714664
[5000]	valid_0's rmse: 0.714391
Early stopping, best iteration is:
[5373]	valid_0's rmse: 0.71434


[32m[I 2021-08-21 03:07:22,374][0m Trial 15 finished with value: 0.7141291483959906 and parameters: {'reg_lambda': 6.805727619913024, 'reg_alpha': 37.072809493682584, 'subsample': 0.838660997968962, 'colsample_bytree': 0.6640155795667029, 'learning_rate': 0.07477682427516079, 'min_child_samples': 26, 'num_leaves': 20, 'max_depth': 2, 'max_bin': 753, 'cat_smooth': 88, 'cat_l2': 6.219092078081347}. Best is trial 12 with value: 0.714081271380416.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.72069
[2000]	valid_0's rmse: 0.717027
[3000]	valid_0's rmse: 0.715323
[4000]	valid_0's rmse: 0.714687
[5000]	valid_0's rmse: 0.714383
[6000]	valid_0's rmse: 0.714268
Early stopping, best iteration is:
[6031]	valid_0's rmse: 0.714257
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720537
[2000]	valid_0's rmse: 0.717008
[3000]	valid_0's rmse: 0.715386
[4000]	valid_0's rmse: 0.714726
[5000]	valid_0's rmse: 0.714404
[6000]	valid_0's rmse: 0.714294
Early stopping, best iteration is:
[6645]	valid_0's rmse: 0.714248


[32m[I 2021-08-21 03:09:23,831][0m Trial 16 finished with value: 0.7141541838876997 and parameters: {'reg_lambda': 7.389528939926815, 'reg_alpha': 37.80108997461054, 'subsample': 0.8253091017804839, 'colsample_bytree': 0.6708425385135139, 'learning_rate': 0.07238548494351998, 'min_child_samples': 26, 'num_leaves': 22, 'max_depth': 2, 'max_bin': 758, 'cat_smooth': 89, 'cat_l2': 5.895048490163328}. Best is trial 12 with value: 0.714081271380416.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720779
[2000]	valid_0's rmse: 0.717162
[3000]	valid_0's rmse: 0.715584
[4000]	valid_0's rmse: 0.714762
[5000]	valid_0's rmse: 0.714431
[6000]	valid_0's rmse: 0.714281
Early stopping, best iteration is:
[6564]	valid_0's rmse: 0.714219
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720619
[2000]	valid_0's rmse: 0.717216
[3000]	valid_0's rmse: 0.715554
[4000]	valid_0's rmse: 0.714783
[5000]	valid_0's rmse: 0.714452
[6000]	valid_0's rmse: 0.714302
Early stopping, best iteration is:
[6034]	valid_0's rmse: 0.714295


[32m[I 2021-08-21 03:11:24,222][0m Trial 17 finished with value: 0.7141621161943432 and parameters: {'reg_lambda': 6.787918086025174, 'reg_alpha': 37.00333424017857, 'subsample': 0.8507135218931857, 'colsample_bytree': 0.669875975201239, 'learning_rate': 0.06975094479107208, 'min_child_samples': 27, 'num_leaves': 20, 'max_depth': 2, 'max_bin': 743, 'cat_smooth': 91, 'cat_l2': 6.246167250987779}. Best is trial 12 with value: 0.714081271380416.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720706
[2000]	valid_0's rmse: 0.71694
[3000]	valid_0's rmse: 0.715348
[4000]	valid_0's rmse: 0.714651
[5000]	valid_0's rmse: 0.714332
[6000]	valid_0's rmse: 0.714223
[7000]	valid_0's rmse: 0.714182
Early stopping, best iteration is:
[6929]	valid_0's rmse: 0.714175
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720691
[2000]	valid_0's rmse: 0.717056
[3000]	valid_0's rmse: 0.715312
[4000]	valid_0's rmse: 0.714626
[5000]	valid_0's rmse: 0.714392
[6000]	valid_0's rmse: 0.714266
[7000]	valid_0's rmse: 0.714214
Early stopping, best iteration is:
[6922]	valid_0's rmse: 0.714209


[32m[I 2021-08-21 03:13:36,999][0m Trial 18 finished with value: 0.714093654226078 and parameters: {'reg_lambda': 6.604527076502143, 'reg_alpha': 36.76422437762635, 'subsample': 0.8477672788844244, 'colsample_bytree': 0.6808995110508105, 'learning_rate': 0.071345545898672, 'min_child_samples': 24, 'num_leaves': 26, 'max_depth': 2, 'max_bin': 750, 'cat_smooth': 88, 'cat_l2': 5.765911278328634}. Best is trial 12 with value: 0.714081271380416.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721057
[2000]	valid_0's rmse: 0.717375
[3000]	valid_0's rmse: 0.715789
[4000]	valid_0's rmse: 0.714996
[5000]	valid_0's rmse: 0.714631
[6000]	valid_0's rmse: 0.714449
[7000]	valid_0's rmse: 0.714362
Early stopping, best iteration is:
[7351]	valid_0's rmse: 0.714338
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721154
[2000]	valid_0's rmse: 0.717357
[3000]	valid_0's rmse: 0.715713
[4000]	valid_0's rmse: 0.71489
[5000]	valid_0's rmse: 0.714483
[6000]	valid_0's rmse: 0.714306
[7000]	valid_0's rmse: 0.714223
Early stopping, best iteration is:
[7284]	valid_0's rmse: 0.7142


[32m[I 2021-08-21 03:15:58,000][0m Trial 19 finished with value: 0.7141755595262351 and parameters: {'reg_lambda': 6.825116885582118, 'reg_alpha': 37.42848023697365, 'subsample': 0.8443848853742988, 'colsample_bytree': 0.6940063376302749, 'learning_rate': 0.06584650505909509, 'min_child_samples': 26, 'num_leaves': 20, 'max_depth': 2, 'max_bin': 732, 'cat_smooth': 91, 'cat_l2': 5.330781940229762}. Best is trial 12 with value: 0.714081271380416.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720874
[2000]	valid_0's rmse: 0.717102
[3000]	valid_0's rmse: 0.715409
[4000]	valid_0's rmse: 0.714651
[5000]	valid_0's rmse: 0.714334
[6000]	valid_0's rmse: 0.714154
[7000]	valid_0's rmse: 0.714069
Early stopping, best iteration is:
[7000]	valid_0's rmse: 0.714069
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720948
[2000]	valid_0's rmse: 0.71703
[3000]	valid_0's rmse: 0.715473
[4000]	valid_0's rmse: 0.714655
[5000]	valid_0's rmse: 0.714343
[6000]	valid_0's rmse: 0.714207
Early stopping, best iteration is:
[6722]	valid_0's rmse: 0.714136


[32m[I 2021-08-21 03:18:09,315][0m Trial 20 finished with value: 0.714007381480075 and parameters: {'reg_lambda': 6.704369605378165, 'reg_alpha': 35.9392368710277, 'subsample': 0.8721997285333448, 'colsample_bytree': 0.6622645153830327, 'learning_rate': 0.06841807942492734, 'min_child_samples': 25, 'num_leaves': 23, 'max_depth': 2, 'max_bin': 747, 'cat_smooth': 89, 'cat_l2': 5.994307898874366}. Best is trial 20 with value: 0.714007381480075.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720683
[2000]	valid_0's rmse: 0.717076
[3000]	valid_0's rmse: 0.71541
[4000]	valid_0's rmse: 0.714601
[5000]	valid_0's rmse: 0.714272
Early stopping, best iteration is:
[5615]	valid_0's rmse: 0.71413
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720927
[2000]	valid_0's rmse: 0.717139
[3000]	valid_0's rmse: 0.715498
[4000]	valid_0's rmse: 0.714708
[5000]	valid_0's rmse: 0.714392
[6000]	valid_0's rmse: 0.714238
[7000]	valid_0's rmse: 0.714183
Early stopping, best iteration is:
[7374]	valid_0's rmse: 0.714158


[32m[I 2021-08-21 03:20:11,515][0m Trial 21 finished with value: 0.7140320332870725 and parameters: {'reg_lambda': 6.698534744033083, 'reg_alpha': 35.892721362537266, 'subsample': 0.8723270651189626, 'colsample_bytree': 0.6623238635757952, 'learning_rate': 0.06887645221916344, 'min_child_samples': 25, 'num_leaves': 23, 'max_depth': 2, 'max_bin': 747, 'cat_smooth': 89, 'cat_l2': 5.973588362260898}. Best is trial 20 with value: 0.714007381480075.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720664
[2000]	valid_0's rmse: 0.716986
[3000]	valid_0's rmse: 0.715384
[4000]	valid_0's rmse: 0.714568
[5000]	valid_0's rmse: 0.714272
[6000]	valid_0's rmse: 0.714099
[7000]	valid_0's rmse: 0.714012
Early stopping, best iteration is:
[7135]	valid_0's rmse: 0.714
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.72092
[2000]	valid_0's rmse: 0.71727
[3000]	valid_0's rmse: 0.715554
[4000]	valid_0's rmse: 0.714792
[5000]	valid_0's rmse: 0.714404
[6000]	valid_0's rmse: 0.714249
[7000]	valid_0's rmse: 0.714176
Early stopping, best iteration is:
[7626]	valid_0's rmse: 0.714157


[32m[I 2021-08-21 03:22:31,917][0m Trial 22 finished with value: 0.7139793127526392 and parameters: {'reg_lambda': 6.609931804562087, 'reg_alpha': 35.89337620630626, 'subsample': 0.8737357601914469, 'colsample_bytree': 0.6612958156067924, 'learning_rate': 0.06767790040126322, 'min_child_samples': 25, 'num_leaves': 23, 'max_depth': 2, 'max_bin': 747, 'cat_smooth': 89, 'cat_l2': 5.885433554272715}. Best is trial 22 with value: 0.7139793127526392.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720615
[2000]	valid_0's rmse: 0.717167
[3000]	valid_0's rmse: 0.715459
[4000]	valid_0's rmse: 0.714687
[5000]	valid_0's rmse: 0.714325
[6000]	valid_0's rmse: 0.714175
Early stopping, best iteration is:
[6724]	valid_0's rmse: 0.714104
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720883
[2000]	valid_0's rmse: 0.717136
[3000]	valid_0's rmse: 0.715399
[4000]	valid_0's rmse: 0.714691
[5000]	valid_0's rmse: 0.714335
[6000]	valid_0's rmse: 0.714205
[7000]	valid_0's rmse: 0.714128
Early stopping, best iteration is:
[7081]	valid_0's rmse: 0.714121


[32m[I 2021-08-21 03:24:44,352][0m Trial 23 finished with value: 0.7140163364989786 and parameters: {'reg_lambda': 6.896872020718579, 'reg_alpha': 35.85583758870463, 'subsample': 0.8749496446261755, 'colsample_bytree': 0.6617550206677685, 'learning_rate': 0.06856388239345414, 'min_child_samples': 25, 'num_leaves': 23, 'max_depth': 2, 'max_bin': 747, 'cat_smooth': 90, 'cat_l2': 5.471511422409701}. Best is trial 22 with value: 0.7139793127526392.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721561
[2000]	valid_0's rmse: 0.717745
[3000]	valid_0's rmse: 0.715889
[4000]	valid_0's rmse: 0.714984
[5000]	valid_0's rmse: 0.714554
[6000]	valid_0's rmse: 0.714316
Early stopping, best iteration is:
[6733]	valid_0's rmse: 0.714259
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721648
[2000]	valid_0's rmse: 0.717734
[3000]	valid_0's rmse: 0.716023
[4000]	valid_0's rmse: 0.715101
[5000]	valid_0's rmse: 0.714665
[6000]	valid_0's rmse: 0.71444
[7000]	valid_0's rmse: 0.714354
Early stopping, best iteration is:
[6941]	valid_0's rmse: 0.714351


[32m[I 2021-08-21 03:26:54,045][0m Trial 24 finished with value: 0.7142204945423886 and parameters: {'reg_lambda': 6.901209409414016, 'reg_alpha': 35.919283679617706, 'subsample': 0.8736377210411502, 'colsample_bytree': 0.6531961506622663, 'learning_rate': 0.061619939576480216, 'min_child_samples': 24, 'num_leaves': 23, 'max_depth': 2, 'max_bin': 742, 'cat_smooth': 90, 'cat_l2': 5.417577278686151}. Best is trial 22 with value: 0.7139793127526392.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720832
[2000]	valid_0's rmse: 0.717098
[3000]	valid_0's rmse: 0.715461
[4000]	valid_0's rmse: 0.714718
[5000]	valid_0's rmse: 0.714343
[6000]	valid_0's rmse: 0.714164
[7000]	valid_0's rmse: 0.714096
Early stopping, best iteration is:
[7125]	valid_0's rmse: 0.714091
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720957
[2000]	valid_0's rmse: 0.717203
[3000]	valid_0's rmse: 0.715556
[4000]	valid_0's rmse: 0.714751
[5000]	valid_0's rmse: 0.714383
[6000]	valid_0's rmse: 0.714215
Early stopping, best iteration is:
[6467]	valid_0's rmse: 0.714161


[32m[I 2021-08-21 03:29:02,442][0m Trial 25 finished with value: 0.7140281673507306 and parameters: {'reg_lambda': 6.891534610919295, 'reg_alpha': 35.49685200531406, 'subsample': 0.8704395663592696, 'colsample_bytree': 0.6611291627004622, 'learning_rate': 0.0673694288491911, 'min_child_samples': 25, 'num_leaves': 25, 'max_depth': 2, 'max_bin': 747, 'cat_smooth': 90, 'cat_l2': 5.594417788435411}. Best is trial 22 with value: 0.7139793127526392.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721169
[2000]	valid_0's rmse: 0.717415
[3000]	valid_0's rmse: 0.7158
[4000]	valid_0's rmse: 0.714867
[5000]	valid_0's rmse: 0.714507
[6000]	valid_0's rmse: 0.71437
Early stopping, best iteration is:
[6720]	valid_0's rmse: 0.714287
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721184
[2000]	valid_0's rmse: 0.717469
[3000]	valid_0's rmse: 0.715762
[4000]	valid_0's rmse: 0.714916
[5000]	valid_0's rmse: 0.714525
[6000]	valid_0's rmse: 0.714353
Early stopping, best iteration is:
[6538]	valid_0's rmse: 0.71431


[32m[I 2021-08-21 03:31:09,562][0m Trial 26 finished with value: 0.7142058597564799 and parameters: {'reg_lambda': 7.124363517557133, 'reg_alpha': 36.18549891143406, 'subsample': 0.8693129821250198, 'colsample_bytree': 0.6493119733495171, 'learning_rate': 0.06474669328866985, 'min_child_samples': 24, 'num_leaves': 23, 'max_depth': 2, 'max_bin': 741, 'cat_smooth': 91, 'cat_l2': 5.192200160530959}. Best is trial 22 with value: 0.7139793127526392.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.72092
[2000]	valid_0's rmse: 0.717201
[3000]	valid_0's rmse: 0.715607
[4000]	valid_0's rmse: 0.71488
[5000]	valid_0's rmse: 0.714496
[6000]	valid_0's rmse: 0.714381
[7000]	valid_0's rmse: 0.714293
Early stopping, best iteration is:
[7160]	valid_0's rmse: 0.714278
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720868
[2000]	valid_0's rmse: 0.717206
[3000]	valid_0's rmse: 0.715509
[4000]	valid_0's rmse: 0.714761
[5000]	valid_0's rmse: 0.714406
Early stopping, best iteration is:
[5603]	valid_0's rmse: 0.714304


[32m[I 2021-08-21 03:33:11,215][0m Trial 27 finished with value: 0.7141810899723732 and parameters: {'reg_lambda': 6.717621578854126, 'reg_alpha': 35.206113426541336, 'subsample': 0.8742902928948034, 'colsample_bytree': 0.6599479563321384, 'learning_rate': 0.06817212988918453, 'min_child_samples': 25, 'num_leaves': 25, 'max_depth': 2, 'max_bin': 745, 'cat_smooth': 90, 'cat_l2': 5.77544380983315}. Best is trial 22 with value: 0.7139793127526392.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.720993
[2000]	valid_0's rmse: 0.717265
[3000]	valid_0's rmse: 0.715578
[4000]	valid_0's rmse: 0.714721
[5000]	valid_0's rmse: 0.714309
[6000]	valid_0's rmse: 0.714101
Early stopping, best iteration is:
[6587]	valid_0's rmse: 0.714031
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721187
[2000]	valid_0's rmse: 0.717505
[3000]	valid_0's rmse: 0.715776
[4000]	valid_0's rmse: 0.714898
[5000]	valid_0's rmse: 0.714492
[6000]	valid_0's rmse: 0.714351
[7000]	valid_0's rmse: 0.714292
Early stopping, best iteration is:
[6811]	valid_0's rmse: 0.714289


[32m[I 2021-08-21 03:35:18,087][0m Trial 28 finished with value: 0.7140684064247501 and parameters: {'reg_lambda': 6.855126036474554, 'reg_alpha': 35.62485319025951, 'subsample': 0.8746495151598167, 'colsample_bytree': 0.6527820812378021, 'learning_rate': 0.0652054076536791, 'min_child_samples': 25, 'num_leaves': 23, 'max_depth': 2, 'max_bin': 749, 'cat_smooth': 88, 'cat_l2': 5.438983341097058}. Best is trial 22 with value: 0.7139793127526392.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.717088
[2000]	valid_0's rmse: 0.715113
[3000]	valid_0's rmse: 0.714682
Early stopping, best iteration is:
[3173]	valid_0's rmse: 0.71465
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.717044
[2000]	valid_0's rmse: 0.714959
[3000]	valid_0's rmse: 0.714587
Early stopping, best iteration is:
[3214]	valid_0's rmse: 0.714522


[32m[I 2021-08-21 03:36:36,290][0m Trial 29 finished with value: 0.7144388541484309 and parameters: {'reg_lambda': 7.785765097700414, 'reg_alpha': 36.33867180724996, 'subsample': 0.8661534137571989, 'colsample_bytree': 0.6457114939654622, 'learning_rate': 0.06898861926858153, 'min_child_samples': 24, 'num_leaves': 21, 'max_depth': 3, 'max_bin': 739, 'cat_smooth': 90, 'cat_l2': 5.746910227747635}. Best is trial 22 with value: 0.7139793127526392.[0m


Number of finished trials: 30
Best trial: {'reg_lambda': 6.609931804562087, 'reg_alpha': 35.89337620630626, 'subsample': 0.8737357601914469, 'colsample_bytree': 0.6612958156067924, 'learning_rate': 0.06767790040126322, 'min_child_samples': 25, 'num_leaves': 23, 'max_depth': 2, 'max_bin': 747, 'cat_smooth': 89, 'cat_l2': 5.885433554272715}


In [13]:
study.best_params

{'reg_lambda': 6.609931804562087,
 'reg_alpha': 35.89337620630626,
 'subsample': 0.8737357601914469,
 'colsample_bytree': 0.6612958156067924,
 'learning_rate': 0.06767790040126322,
 'min_child_samples': 25,
 'num_leaves': 23,
 'max_depth': 2,
 'max_bin': 747,
 'cat_smooth': 89,
 'cat_l2': 5.885433554272715}

# Log

====== Ordinal encoding =========

0.7179915711010899 row-wise noise ver4

0.7180313553153085 random noise ver5

0.7145452167048295 no noise ver6

0.7143969338945801 no noise ver10 (narrow space)

0.7141318460599843 no noise ver11 (narrow space)

0.7140261333995335 no noise ver12 (narrow space)

====== One-hot encoding =========

0.7242254028037789 no noise ver7

0.7264762744397595 random noise ver8

0.7265375795882082 row-wise noise ver9


