In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import random
import os
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

import lightgbm as lgb
#import xgboost as xgb
#import catboost as ctb

import warnings
warnings.simplefilter('ignore')

# Parameters

In [2]:
target = 'target'

DEBUG = False

if DEBUG:
    N_ESTIMATORS = 1
    N_SPLITS = 2
    SEED = 17
    CVSEED = 17
    EARLY_STOPPING_ROUNDS = 1
    VERBOSE = 100
    #N_ITERS = 2
else:
    N_SPLITS = 10
    N_ESTIMATORS = 20000
    EARLY_STOPPING_ROUNDS = 300
    VERBOSE = 1000
    SEED = 17
    CVSEED = 17
    #N_ITERS = 10

In [3]:
def set_seed(seed=17):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
set_seed(SEED)

# Load data

In [4]:
# Load the training data
train = pd.read_csv("../input/30-days-of-ml/train.csv")
test = pd.read_csv("../input/30-days-of-ml/test.csv")

# Preview the data
train.head()

Unnamed: 0,id,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,C,B,B,A,E,C,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,A,A,B,D,A,F,A,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,A,C,B,D,A,D,A,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,A,C,B,D,A,E,C,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,A,C,B,D,A,E,A,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [5]:
# Separate target from features
y = train['target']
features = train.drop(['id','target'], axis=1)

# Preview features
features.head()

Unnamed: 0,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9,...,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,B,B,B,C,B,B,A,E,C,N,...,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985
1,B,B,A,A,B,D,A,F,A,O,...,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,A,A,A,C,B,D,A,D,A,F,...,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,B,B,A,C,B,D,A,E,C,K,...,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682
4,A,A,A,C,B,D,A,E,A,N,...,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823


# Preprocessing

In [6]:

# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# ordinal-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()
ordinal_encoder = OrdinalEncoder()
X[object_cols] = ordinal_encoder.fit_transform(features[object_cols])
X_test[object_cols] = ordinal_encoder.transform(test[object_cols])

# Preview the ordinal-encoded features
X.head()


Unnamed: 0,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9,...,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,1.0,1.0,1.0,2.0,1.0,1.0,0.0,4.0,2.0,13.0,...,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985
1,1.0,1.0,0.0,0.0,1.0,3.0,0.0,5.0,0.0,14.0,...,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,0.0,0.0,0.0,2.0,1.0,3.0,0.0,3.0,0.0,5.0,...,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,1.0,1.0,0.0,2.0,1.0,3.0,0.0,4.0,2.0,10.0,...,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682
4,0.0,0.0,0.0,2.0,1.0,3.0,0.0,4.0,0.0,13.0,...,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823


In [7]:
'''
# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# one-hot-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()

oh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")
X_ohe = oh_encoder.fit_transform(features[object_cols])
X_test_ohe = oh_encoder.transform(test[object_cols])

X_ohe = pd.DataFrame(X_ohe, columns=[f"ohe_{i}" for i in range(X_ohe.shape[1])])
X_test_ohe = pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])

X = pd.concat([X, X_ohe], axis=1)
X_test = pd.concat([X_test, X_test_ohe], axis=1)
X = X.drop(object_cols, axis=1)
X_test = X_test.drop(object_cols, axis=1)
    
# Preview the one-hot-encoded features
X.head()
'''

'\n# List of categorical columns\nobject_cols = [col for col in features.columns if \'cat\' in col]\n\n# one-hot-encode categorical columns\nX = features.copy()\nX_test = test.drop([\'id\'], axis=1).copy()\n\noh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")\nX_ohe = oh_encoder.fit_transform(features[object_cols])\nX_test_ohe = oh_encoder.transform(test[object_cols])\n\nX_ohe = pd.DataFrame(X_ohe, columns=[f"ohe_{i}" for i in range(X_ohe.shape[1])])\nX_test_ohe = pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])\n\nX = pd.concat([X, X_ohe], axis=1)\nX_test = pd.concat([X_test, X_test_ohe], axis=1)\nX = X.drop(object_cols, axis=1)\nX_test = X_test.drop(object_cols, axis=1)\n    \n# Preview the one-hot-encoded features\nX.head()\n'

In [8]:
scale_features = [col for col in features.columns if 'cont' in col]

ss = StandardScaler()
X[scale_features] = ss.fit_transform(features[scale_features])
X_test[scale_features] = ss.transform(test[scale_features])

In [9]:
# Swap noise

# Random
def apply_noise_rn(df, p=.75):
    should_not_swap = np.random.binomial(1, p, df.shape)
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Row-wise
def apply_noise_row(df, p=.75):
    should_not_swap = np.zeros(df.shape)
    for i in range(df.shape[0]):
        for j in np.random.choice(df.shape[1],int(p*df.shape[1]),replace=False):
            should_not_swap[i,j]=1 
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Pseudo Label

In [10]:
pseudo = pd.read_csv("../input/competition-part-6-stacking/submission.csv")[target]
train_pseudo = pd.concat([X, y], axis=1)
test_pseudo = pd.concat([X_test, pseudo], axis=1)
all_pseudo = pd.concat([train_pseudo, test_pseudo]).reset_index(drop=True)

# Optuna

In [11]:
# Optuna for parameter search
!pip install -q optuna

import optuna
import pickle



In [12]:
# for the fixed learning rate, use the opt n iterations and tune the tree hyperparameters
def objective(trial, X=all_pseudo.iloc[:,:-1], y=all_pseudo[target]):
  """
  """
  param_space = {
              #'device':'gpu',  # Use GPU acceleration
               #'boosting_type': 'gbdt',
               'reg_lambda':trial.suggest_uniform('reg_lambda', 8.15, 13.15),
              'reg_alpha':trial.suggest_uniform('reg_alpha', 36.46, 42.46),
                'subsample': trial.suggest_uniform('subsample', 0.633, 0.733),
              'colsample_bytree':trial.suggest_uniform('colsample_bytree', 0.063,0.143),
                #'subsample_freq': trial.suggest_int('subsample_freq', 1, 10),
               'learning_rate':trial.suggest_uniform('learning_rate', 1e-2, 3.4e-2),
                 'min_child_samples':trial.suggest_int('min_child_samples', 22, 32),
              'num_leaves':trial.suggest_int('num_leaves', 22, 36),
              'max_depth':trial.suggest_int('max_depth', 16, 20),
              #'min_split_gain': trial.suggest_float('min_split_gain', 0.0, 0.005),
              #'class_weight':trial.suggest_categorical('class_weight',['balanced',None]),
               'n_estimators':N_ESTIMATORS,
                'n_jobs' : -1,
              'metric':'rmse',
              'max_bin':trial.suggest_int('max_bin', 1122, 1222),
              'cat_smooth':trial.suggest_int('cat_smooth', 78, 90),
              'cat_l2':trial.suggest_loguniform('cat_l2', 1e-4, 10)
                }
            
  seed_list=[SEED, SEED+1]
  #kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=CVSEED)
  lgb_oof = np.zeros(train.shape[0])
  #lgb_pred = np.zeros(test.shape[0])
  X_tr, X_va, y_tr, y_va = train_test_split(X,y,test_size=.2,random_state=CVSEED)
  trn_idx = X_tr.index
  val_idx = X_va.index
  
  #for (trn_idx, val_idx) in enumerate(kf.split(X, y)):
  #print(f"===== fold {fold} =====")
  oof_idx = np.array([idx for idx in val_idx if idx < train.shape[0]])
  preds_idx = np.array([idx for idx in val_idx if idx >= train.shape[0]])

  X_train, y_train = all_pseudo.iloc[trn_idx, :-1], all_pseudo[target].iloc[trn_idx]
  #X_train = apply_noise_row(X_train)

  X_valid, y_valid = all_pseudo.iloc[oof_idx, :-1], all_pseudo[target].iloc[oof_idx]
  #X_test = all_pseudo[scale_features].iloc[preds_idx]
  
  #start = time.time()
  for inseed in seed_list:
    param_space['random_state'] = inseed

    model = lgb.LGBMRegressor(**param_space)
    model.fit(
        X_train, 
        y_train,
        eval_set=[(X_valid, y_valid)],
        eval_metric='rmse',
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        verbose=VERBOSE,
        categorical_feature=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        #callbacks=[optuna.integration.LightGBMPruningCallback(trial, metric='rmse')],
    )


    lgb_oof[oof_idx] += model.predict(X_valid) / len(seed_list)
    #lgb_pred[preds_idx-train.shape[0]] += model.predict(X_test) / len(seed_list)

  #elapsed = time.time() - start
  rmse = mean_squared_error(y_valid, lgb_oof[oof_idx], squared=False)
  #print(f"fold {fold} - lgb rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")            
  
  #print(f"oof lgb_rmse = {mean_squared_error(train[target], lgb_oof, squared=False)}")

  
  return rmse

In [13]:
study = optuna.create_study(direction='minimize')
study.optimize(objective,n_trials= 10)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-08-31 11:35:06,061][0m A new study created in memory with name: no-name-50aa14c8-f461-413b-9bc5-024b3656f5ea[0m


Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.71428
[2000]	valid_0's rmse: 0.710641
[3000]	valid_0's rmse: 0.709565
[4000]	valid_0's rmse: 0.709132
[5000]	valid_0's rmse: 0.70894
[6000]	valid_0's rmse: 0.708832
[7000]	valid_0's rmse: 0.708765
[8000]	valid_0's rmse: 0.708731
[9000]	valid_0's rmse: 0.708698
[10000]	valid_0's rmse: 0.708689
[11000]	valid_0's rmse: 0.708674
[12000]	valid_0's rmse: 0.708671
Early stopping, best iteration is:
[11997]	valid_0's rmse: 0.70867
Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.714339
[2000]	valid_0's rmse: 0.710735
[3000]	valid_0's rmse: 0.709653
[4000]	valid_0's rmse: 0.709247
[5000]	valid_0's rmse: 0.709017
[6000]	valid_0's rmse: 0.708888
[7000]	valid_0's rmse: 0.708818
[8000]	valid_0's rmse: 0.708773
[9000]	valid_0's rmse: 0.70875
[10000]	valid_0's rmse: 0.708734
[11000]	valid_0's rmse: 0.708724
Early stopping, best iteration is:
[11051]	valid_0's rmse: 0.708722


[32m[I 2021-08-31 11:41:48,054][0m Trial 0 finished with value: 0.7085020379442479 and parameters: {'reg_lambda': 11.182025061089908, 'reg_alpha': 40.74771369528633, 'subsample': 0.6382466767725051, 'colsample_bytree': 0.07112539835281764, 'learning_rate': 0.02720620036099216, 'min_child_samples': 30, 'num_leaves': 25, 'max_depth': 17, 'max_bin': 1195, 'cat_smooth': 86, 'cat_l2': 0.001024311015660314}. Best is trial 0 with value: 0.7085020379442479.[0m


Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.711201
[2000]	valid_0's rmse: 0.709465
[3000]	valid_0's rmse: 0.709011
[4000]	valid_0's rmse: 0.708823
[5000]	valid_0's rmse: 0.708744
[6000]	valid_0's rmse: 0.708706
Early stopping, best iteration is:
[5885]	valid_0's rmse: 0.708697
Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.711218
[2000]	valid_0's rmse: 0.70947
[3000]	valid_0's rmse: 0.709032
[4000]	valid_0's rmse: 0.708889
[5000]	valid_0's rmse: 0.708849
Early stopping, best iteration is:
[5320]	valid_0's rmse: 0.708823


[32m[I 2021-08-31 11:44:46,086][0m Trial 1 finished with value: 0.7086743647071457 and parameters: {'reg_lambda': 9.868691456055043, 'reg_alpha': 37.27723657543533, 'subsample': 0.6426169120646947, 'colsample_bytree': 0.12178318814159515, 'learning_rate': 0.028802359660133642, 'min_child_samples': 22, 'num_leaves': 28, 'max_depth': 20, 'max_bin': 1164, 'cat_smooth': 80, 'cat_l2': 6.802183745720561}. Best is trial 0 with value: 0.7085020379442479.[0m


Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.714768
[2000]	valid_0's rmse: 0.71083
[3000]	valid_0's rmse: 0.709625
[4000]	valid_0's rmse: 0.709129
[5000]	valid_0's rmse: 0.708925
[6000]	valid_0's rmse: 0.708786
[7000]	valid_0's rmse: 0.708682
[8000]	valid_0's rmse: 0.708638
[9000]	valid_0's rmse: 0.708603
[10000]	valid_0's rmse: 0.708587
[11000]	valid_0's rmse: 0.708578
[12000]	valid_0's rmse: 0.708572
Early stopping, best iteration is:
[12168]	valid_0's rmse: 0.708572
Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.714758
[2000]	valid_0's rmse: 0.710902
[3000]	valid_0's rmse: 0.70971
[4000]	valid_0's rmse: 0.709261
[5000]	valid_0's rmse: 0.709036
[6000]	valid_0's rmse: 0.708901
[7000]	valid_0's rmse: 0.70883
[8000]	valid_0's rmse: 0.708778
[9000]	valid_0's rmse: 0.70875
[10000]	valid_0's rmse: 0.708727
[11000]	valid_0's rmse: 0.708714
Early stopping, best iteration is:
[11682]	valid_0's rmse: 0.708708


[32m[I 2021-08-31 11:51:52,419][0m Trial 2 finished with value: 0.7084405597978466 and parameters: {'reg_lambda': 11.090838002915286, 'reg_alpha': 41.67765024515988, 'subsample': 0.6656741082078079, 'colsample_bytree': 0.07706111978639668, 'learning_rate': 0.024987468303430946, 'min_child_samples': 23, 'num_leaves': 27, 'max_depth': 19, 'max_bin': 1133, 'cat_smooth': 85, 'cat_l2': 0.00025507853469148873}. Best is trial 2 with value: 0.7084405597978466.[0m


Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.720145
[2000]	valid_0's rmse: 0.714535
[3000]	valid_0's rmse: 0.712019
[4000]	valid_0's rmse: 0.710781
[5000]	valid_0's rmse: 0.710138
[6000]	valid_0's rmse: 0.709705
[7000]	valid_0's rmse: 0.709416
[8000]	valid_0's rmse: 0.709244
[9000]	valid_0's rmse: 0.709099
[10000]	valid_0's rmse: 0.708999
[11000]	valid_0's rmse: 0.708927
[12000]	valid_0's rmse: 0.708878
[13000]	valid_0's rmse: 0.708822
[14000]	valid_0's rmse: 0.708788
[15000]	valid_0's rmse: 0.708753
[16000]	valid_0's rmse: 0.708736
[17000]	valid_0's rmse: 0.708717
[18000]	valid_0's rmse: 0.708706
[19000]	valid_0's rmse: 0.708694
[20000]	valid_0's rmse: 0.708686
Did not meet early stopping. Best iteration is:
[19999]	valid_0's rmse: 0.708686
Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.720495
[2000]	valid_0's rmse: 0.714636
[3000]	valid_0's rmse: 0.712072
[4000]	valid_0's rmse: 0.710813
[5000]	valid_0's

[32m[I 2021-08-31 12:05:18,136][0m Trial 3 finished with value: 0.7085866782722999 and parameters: {'reg_lambda': 10.789258799079104, 'reg_alpha': 42.32247078685093, 'subsample': 0.6743408621407242, 'colsample_bytree': 0.09241700617343969, 'learning_rate': 0.01355543311560728, 'min_child_samples': 27, 'num_leaves': 24, 'max_depth': 18, 'max_bin': 1159, 'cat_smooth': 85, 'cat_l2': 0.002600972700181398}. Best is trial 2 with value: 0.7084405597978466.[0m


Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.716498
[2000]	valid_0's rmse: 0.71184
[3000]	valid_0's rmse: 0.710199
[4000]	valid_0's rmse: 0.709486
[5000]	valid_0's rmse: 0.709166
[6000]	valid_0's rmse: 0.708962
[7000]	valid_0's rmse: 0.708816
[8000]	valid_0's rmse: 0.708757
[9000]	valid_0's rmse: 0.708699
[10000]	valid_0's rmse: 0.708664
[11000]	valid_0's rmse: 0.708634
[12000]	valid_0's rmse: 0.708616
[13000]	valid_0's rmse: 0.708603
[14000]	valid_0's rmse: 0.708598
[15000]	valid_0's rmse: 0.70859
[16000]	valid_0's rmse: 0.708585
[17000]	valid_0's rmse: 0.708584
[18000]	valid_0's rmse: 0.708581
Early stopping, best iteration is:
[17795]	valid_0's rmse: 0.70858
Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.716623
[2000]	valid_0's rmse: 0.711922
[3000]	valid_0's rmse: 0.710287
[4000]	valid_0's rmse: 0.709618
[5000]	valid_0's rmse: 0.709289
[6000]	valid_0's rmse: 0.709105
[7000]	valid_0's rmse: 0.708996
[8

[32m[I 2021-08-31 12:15:32,943][0m Trial 4 finished with value: 0.7084780361256249 and parameters: {'reg_lambda': 8.788229919701983, 'reg_alpha': 41.39646098308347, 'subsample': 0.6896964436319994, 'colsample_bytree': 0.07586082149159314, 'learning_rate': 0.019977718206609513, 'min_child_samples': 23, 'num_leaves': 27, 'max_depth': 17, 'max_bin': 1172, 'cat_smooth': 90, 'cat_l2': 0.47475439968740163}. Best is trial 2 with value: 0.7084405597978466.[0m


Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.714504
[2000]	valid_0's rmse: 0.710681
[3000]	valid_0's rmse: 0.709549
[4000]	valid_0's rmse: 0.709084
[5000]	valid_0's rmse: 0.70891
[6000]	valid_0's rmse: 0.708808
[7000]	valid_0's rmse: 0.708734
Early stopping, best iteration is:
[6873]	valid_0's rmse: 0.708732
Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.714567
[2000]	valid_0's rmse: 0.710739
[3000]	valid_0's rmse: 0.709603
[4000]	valid_0's rmse: 0.709184
[5000]	valid_0's rmse: 0.708966
[6000]	valid_0's rmse: 0.708855
[7000]	valid_0's rmse: 0.708794
[8000]	valid_0's rmse: 0.708742
Early stopping, best iteration is:
[8406]	valid_0's rmse: 0.708725


[32m[I 2021-08-31 12:20:41,196][0m Trial 5 finished with value: 0.7085206865638758 and parameters: {'reg_lambda': 10.956510240703855, 'reg_alpha': 37.94542952619018, 'subsample': 0.702992234789098, 'colsample_bytree': 0.07293183016246671, 'learning_rate': 0.02521678050471909, 'min_child_samples': 23, 'num_leaves': 28, 'max_depth': 19, 'max_bin': 1183, 'cat_smooth': 82, 'cat_l2': 1.525200985661997}. Best is trial 2 with value: 0.7084405597978466.[0m


Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.717436
[2000]	valid_0's rmse: 0.712301
[3000]	valid_0's rmse: 0.71039
[4000]	valid_0's rmse: 0.709549
[5000]	valid_0's rmse: 0.709166
[6000]	valid_0's rmse: 0.70893
[7000]	valid_0's rmse: 0.70879
[8000]	valid_0's rmse: 0.708715
[9000]	valid_0's rmse: 0.708658
[10000]	valid_0's rmse: 0.708635
[11000]	valid_0's rmse: 0.708612
[12000]	valid_0's rmse: 0.708602
[13000]	valid_0's rmse: 0.708591
Early stopping, best iteration is:
[13557]	valid_0's rmse: 0.708585
Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.717501
[2000]	valid_0's rmse: 0.71228
[3000]	valid_0's rmse: 0.710419
[4000]	valid_0's rmse: 0.709666
[5000]	valid_0's rmse: 0.709268
[6000]	valid_0's rmse: 0.709073
[7000]	valid_0's rmse: 0.708958
[8000]	valid_0's rmse: 0.708873
[9000]	valid_0's rmse: 0.708822
[10000]	valid_0's rmse: 0.708762
[11000]	valid_0's rmse: 0.708732
[12000]	valid_0's rmse: 0.708716
[1300

[32m[I 2021-08-31 12:29:53,725][0m Trial 6 finished with value: 0.7084218203643355 and parameters: {'reg_lambda': 9.695042088524165, 'reg_alpha': 40.36958803589236, 'subsample': 0.699449059443989, 'colsample_bytree': 0.09042651635800109, 'learning_rate': 0.01683212648907722, 'min_child_samples': 28, 'num_leaves': 36, 'max_depth': 18, 'max_bin': 1210, 'cat_smooth': 88, 'cat_l2': 4.25044306182083}. Best is trial 6 with value: 0.7084218203643355.[0m


Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.713972
[2000]	valid_0's rmse: 0.710303
[3000]	valid_0's rmse: 0.709292
[4000]	valid_0's rmse: 0.708902
[5000]	valid_0's rmse: 0.708754
[6000]	valid_0's rmse: 0.708691
[7000]	valid_0's rmse: 0.708645
Early stopping, best iteration is:
[7598]	valid_0's rmse: 0.70863
Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.71401
[2000]	valid_0's rmse: 0.710434
[3000]	valid_0's rmse: 0.709441
[4000]	valid_0's rmse: 0.709086
[5000]	valid_0's rmse: 0.708905
[6000]	valid_0's rmse: 0.70881
[7000]	valid_0's rmse: 0.708774
[8000]	valid_0's rmse: 0.708748
Early stopping, best iteration is:
[7827]	valid_0's rmse: 0.708744


[32m[I 2021-08-31 12:35:22,401][0m Trial 7 finished with value: 0.7084561709853378 and parameters: {'reg_lambda': 11.348491695224801, 'reg_alpha': 36.747082407484314, 'subsample': 0.6568473528250526, 'colsample_bytree': 0.09568039622470159, 'learning_rate': 0.025875814045293474, 'min_child_samples': 26, 'num_leaves': 33, 'max_depth': 18, 'max_bin': 1155, 'cat_smooth': 78, 'cat_l2': 0.6446865327297973}. Best is trial 6 with value: 0.7084218203643355.[0m


Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.717926
[2000]	valid_0's rmse: 0.712732
[3000]	valid_0's rmse: 0.710703
[4000]	valid_0's rmse: 0.709787
[5000]	valid_0's rmse: 0.709382
[6000]	valid_0's rmse: 0.709107
[7000]	valid_0's rmse: 0.708932
[8000]	valid_0's rmse: 0.708835
[9000]	valid_0's rmse: 0.708762
[10000]	valid_0's rmse: 0.708719
[11000]	valid_0's rmse: 0.708688
[12000]	valid_0's rmse: 0.708671
[13000]	valid_0's rmse: 0.708654
[14000]	valid_0's rmse: 0.708646
[15000]	valid_0's rmse: 0.708639
[16000]	valid_0's rmse: 0.708636
[17000]	valid_0's rmse: 0.708633
[18000]	valid_0's rmse: 0.70863
[19000]	valid_0's rmse: 0.708629
Early stopping, best iteration is:
[18736]	valid_0's rmse: 0.708629
Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.71806
[2000]	valid_0's rmse: 0.712716
[3000]	valid_0's rmse: 0.710736
[4000]	valid_0's rmse: 0.709874
[5000]	valid_0's rmse: 0.709428
[6000]	valid_0's rmse: 0.709211


[32m[I 2021-08-31 12:45:43,642][0m Trial 8 finished with value: 0.7084977542186186 and parameters: {'reg_lambda': 9.715670209891798, 'reg_alpha': 42.02202087555258, 'subsample': 0.6934154717053296, 'colsample_bytree': 0.08044513258983194, 'learning_rate': 0.016311355058712645, 'min_child_samples': 22, 'num_leaves': 32, 'max_depth': 18, 'max_bin': 1172, 'cat_smooth': 84, 'cat_l2': 2.1497606091661723}. Best is trial 6 with value: 0.7084218203643355.[0m


Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.711027
[2000]	valid_0's rmse: 0.7093
[3000]	valid_0's rmse: 0.708918
[4000]	valid_0's rmse: 0.708758
[5000]	valid_0's rmse: 0.708689
Early stopping, best iteration is:
[5236]	valid_0's rmse: 0.708687
Training until validation scores don't improve for 300 rounds
[1000]	valid_0's rmse: 0.711135
[2000]	valid_0's rmse: 0.709419
[3000]	valid_0's rmse: 0.709001
[4000]	valid_0's rmse: 0.708813
[5000]	valid_0's rmse: 0.708775
Early stopping, best iteration is:
[4813]	valid_0's rmse: 0.708768


[32m[I 2021-08-31 12:48:59,127][0m Trial 9 finished with value: 0.7086432863710612 and parameters: {'reg_lambda': 9.927921932876721, 'reg_alpha': 37.291999072434216, 'subsample': 0.6416524253284042, 'colsample_bytree': 0.117574015299386, 'learning_rate': 0.0299962396771433, 'min_child_samples': 32, 'num_leaves': 26, 'max_depth': 18, 'max_bin': 1181, 'cat_smooth': 80, 'cat_l2': 2.048289774895312}. Best is trial 6 with value: 0.7084218203643355.[0m


Number of finished trials: 10
Best trial: {'reg_lambda': 9.695042088524165, 'reg_alpha': 40.36958803589236, 'subsample': 0.699449059443989, 'colsample_bytree': 0.09042651635800109, 'learning_rate': 0.01683212648907722, 'min_child_samples': 28, 'num_leaves': 36, 'max_depth': 18, 'max_bin': 1210, 'cat_smooth': 88, 'cat_l2': 4.25044306182083}


In [14]:
study.best_params

{'reg_lambda': 9.695042088524165,
 'reg_alpha': 40.36958803589236,
 'subsample': 0.699449059443989,
 'colsample_bytree': 0.09042651635800109,
 'learning_rate': 0.01683212648907722,
 'min_child_samples': 28,
 'num_leaves': 36,
 'max_depth': 18,
 'max_bin': 1210,
 'cat_smooth': 88,
 'cat_l2': 4.25044306182083}

# Log

====== Ordinal encoding =========

0.709174903761106 no noise ver20

0.7086337003244899 no noise ver21 (narrow space)

0.708623028878119 no noise ver22 (narrow space)

0.7085074339931049 no noise ver24 (narrow space) lv1 best

============= lv2 ==================

0.7090075635934157 no noise ver2

0.7085280319966109 no noise ver3 (narrow space)

0.7084477634387564 no noise ver6 (narrow space)
