In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import random
import os
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

import lightgbm as lgb
#import xgboost as xgb
#import catboost as ctb

import warnings
warnings.simplefilter('ignore')

# Parameters

In [2]:
# Name of the label column.
target = 'target'

# Set to True for a quick smoke-test run with a tiny model.
DEBUG = False

# Seeds are identical in both modes, so they are defined once.
SEED = 17
CVSEED = 17

if not DEBUG:
    # Full run
    N_SPLITS, N_ESTIMATORS = 10, 20000
    EARLY_STOPPING_ROUNDS, VERBOSE = 200, 1000
    #N_ITERS = 10
else:
    # Smoke test
    N_SPLITS, N_ESTIMATORS = 2, 1
    EARLY_STOPPING_ROUNDS, VERBOSE = 1, 100
    #N_ITERS = 2

In [3]:
def set_seed(seed=17):
    """Seed Python's and NumPy's global random number generators.

    Also exports ``seed`` as the ``PYTHONHASHSEED`` environment variable.
    NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
    presumably only affects processes launched afterwards - confirm.

    Args:
        seed: Integer seed applied to ``random`` and ``numpy.random``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)
    
# Seed the global RNGs up front, before any data is loaded or sampled.
set_seed(SEED)

# Load data

In [4]:
# Read the train and test tables (in that order)
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/30-days-of-ml/train.csv",
        "../input/30-days-of-ml/test.csv",
    )
)

# Preview the data
train.head()

Unnamed: 0,id,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,C,B,B,A,E,C,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,A,A,B,D,A,F,A,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,A,C,B,D,A,D,A,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,A,C,B,D,A,E,C,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,A,C,B,D,A,E,A,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [5]:
# Drop the categorical columns with low MI (presumably mutual-information) scores.
# drop(..., errors="ignore") rebinds the frames instead of popping columns in place,
# so re-running this cell is a no-op rather than a KeyError on the second execution.
low_mi_cols = ["cat2", "cat3", "cat4", "cat6"]
train = train.drop(columns=low_mi_cols, errors="ignore")
test = test.drop(columns=low_mi_cols, errors="ignore")
train.head()

Unnamed: 0,id,cat0,cat1,cat5,cat7,cat8,cat9,cont0,cont1,cont2,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,E,C,N,0.20147,-0.014822,0.669699,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,D,F,A,O,0.743068,0.367411,1.021605,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,D,D,A,F,0.742708,0.310383,-0.012673,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,D,E,C,K,0.429551,0.620998,0.577942,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,D,E,A,N,1.058291,0.367492,-0.052389,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [6]:
# Split the label off from the predictors (the id column is not a feature)
features = train.drop(columns=['id', 'target'])
y = train['target']

# Preview features
features.head()

Unnamed: 0,cat0,cat1,cat5,cat7,cat8,cat9,cont0,cont1,cont2,cont3,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,B,B,B,E,C,N,0.20147,-0.014822,0.669699,0.136278,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985
1,B,B,D,F,A,O,0.743068,0.367411,1.021605,0.365798,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,A,A,D,D,A,F,0.742708,0.310383,-0.012673,0.576957,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,B,B,D,E,C,K,0.429551,0.620998,0.577942,0.28061,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682
4,A,A,D,E,A,N,1.058291,0.367492,-0.052389,0.232407,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823


# Preprocessing

In [7]:
# Disabled alternative: ordinal encoding of the categorical columns.
# The code is parked inside a string literal, so none of it executes; because the
# literal is the cell's last expression, the notebook echoes it back as the cell output.
'''
# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# ordinal-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()
ordinal_encoder = OrdinalEncoder()
X[object_cols] = ordinal_encoder.fit_transform(features[object_cols])
X_test[object_cols] = ordinal_encoder.transform(test[object_cols])

# Preview the ordinal-encoded features
X.head()
'''

"\n# List of categorical columns\nobject_cols = [col for col in features.columns if 'cat' in col]\n\n# ordinal-encode categorical columns\nX = features.copy()\nX_test = test.drop(['id'], axis=1).copy()\nordinal_encoder = OrdinalEncoder()\nX[object_cols] = ordinal_encoder.fit_transform(features[object_cols])\nX_test[object_cols] = ordinal_encoder.transform(test[object_cols])\n\n# Preview the ordinal-encoded features\nX.head()\n"

In [8]:

# Categorical columns are the ones whose name contains 'cat'
object_cols = [name for name in features.columns if 'cat' in name]


def _ohe_frame(encoded):
    """Wrap an encoded 2-D array in a DataFrame with columns ohe_0, ohe_1, ..."""
    return pd.DataFrame(encoded, columns=[f"ohe_{i}" for i in range(encoded.shape[1])])


# The encoder is fitted on the training rows only and then reused for the test rows
oh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")
X_ohe = _ohe_frame(oh_encoder.fit_transform(features[object_cols]))
X_test_ohe = _ohe_frame(oh_encoder.transform(test[object_cols]))

# Swap the raw categorical columns for their one-hot counterparts
X = pd.concat([features.drop(columns=object_cols), X_ohe], axis=1)
X_test = pd.concat([test.drop(columns=['id'] + object_cols), X_test_ohe], axis=1)

# Preview the one-hot-encoded features
X.head()


Unnamed: 0,cont0,cont1,cont2,cont3,cont4,cont5,cont6,cont7,cont8,cont9,...,ohe_28,ohe_29,ohe_30,ohe_31,ohe_32,ohe_33,ohe_34,ohe_35,ohe_36,ohe_37
0,0.20147,-0.014822,0.669699,0.136278,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.743068,0.367411,1.021605,0.365798,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.742708,0.310383,-0.012673,0.576957,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.429551,0.620998,0.577942,0.28061,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,1.058291,0.367492,-0.052389,0.232407,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [9]:
# Standardise the continuous ('cont*') columns with statistics fitted on train only
scale_features = [name for name in features.columns if 'cont' in name]

ss = StandardScaler().fit(features[scale_features])
X[scale_features] = ss.transform(features[scale_features])
X_test[scale_features] = ss.transform(test[scale_features])

In [10]:
# Swap noise

# Random
def apply_noise_rn(df, p=.75):
    """Swap-noise where every cell is kept independently with probability ``p``.

    Cells that are not kept take the value found at the same position in a
    row-shuffled copy of ``df``, i.e. a value from the same column of another
    (randomly chosen) row.

    Args:
        df: DataFrame to corrupt; it is not modified.
        p: Probability that a cell keeps its original value.

    Returns:
        A new DataFrame with the same shape, index and columns as ``df``.
    """
    # Draw the keep-mask first and the row shuffle second (same RNG order as before).
    keep_mask = np.random.binomial(1, p, df.shape) == 1
    shuffled_rows = np.random.permutation(df)
    return df.where(keep_mask, shuffled_rows)

# Row-wise
def apply_noise_row(df, p=.75):
    """Swap-noise that keeps exactly ``int(p * n_columns)`` cells in every row.

    For each row a random subset of ``int(p * df.shape[1])`` columns keeps its
    original values; the remaining cells take the value found at the same
    position in a row-shuffled copy of ``df``.

    The keep-mask is built in one vectorised step instead of a Python loop over
    rows and columns. The mask has the same distribution as before, but the
    random numbers are consumed differently, so results for a given seed differ
    from the loop-based version.

    Args:
        df: DataFrame to corrupt; it is not modified.
        p: Fraction of columns kept per row.

    Returns:
        A new DataFrame with the same shape, index and columns as ``df``.
    """
    n_rows, n_cols = df.shape
    n_keep = int(p * n_cols)
    # Arg-sorting i.i.d. uniforms gives each row a uniform random permutation of
    # 0..n_cols-1, so the positions holding a value below n_keep form a uniformly
    # random subset of exactly n_keep columns.
    should_not_swap = np.random.rand(n_rows, n_cols).argsort(axis=1) < n_keep
    corrupted_df = df.where(should_not_swap, np.random.permutation(df))
    return corrupted_df

# Pseudo Label

In [11]:
# Use the target column of a saved submission file as pseudo-labels for the test rows
pseudo = pd.read_csv("../input/30-days-pseudo/submission.csv")[target]

# Labelled train rows come first, pseudo-labelled test rows after, on a fresh 0..n-1 index
train_pseudo = pd.concat((X, y), axis="columns")
test_pseudo = pd.concat((X_test, pseudo), axis="columns")
all_pseudo = pd.concat((train_pseudo, test_pseudo), ignore_index=True)

# Optuna

In [12]:
# Optuna for parameter search
!pip install -q optuna

import optuna
import pickle



In [13]:
# Optuna objective: tunes the LightGBM tree/regularisation hyperparameters (and the
# learning rate, inside a narrow window) on one hold-out split of the pseudo-labelled data.
def objective(trial, X=all_pseudo.iloc[:,:-1], y=all_pseudo[target]):
    """Return the hold-out RMSE of a seed-averaged LightGBM model for one trial.

    The data is split 80/20 once (fixed by ``CVSEED``). The model is trained on
    the 80% part, which contains both real and pseudo-labelled rows. Early
    stopping and the returned score use only the hold-out rows whose index
    label is below ``train.shape[0]``, i.e. rows with real labels.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X: Feature frame. Index labels ``< train.shape[0]`` must denote real
            training rows and larger labels pseudo-labelled rows, as in
            ``all_pseudo``.
        y: Target series aligned with ``X``.

    Returns:
        RMSE on the real-label hold-out rows of the predictions averaged over
        the models trained with seeds ``SEED`` and ``SEED + 1``.
    """
    # The order of the suggest_* calls is kept as it was, since samplers draw sequentially.
    param_space = {
        'reg_lambda': trial.suggest_float('reg_lambda', 22, 28),
        'reg_alpha': trial.suggest_float('reg_alpha', 17, 20),
        # NOTE(review): LightGBM only bags when subsample_freq > 0, and it is left at
        # its default here, so 'subsample' is presumably inert - confirm.
        'subsample': trial.suggest_float('subsample', 0.233, 0.283),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.250, 0.280),
        #'subsample_freq': trial.suggest_int('subsample_freq', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.221, 0.251),
        'min_child_samples': trial.suggest_int('min_child_samples', 106, 112),
        'num_leaves': trial.suggest_int('num_leaves', 88, 100),
        'max_depth': trial.suggest_int('max_depth', 2, 3),
        'n_estimators': N_ESTIMATORS,
        'n_jobs': -1,
        'metric': 'rmse',
        'max_bin': trial.suggest_int('max_bin', 1148, 1198),
        # NOTE(review): no categorical_feature is passed to fit() and the feature
        # matrix is one-hot encoded, so these categorical-only settings presumably
        # have no effect - confirm.
        'cat_smooth': trial.suggest_int('cat_smooth', 78, 84),
        'cat_l2': trial.suggest_float('cat_l2', 2.5, 7.5, log=True),
    }

    seed_list = [SEED, SEED + 1]
    n_train = train.shape[0]

    # Use the X / y that were passed in, rather than re-reading the global frame by position.
    X_train, X_hold, y_train, y_hold = train_test_split(
        X, y, test_size=.2, random_state=CVSEED
    )

    # Validate only on rows that carry a real label, not a pseudo-label.
    has_real_label = X_hold.index < n_train
    X_valid, y_valid = X_hold.loc[has_real_label], y_hold.loc[has_real_label]

    valid_pred = np.zeros(len(X_valid))
    for inseed in seed_list:
        # A fresh dict per seed leaves the sampled parameters untouched.
        model = lgb.LGBMRegressor(**dict(param_space, random_state=inseed))
        # NOTE(review): newer LightGBM releases expect early stopping and logging to be
        # passed as callbacks instead of these fit() keywords - confirm before upgrading.
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_valid, y_valid)],
            eval_metric='rmse',
            early_stopping_rounds=EARLY_STOPPING_ROUNDS,
            verbose=VERBOSE,
        )
        valid_pred += model.predict(X_valid) / len(seed_list)

    # sqrt(MSE) does not depend on the `squared` keyword of mean_squared_error.
    rmse = float(np.sqrt(mean_squared_error(y_valid, valid_pred)))
    return rmse

In [14]:
# TPE is Optuna's default sampler; seeding it explicitly makes the search propose the
# same sequence of trials on every run (the global set_seed call does not reach it).
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=30)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-08-29 23:13:25,621][0m A new study created in memory with name: no-name-0f4b0c53-8dc9-4c59-8a56-8664e732be69[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710699
[2000]	valid_0's rmse: 0.709243
Early stopping, best iteration is:
[2740]	valid_0's rmse: 0.709036
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710782
[2000]	valid_0's rmse: 0.709345
[3000]	valid_0's rmse: 0.709163
Early stopping, best iteration is:
[2804]	valid_0's rmse: 0.709104


[32m[I 2021-08-29 23:14:30,627][0m Trial 0 finished with value: 0.7089351115222434 and parameters: {'reg_lambda': 22.715443256327532, 'reg_alpha': 19.107570352189512, 'subsample': 0.2729335339881532, 'colsample_bytree': 0.25617687669996847, 'learning_rate': 0.2392551210199904, 'min_child_samples': 109, 'num_leaves': 90, 'max_depth': 2, 'max_bin': 1184, 'cat_smooth': 78, 'cat_l2': 2.7038060864084072}. Best is trial 0 with value: 0.7089351115222434.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.709455
Early stopping, best iteration is:
[1270]	valid_0's rmse: 0.709331
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.709515
Early stopping, best iteration is:
[1279]	valid_0's rmse: 0.709393


[32m[I 2021-08-29 23:15:09,980][0m Trial 1 finished with value: 0.7091014253799477 and parameters: {'reg_lambda': 22.37612682198711, 'reg_alpha': 18.343426086150238, 'subsample': 0.268684492064483, 'colsample_bytree': 0.26328250918202467, 'learning_rate': 0.2403001433810683, 'min_child_samples': 110, 'num_leaves': 90, 'max_depth': 3, 'max_bin': 1187, 'cat_smooth': 83, 'cat_l2': 3.19329635579223}. Best is trial 0 with value: 0.7089351115222434.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.709415
Early stopping, best iteration is:
[1262]	valid_0's rmse: 0.709288
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.709395
Early stopping, best iteration is:
[1019]	valid_0's rmse: 0.70937


[32m[I 2021-08-29 23:15:46,018][0m Trial 2 finished with value: 0.709061002519857 and parameters: {'reg_lambda': 26.422670340057866, 'reg_alpha': 18.04997233883924, 'subsample': 0.2604466154739754, 'colsample_bytree': 0.26238794909870067, 'learning_rate': 0.24877111588214249, 'min_child_samples': 111, 'num_leaves': 90, 'max_depth': 3, 'max_bin': 1169, 'cat_smooth': 83, 'cat_l2': 3.09816731342425}. Best is trial 0 with value: 0.7089351115222434.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710954
[2000]	valid_0's rmse: 0.709386
[3000]	valid_0's rmse: 0.70907
Early stopping, best iteration is:
[3020]	valid_0's rmse: 0.70906
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710821
[2000]	valid_0's rmse: 0.709342
[3000]	valid_0's rmse: 0.709067
Early stopping, best iteration is:
[3481]	valid_0's rmse: 0.709008


[32m[I 2021-08-29 23:17:01,767][0m Trial 3 finished with value: 0.7088634276528123 and parameters: {'reg_lambda': 26.15251122280022, 'reg_alpha': 17.602963729709163, 'subsample': 0.23530633708410942, 'colsample_bytree': 0.2525523264616808, 'learning_rate': 0.242800098591402, 'min_child_samples': 107, 'num_leaves': 90, 'max_depth': 2, 'max_bin': 1175, 'cat_smooth': 80, 'cat_l2': 3.177067772538733}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.70953
Early stopping, best iteration is:
[1352]	valid_0's rmse: 0.709425
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.709339
Early stopping, best iteration is:
[1287]	valid_0's rmse: 0.709221


[32m[I 2021-08-29 23:17:42,596][0m Trial 4 finished with value: 0.7090795846442165 and parameters: {'reg_lambda': 22.854017259719463, 'reg_alpha': 17.015539520872135, 'subsample': 0.2812848169853863, 'colsample_bytree': 0.26340200591837043, 'learning_rate': 0.22356445488107668, 'min_child_samples': 106, 'num_leaves': 91, 'max_depth': 3, 'max_bin': 1183, 'cat_smooth': 81, 'cat_l2': 5.597208001038238}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.71063
[2000]	valid_0's rmse: 0.70929
Early stopping, best iteration is:
[2718]	valid_0's rmse: 0.709132
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710609
[2000]	valid_0's rmse: 0.709249
[3000]	valid_0's rmse: 0.709023
Early stopping, best iteration is:
[3487]	valid_0's rmse: 0.708999


[32m[I 2021-08-29 23:18:55,008][0m Trial 5 finished with value: 0.7088862973641823 and parameters: {'reg_lambda': 22.42381495159725, 'reg_alpha': 17.174134799615448, 'subsample': 0.26909959092465763, 'colsample_bytree': 0.2544274333692104, 'learning_rate': 0.24943942548443646, 'min_child_samples': 106, 'num_leaves': 95, 'max_depth': 2, 'max_bin': 1184, 'cat_smooth': 79, 'cat_l2': 2.504033759519302}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.709386
Early stopping, best iteration is:
[1450]	valid_0's rmse: 0.70917
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.709375
Early stopping, best iteration is:
[1494]	valid_0's rmse: 0.709139


[32m[I 2021-08-29 23:19:38,945][0m Trial 6 finished with value: 0.7089118038774981 and parameters: {'reg_lambda': 22.350212167630282, 'reg_alpha': 18.35452082242736, 'subsample': 0.27646304834334423, 'colsample_bytree': 0.26349026661675684, 'learning_rate': 0.22350667453802447, 'min_child_samples': 110, 'num_leaves': 99, 'max_depth': 3, 'max_bin': 1194, 'cat_smooth': 81, 'cat_l2': 5.472346020351298}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710681
[2000]	valid_0's rmse: 0.709307
[3000]	valid_0's rmse: 0.709145
Early stopping, best iteration is:
[3315]	valid_0's rmse: 0.709095
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710789
[2000]	valid_0's rmse: 0.709353
Early stopping, best iteration is:
[2486]	valid_0's rmse: 0.709133


[32m[I 2021-08-29 23:20:47,461][0m Trial 7 finished with value: 0.7089606208095353 and parameters: {'reg_lambda': 22.89545332224384, 'reg_alpha': 18.800158941491095, 'subsample': 0.23695493312947524, 'colsample_bytree': 0.2600125681702363, 'learning_rate': 0.2400843050115501, 'min_child_samples': 112, 'num_leaves': 89, 'max_depth': 2, 'max_bin': 1176, 'cat_smooth': 82, 'cat_l2': 3.84200283391802}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.709322
Early stopping, best iteration is:
[1419]	valid_0's rmse: 0.709174
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.709458
Early stopping, best iteration is:
[1451]	valid_0's rmse: 0.709305


[32m[I 2021-08-29 23:21:31,284][0m Trial 8 finished with value: 0.7089694135720227 and parameters: {'reg_lambda': 22.368036627046028, 'reg_alpha': 17.86459388672789, 'subsample': 0.2529355925412905, 'colsample_bytree': 0.25599655432464735, 'learning_rate': 0.23945257545710646, 'min_child_samples': 108, 'num_leaves': 89, 'max_depth': 3, 'max_bin': 1182, 'cat_smooth': 78, 'cat_l2': 2.689511524568169}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.709321
Early stopping, best iteration is:
[1163]	valid_0's rmse: 0.70917
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.709495
Early stopping, best iteration is:
[1318]	valid_0's rmse: 0.709337


[32m[I 2021-08-29 23:22:10,575][0m Trial 9 finished with value: 0.7090051067913 and parameters: {'reg_lambda': 24.00477175516379, 'reg_alpha': 19.65535035508909, 'subsample': 0.2354872190931988, 'colsample_bytree': 0.26219473729345055, 'learning_rate': 0.2379765627473233, 'min_child_samples': 107, 'num_leaves': 89, 'max_depth': 3, 'max_bin': 1179, 'cat_smooth': 80, 'cat_l2': 6.706502125292348}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.71076
[2000]	valid_0's rmse: 0.709276
[3000]	valid_0's rmse: 0.70903
Early stopping, best iteration is:
[2867]	valid_0's rmse: 0.709024
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710813
[2000]	valid_0's rmse: 0.709349
[3000]	valid_0's rmse: 0.709084
Early stopping, best iteration is:
[2856]	valid_0's rmse: 0.709075


[32m[I 2021-08-29 23:23:18,241][0m Trial 10 finished with value: 0.7089076011726827 and parameters: {'reg_lambda': 27.706079887023044, 'reg_alpha': 17.499531809720505, 'subsample': 0.24539870564460492, 'colsample_bytree': 0.27459161733675236, 'learning_rate': 0.23107728441597364, 'min_child_samples': 108, 'num_leaves': 94, 'max_depth': 2, 'max_bin': 1151, 'cat_smooth': 80, 'cat_l2': 4.161454446926812}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710945
[2000]	valid_0's rmse: 0.709499
[3000]	valid_0's rmse: 0.709283
Early stopping, best iteration is:
[2983]	valid_0's rmse: 0.709276
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710803
[2000]	valid_0's rmse: 0.709473
[3000]	valid_0's rmse: 0.709224
Early stopping, best iteration is:
[3205]	valid_0's rmse: 0.709207


[32m[I 2021-08-29 23:24:31,228][0m Trial 11 finished with value: 0.7090650509495239 and parameters: {'reg_lambda': 25.66295655002827, 'reg_alpha': 17.068294465112253, 'subsample': 0.26347357575135943, 'colsample_bytree': 0.2504324152626515, 'learning_rate': 0.2503769469193417, 'min_child_samples': 106, 'num_leaves': 96, 'max_depth': 2, 'max_bin': 1166, 'cat_smooth': 79, 'cat_l2': 3.417791354619208}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710897
[2000]	valid_0's rmse: 0.70933
Early stopping, best iteration is:
[2794]	valid_0's rmse: 0.709086
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710797
[2000]	valid_0's rmse: 0.709457
[3000]	valid_0's rmse: 0.709255
Early stopping, best iteration is:
[2874]	valid_0's rmse: 0.709244


[32m[I 2021-08-29 23:25:39,792][0m Trial 12 finished with value: 0.7089929338121823 and parameters: {'reg_lambda': 27.17090121002347, 'reg_alpha': 17.399983960967447, 'subsample': 0.25163138907850974, 'colsample_bytree': 0.25042992462229374, 'learning_rate': 0.24620506084424368, 'min_child_samples': 106, 'num_leaves': 94, 'max_depth': 2, 'max_bin': 1161, 'cat_smooth': 79, 'cat_l2': 2.5988997704322094}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710545
[2000]	valid_0's rmse: 0.709276
[3000]	valid_0's rmse: 0.709016
Early stopping, best iteration is:
[2997]	valid_0's rmse: 0.709015
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710711
[2000]	valid_0's rmse: 0.709475
Early stopping, best iteration is:
[2217]	valid_0's rmse: 0.709356


[32m[I 2021-08-29 23:26:44,531][0m Trial 13 finished with value: 0.709021660604894 and parameters: {'reg_lambda': 24.61287487693533, 'reg_alpha': 17.492482470727914, 'subsample': 0.2668166234166488, 'colsample_bytree': 0.2717581671472758, 'learning_rate': 0.24501138591259417, 'min_child_samples': 107, 'num_leaves': 97, 'max_depth': 2, 'max_bin': 1158, 'cat_smooth': 79, 'cat_l2': 2.5193072251546833}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.711005
[2000]	valid_0's rmse: 0.709481
Early stopping, best iteration is:
[2695]	valid_0's rmse: 0.709253
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710973
[2000]	valid_0's rmse: 0.709442
[3000]	valid_0's rmse: 0.709196
Early stopping, best iteration is:
[3421]	valid_0's rmse: 0.709116


[32m[I 2021-08-29 23:27:57,220][0m Trial 14 finished with value: 0.7090085856598115 and parameters: {'reg_lambda': 26.06179765548124, 'reg_alpha': 17.00866242579231, 'subsample': 0.24201227370227982, 'colsample_bytree': 0.2541092788475389, 'learning_rate': 0.23245738815012248, 'min_child_samples': 107, 'num_leaves': 92, 'max_depth': 2, 'max_bin': 1196, 'cat_smooth': 80, 'cat_l2': 3.531288992424389}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710701
[2000]	valid_0's rmse: 0.709297
[3000]	valid_0's rmse: 0.709043
Early stopping, best iteration is:
[2847]	valid_0's rmse: 0.709027
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710723
[2000]	valid_0's rmse: 0.709404
[3000]	valid_0's rmse: 0.709204
Early stopping, best iteration is:
[3217]	valid_0's rmse: 0.709165


[32m[I 2021-08-29 23:29:09,253][0m Trial 15 finished with value: 0.7089493962219469 and parameters: {'reg_lambda': 23.895808878644438, 'reg_alpha': 17.840984015997837, 'subsample': 0.2563603410012719, 'colsample_bytree': 0.2681824641413678, 'learning_rate': 0.24479276331879418, 'min_child_samples': 108, 'num_leaves': 96, 'max_depth': 2, 'max_bin': 1191, 'cat_smooth': 78, 'cat_l2': 2.9324433382832913}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710631
[2000]	valid_0's rmse: 0.709196
[3000]	valid_0's rmse: 0.708961
Early stopping, best iteration is:
[3056]	valid_0's rmse: 0.708951
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710704
[2000]	valid_0's rmse: 0.709333
Early stopping, best iteration is:
[2731]	valid_0's rmse: 0.709201


[32m[I 2021-08-29 23:30:21,169][0m Trial 16 finished with value: 0.7089268786818612 and parameters: {'reg_lambda': 27.127295747421954, 'reg_alpha': 17.43287323651181, 'subsample': 0.2823880324575902, 'colsample_bytree': 0.27943825493411656, 'learning_rate': 0.24943417795686126, 'min_child_samples': 106, 'num_leaves': 93, 'max_depth': 2, 'max_bin': 1172, 'cat_smooth': 80, 'cat_l2': 4.873293626681055}. Best is trial 3 with value: 0.7088634276528123.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710802
[2000]	valid_0's rmse: 0.709281
[3000]	valid_0's rmse: 0.70908
Early stopping, best iteration is:
[3735]	valid_0's rmse: 0.708984
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710749
[2000]	valid_0's rmse: 0.709255
Early stopping, best iteration is:
[2734]	valid_0's rmse: 0.709034


[32m[I 2021-08-29 23:31:36,741][0m Trial 17 finished with value: 0.708826260098729 and parameters: {'reg_lambda': 24.984742572351553, 'reg_alpha': 17.238314749729266, 'subsample': 0.27231281807556784, 'colsample_bytree': 0.25249704379805205, 'learning_rate': 0.24495104991851, 'min_child_samples': 107, 'num_leaves': 100, 'max_depth': 2, 'max_bin': 1175, 'cat_smooth': 82, 'cat_l2': 3.797122304934783}. Best is trial 17 with value: 0.708826260098729.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710894
[2000]	valid_0's rmse: 0.709248
Early stopping, best iteration is:
[2707]	valid_0's rmse: 0.709066
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710848
[2000]	valid_0's rmse: 0.709399
[3000]	valid_0's rmse: 0.709058
Early stopping, best iteration is:
[3387]	valid_0's rmse: 0.708999


[32m[I 2021-08-29 23:32:50,162][0m Trial 18 finished with value: 0.7088555310896881 and parameters: {'reg_lambda': 25.19570054975197, 'reg_alpha': 17.79050553143354, 'subsample': 0.2744445248833738, 'colsample_bytree': 0.250405196687137, 'learning_rate': 0.24286550517926792, 'min_child_samples': 109, 'num_leaves': 99, 'max_depth': 2, 'max_bin': 1175, 'cat_smooth': 82, 'cat_l2': 4.222309564793502}. Best is trial 17 with value: 0.708826260098729.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710711
[2000]	valid_0's rmse: 0.709345
Early stopping, best iteration is:
[2663]	valid_0's rmse: 0.709169
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710871
[2000]	valid_0's rmse: 0.709414
Early stopping, best iteration is:
[2671]	valid_0's rmse: 0.709227


[32m[I 2021-08-29 23:33:53,885][0m Trial 19 finished with value: 0.70905776645786 and parameters: {'reg_lambda': 25.170515756341217, 'reg_alpha': 18.091590221971437, 'subsample': 0.2764624965706549, 'colsample_bytree': 0.2585057084554969, 'learning_rate': 0.23468991896013347, 'min_child_samples': 110, 'num_leaves': 100, 'max_depth': 2, 'max_bin': 1164, 'cat_smooth': 84, 'cat_l2': 4.630061825741206}. Best is trial 17 with value: 0.708826260098729.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710787
[2000]	valid_0's rmse: 0.709197
[3000]	valid_0's rmse: 0.708988
Early stopping, best iteration is:
[3111]	valid_0's rmse: 0.70897
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710756
[2000]	valid_0's rmse: 0.709358
[3000]	valid_0's rmse: 0.709175
Early stopping, best iteration is:
[3327]	valid_0's rmse: 0.709109


[32m[I 2021-08-29 23:35:09,622][0m Trial 20 finished with value: 0.7088677316236124 and parameters: {'reg_lambda': 24.61922679153974, 'reg_alpha': 18.61594662260193, 'subsample': 0.27760700508305575, 'colsample_bytree': 0.25031623020811566, 'learning_rate': 0.2432468857938558, 'min_child_samples': 109, 'num_leaves': 99, 'max_depth': 2, 'max_bin': 1155, 'cat_smooth': 82, 'cat_l2': 4.077733159618642}. Best is trial 17 with value: 0.708826260098729.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710898
[2000]	valid_0's rmse: 0.709261
[3000]	valid_0's rmse: 0.708969
Early stopping, best iteration is:
[3036]	valid_0's rmse: 0.708947
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710843
[2000]	valid_0's rmse: 0.709387
[3000]	valid_0's rmse: 0.709156
Early stopping, best iteration is:
[3058]	valid_0's rmse: 0.709105


[32m[I 2021-08-29 23:36:20,928][0m Trial 21 finished with value: 0.708859367923011 and parameters: {'reg_lambda': 25.45286795305667, 'reg_alpha': 17.786475379070804, 'subsample': 0.2727394689011271, 'colsample_bytree': 0.2520527652006119, 'learning_rate': 0.24189703380913885, 'min_child_samples': 108, 'num_leaves': 100, 'max_depth': 2, 'max_bin': 1172, 'cat_smooth': 82, 'cat_l2': 3.6736800627864166}. Best is trial 17 with value: 0.708826260098729.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.71086
[2000]	valid_0's rmse: 0.709314
[3000]	valid_0's rmse: 0.709063
Early stopping, best iteration is:
[3023]	valid_0's rmse: 0.709055
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710927
[2000]	valid_0's rmse: 0.709404
[3000]	valid_0's rmse: 0.709208
Early stopping, best iteration is:
[2961]	valid_0's rmse: 0.709198


[32m[I 2021-08-29 23:37:31,497][0m Trial 22 finished with value: 0.7089567838213009 and parameters: {'reg_lambda': 25.320778805552063, 'reg_alpha': 17.817310264266386, 'subsample': 0.27270445286859435, 'colsample_bytree': 0.2522292212143319, 'learning_rate': 0.24705741834640357, 'min_child_samples': 108, 'num_leaves': 100, 'max_depth': 2, 'max_bin': 1170, 'cat_smooth': 82, 'cat_l2': 3.7376400798683616}. Best is trial 17 with value: 0.708826260098729.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710626
[2000]	valid_0's rmse: 0.709316
Early stopping, best iteration is:
[2589]	valid_0's rmse: 0.709124
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710742
[2000]	valid_0's rmse: 0.709369
[3000]	valid_0's rmse: 0.70918
Early stopping, best iteration is:
[3233]	valid_0's rmse: 0.709139


[32m[I 2021-08-29 23:38:37,225][0m Trial 23 finished with value: 0.7089780707775998 and parameters: {'reg_lambda': 24.203251463786344, 'reg_alpha': 17.23935267260174, 'subsample': 0.2826288044155043, 'colsample_bytree': 0.25795125885416975, 'learning_rate': 0.24261211455327675, 'min_child_samples': 109, 'num_leaves': 98, 'max_depth': 2, 'max_bin': 1177, 'cat_smooth': 83, 'cat_l2': 4.484256168309575}. Best is trial 17 with value: 0.708826260098729.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710994
[2000]	valid_0's rmse: 0.709467
[3000]	valid_0's rmse: 0.709223
Early stopping, best iteration is:
[3101]	valid_0's rmse: 0.709205
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710984
[2000]	valid_0's rmse: 0.709499
[3000]	valid_0's rmse: 0.709285
Early stopping, best iteration is:
[3043]	valid_0's rmse: 0.709263


[32m[I 2021-08-29 23:39:42,952][0m Trial 24 finished with value: 0.7090686846797555 and parameters: {'reg_lambda': 23.434615248536158, 'reg_alpha': 18.125727515919237, 'subsample': 0.2642594107491947, 'colsample_bytree': 0.2503015898256268, 'learning_rate': 0.23702916860114442, 'min_child_samples': 108, 'num_leaves': 100, 'max_depth': 2, 'max_bin': 1167, 'cat_smooth': 82, 'cat_l2': 5.049585258280801}. Best is trial 17 with value: 0.708826260098729.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710772
[2000]	valid_0's rmse: 0.709146
[3000]	valid_0's rmse: 0.708907
Early stopping, best iteration is:
[3108]	valid_0's rmse: 0.70886
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710863
[2000]	valid_0's rmse: 0.709325
[3000]	valid_0's rmse: 0.709066
Early stopping, best iteration is:
[3512]	valid_0's rmse: 0.708991


[32m[I 2021-08-29 23:40:53,910][0m Trial 25 finished with value: 0.7087611371131594 and parameters: {'reg_lambda': 25.58131029123097, 'reg_alpha': 17.731297121842683, 'subsample': 0.27276100882279997, 'colsample_bytree': 0.25277319511583163, 'learning_rate': 0.2347561485788669, 'min_child_samples': 109, 'num_leaves': 98, 'max_depth': 2, 'max_bin': 1172, 'cat_smooth': 84, 'cat_l2': 4.034749139061899}. Best is trial 25 with value: 0.7087611371131594.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710799
[2000]	valid_0's rmse: 0.709318
[3000]	valid_0's rmse: 0.709066
Early stopping, best iteration is:
[3241]	valid_0's rmse: 0.709033
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710962
[2000]	valid_0's rmse: 0.709358
Early stopping, best iteration is:
[2466]	valid_0's rmse: 0.70919


[32m[I 2021-08-29 23:41:55,246][0m Trial 26 finished with value: 0.7089644213085298 and parameters: {'reg_lambda': 24.80945696631139, 'reg_alpha': 17.26313253271195, 'subsample': 0.27916528592532386, 'colsample_bytree': 0.25518348814901454, 'learning_rate': 0.22743610554973684, 'min_child_samples': 111, 'num_leaves': 98, 'max_depth': 2, 'max_bin': 1179, 'cat_smooth': 84, 'cat_l2': 4.042733652119477}. Best is trial 25 with value: 0.7087611371131594.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710784
[2000]	valid_0's rmse: 0.709439
[3000]	valid_0's rmse: 0.709187
Early stopping, best iteration is:
[3062]	valid_0's rmse: 0.709167
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710811
[2000]	valid_0's rmse: 0.709327
Early stopping, best iteration is:
[2799]	valid_0's rmse: 0.709103


[32m[I 2021-08-29 23:42:58,976][0m Trial 27 finished with value: 0.7089901509294283 and parameters: {'reg_lambda': 25.764790533150926, 'reg_alpha': 17.67467465110331, 'subsample': 0.27067863479654863, 'colsample_bytree': 0.26784333508014385, 'learning_rate': 0.23549713476026282, 'min_child_samples': 110, 'num_leaves': 98, 'max_depth': 2, 'max_bin': 1162, 'cat_smooth': 84, 'cat_l2': 4.326086026463653}. Best is trial 25 with value: 0.7087611371131594.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710761
[2000]	valid_0's rmse: 0.709212
Early stopping, best iteration is:
[2703]	valid_0's rmse: 0.708954
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.71098
[2000]	valid_0's rmse: 0.709281
[3000]	valid_0's rmse: 0.708982
Early stopping, best iteration is:
[3022]	valid_0's rmse: 0.708978


[32m[I 2021-08-29 23:44:02,360][0m Trial 28 finished with value: 0.7088301711365852 and parameters: {'reg_lambda': 26.583691957353626, 'reg_alpha': 18.04001149977492, 'subsample': 0.26246608418428036, 'colsample_bytree': 0.25851307781428995, 'learning_rate': 0.22974738457397567, 'min_child_samples': 109, 'num_leaves': 99, 'max_depth': 2, 'max_bin': 1174, 'cat_smooth': 83, 'cat_l2': 5.368315395997765}. Best is trial 25 with value: 0.7087611371131594.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.710884
[2000]	valid_0's rmse: 0.709281
[3000]	valid_0's rmse: 0.709028
Early stopping, best iteration is:
[3516]	valid_0's rmse: 0.708977
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.711015
[2000]	valid_0's rmse: 0.709438
[3000]	valid_0's rmse: 0.709168
Early stopping, best iteration is:
[3181]	valid_0's rmse: 0.709155


[32m[I 2021-08-29 23:45:15,293][0m Trial 29 finished with value: 0.7089266036447274 and parameters: {'reg_lambda': 26.379595954595057, 'reg_alpha': 19.082812922511707, 'subsample': 0.2608117444177106, 'colsample_bytree': 0.25931013141534454, 'learning_rate': 0.22848788180555987, 'min_child_samples': 109, 'num_leaves': 97, 'max_depth': 2, 'max_bin': 1187, 'cat_smooth': 83, 'cat_l2': 6.425095070474276}. Best is trial 25 with value: 0.7087611371131594.[0m


Number of finished trials: 30
Best trial: {'reg_lambda': 25.58131029123097, 'reg_alpha': 17.731297121842683, 'subsample': 0.27276100882279997, 'colsample_bytree': 0.25277319511583163, 'learning_rate': 0.2347561485788669, 'min_child_samples': 109, 'num_leaves': 98, 'max_depth': 2, 'max_bin': 1172, 'cat_smooth': 84, 'cat_l2': 4.034749139061899}


In [15]:
# Display the hyperparameters of the best trial as the cell's rich output.
# This is the same dict as the "Best trial:" line printed by the tuning cell above.
# NOTE(review): `study` is created in an earlier cell that must have run first.
study.best_params

{'reg_lambda': 25.58131029123097,
 'reg_alpha': 17.731297121842683,
 'subsample': 0.27276100882279997,
 'colsample_bytree': 0.25277319511583163,
 'learning_rate': 0.2347561485788669,
 'min_child_samples': 109,
 'num_leaves': 98,
 'max_depth': 2,
 'max_bin': 1172,
 'cat_smooth': 84,
 'cat_l2': 4.034749139061899}

# Log

====== Ordinal encoding =========

0.7123367407151787 no noise ver1 final

0.7158040228380602 row-wise noise ver2

0.7157995952863279 random noise ver3

====== One-hot encoding =========

0.7203460513257339 no noise ver4

0.7090027494545778 no noise ver9

0.7089277083045002 no noise ver10 (narrowed hyperparameter search space)

0.7088992777144029 no noise ver11 (narrowed hyperparameter search space)

0.7088362570431959 no noise ver12 (narrowed hyperparameter search space)