In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import random
import os
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

import lightgbm as lgb
#import xgboost as xgb
#import catboost as ctb

import warnings
warnings.simplefilter('ignore')

# Parameters

In [2]:
target = 'target'

DEBUG = False

if DEBUG:
    N_ESTIMATORS = 1
    N_SPLITS = 2
    SEED = 17
    CVSEED = 17
    EARLY_STOPPING_ROUNDS = 1
    VERBOSE = 100
    #N_ITERS = 2
else:
    N_SPLITS = 10
    N_ESTIMATORS = 20000
    EARLY_STOPPING_ROUNDS = 200
    VERBOSE = 1000
    SEED = 17
    CVSEED = 17
    #N_ITERS = 10

In [3]:
def set_seed(seed=17):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
set_seed(SEED)

# Load data

In [4]:
# Load the training data
train = pd.read_csv("../input/30-days-of-ml/train.csv")
test = pd.read_csv("../input/30-days-of-ml/test.csv")

# Preview the data
train.head()

Unnamed: 0,id,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,C,B,B,A,E,C,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,A,A,B,D,A,F,A,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,A,C,B,D,A,D,A,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,A,C,B,D,A,E,C,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,A,C,B,D,A,E,A,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [5]:
# Low MI scores
for pp in ["cat2","cat3","cat4","cat6"]:
    train.pop(pp)
    test.pop(pp)
train.head()

Unnamed: 0,id,cat0,cat1,cat5,cat7,cat8,cat9,cont0,cont1,cont2,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,E,C,N,0.20147,-0.014822,0.669699,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,D,F,A,O,0.743068,0.367411,1.021605,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,D,D,A,F,0.742708,0.310383,-0.012673,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,D,E,C,K,0.429551,0.620998,0.577942,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,D,E,A,N,1.058291,0.367492,-0.052389,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [6]:
# Separate target from features
y = train['target']
features = train.drop(['id','target'], axis=1)

# Preview features
features.head()

Unnamed: 0,cat0,cat1,cat5,cat7,cat8,cat9,cont0,cont1,cont2,cont3,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,B,B,B,E,C,N,0.20147,-0.014822,0.669699,0.136278,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985
1,B,B,D,F,A,O,0.743068,0.367411,1.021605,0.365798,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,A,A,D,D,A,F,0.742708,0.310383,-0.012673,0.576957,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,B,B,D,E,C,K,0.429551,0.620998,0.577942,0.28061,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682
4,A,A,D,E,A,N,1.058291,0.367492,-0.052389,0.232407,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823


# Preprocessing

In [7]:

# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# ordinal-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()
ordinal_encoder = OrdinalEncoder()
X[object_cols] = ordinal_encoder.fit_transform(features[object_cols])
X_test[object_cols] = ordinal_encoder.transform(test[object_cols])

# Preview the ordinal-encoded features
X.head()


Unnamed: 0,cat0,cat1,cat5,cat7,cat8,cat9,cont0,cont1,cont2,cont3,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,1.0,1.0,1.0,4.0,2.0,13.0,0.20147,-0.014822,0.669699,0.136278,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985
1,1.0,1.0,3.0,5.0,0.0,14.0,0.743068,0.367411,1.021605,0.365798,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,0.0,0.0,3.0,3.0,0.0,5.0,0.742708,0.310383,-0.012673,0.576957,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,1.0,1.0,3.0,4.0,2.0,10.0,0.429551,0.620998,0.577942,0.28061,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682
4,0.0,0.0,3.0,4.0,0.0,13.0,1.058291,0.367492,-0.052389,0.232407,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823


In [8]:
'''
# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# one-hot-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()

oh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")
X_ohe = oh_encoder.fit_transform(features[object_cols])
X_test_ohe = oh_encoder.transform(test[object_cols])

X_ohe = pd.DataFrame(X_ohe, columns=[f"ohe_{i}" for i in range(X_ohe.shape[1])])
X_test_ohe = pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])

X = pd.concat([X, X_ohe], axis=1)
X_test = pd.concat([X_test, X_test_ohe], axis=1)
X = X.drop(object_cols, axis=1)
X_test = X_test.drop(object_cols, axis=1)
    
# Preview the one-hot-encoded features
X.head()
'''

'\n# List of categorical columns\nobject_cols = [col for col in features.columns if \'cat\' in col]\n\n# one-hot-encode categorical columns\nX = features.copy()\nX_test = test.drop([\'id\'], axis=1).copy()\n\noh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")\nX_ohe = oh_encoder.fit_transform(features[object_cols])\nX_test_ohe = oh_encoder.transform(test[object_cols])\n\nX_ohe = pd.DataFrame(X_ohe, columns=[f"ohe_{i}" for i in range(X_ohe.shape[1])])\nX_test_ohe = pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])\n\nX = pd.concat([X, X_ohe], axis=1)\nX_test = pd.concat([X_test, X_test_ohe], axis=1)\nX = X.drop(object_cols, axis=1)\nX_test = X_test.drop(object_cols, axis=1)\n    \n# Preview the one-hot-encoded features\nX.head()\n'

In [9]:
scale_features = [col for col in features.columns if 'cont' in col]

ss = StandardScaler()
X[scale_features] = ss.fit_transform(features[scale_features])
X_test[scale_features] = ss.transform(test[scale_features])

In [10]:
# Swap noise

# Random
def apply_noise_rn(df, p=.75):
    should_not_swap = np.random.binomial(1, p, df.shape)
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Row-wise
def apply_noise_row(df, p=.75):
    should_not_swap = np.zeros(df.shape)
    for i in range(df.shape[0]):
        for j in np.random.choice(df.shape[1],int(p*df.shape[1]),replace=False):
            should_not_swap[i,j]=1 
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Pseudo Label

In [11]:
pseudo = pd.read_csv("../input/30-days-pseudo/submission.csv")[target]
train_pseudo = pd.concat([X, y], axis=1)
test_pseudo = pd.concat([X_test, pseudo], axis=1)
all_pseudo = pd.concat([train_pseudo, test_pseudo]).reset_index(drop=True)

# Optuna

In [12]:
# Optuna for parameter search
!pip install -q optuna

import optuna
import pickle



In [13]:
# for the fixed learning rate, use the opt n iterations and tune the tree hyperparameters
def objective(trial, X=all_pseudo.iloc[:,:-1], y=all_pseudo[target]):
  """
  """
  param_space = {
               #'device':'gpu',  # Use GPU acceleration
               #'boosting_type': 'gbdt',
               'reg_lambda':trial.suggest_uniform('reg_lambda', 0.01, 100),
              'reg_alpha':trial.suggest_uniform('reg_alpha', 0.01, 50),
                'subsample': trial.suggest_uniform('subsample', 0.2, 1.0),
              'colsample_bytree':trial.suggest_uniform('colsample_bytree', 0.1, 1.0),
                #'subsample_freq': trial.suggest_int('subsample_freq', 1, 10),
               'learning_rate':trial.suggest_uniform('learning_rate', 1e-3, 1),
                 'min_child_samples':trial.suggest_int('min_child_samples', 5, 100),
              'num_leaves':trial.suggest_int('num_leaves', 10, 200),
              'max_depth':trial.suggest_int('max_depth', 2, 30),
              #'min_split_gain': trial.suggest_float('min_split_gain', 0.0, 0.005),
              #'class_weight':trial.suggest_categorical('class_weight',['balanced',None]),
               'n_estimators':N_ESTIMATORS,
                'n_jobs' : -1,
              'metric':'rmse',
              'max_bin':trial.suggest_int('max_bin', 300, 1000),
              'cat_smooth':trial.suggest_int('cat_smooth', 5, 100),
              'cat_l2':trial.suggest_loguniform('cat_l2', 1e-3, 100)
                }
            
  seed_list=[SEED, SEED+1]
  #kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=CVSEED)
  lgb_oof = np.zeros(train.shape[0])
  #lgb_pred = np.zeros(test.shape[0])
  X_tr, X_va, y_tr, y_va = train_test_split(X,y,test_size=.2,random_state=CVSEED)
  trn_idx = X_tr.index
  val_idx = X_va.index
  
  #for (trn_idx, val_idx) in enumerate(kf.split(X, y)):
  #print(f"===== fold {fold} =====")
  oof_idx = np.array([idx for idx in val_idx if idx < train.shape[0]])
  preds_idx = np.array([idx for idx in val_idx if idx >= train.shape[0]])

  X_train, y_train = all_pseudo.iloc[trn_idx, :-1], all_pseudo[target].iloc[trn_idx]
  #X_train = apply_noise_rn(X_train)

  X_valid, y_valid = all_pseudo.iloc[oof_idx, :-1], all_pseudo[target].iloc[oof_idx]
  #X_test = all_pseudo[scale_features].iloc[preds_idx]
  
  #start = time.time()
  for inseed in seed_list:
    param_space['random_state'] = inseed

    model = lgb.LGBMRegressor(**param_space)
    model.fit(
        X_train, 
        y_train,
        eval_set=[(X_valid, y_valid)],
        eval_metric='rmse',
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        verbose=VERBOSE,
        categorical_feature=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        #callbacks=[optuna.integration.LightGBMPruningCallback(trial, metric='rmse')],
    )


    lgb_oof[oof_idx] += model.predict(X_valid) / len(seed_list)
    #lgb_pred[preds_idx-train.shape[0]] += model.predict(X_test) / len(seed_list)

  #elapsed = time.time() - start
  rmse = mean_squared_error(y_valid, lgb_oof[oof_idx], squared=False)
  #print(f"fold {fold} - lgb rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")            
  
  #print(f"oof lgb_rmse = {mean_squared_error(train[target], lgb_oof, squared=False)}")

  
  return rmse

In [14]:
study = optuna.create_study(direction='minimize')
study.optimize(objective,n_trials= 30)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-08-23 14:51:01,994][0m A new study created in memory with name: no-name-6c6c752f-2de4-4881-9e75-d666af1da3e2[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[78]	valid_0's rmse: 0.715448
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 14:51:25,681][0m Trial 0 finished with value: 0.7144756146995378 and parameters: {'reg_lambda': 16.387039405987355, 'reg_alpha': 16.247821826951892, 'subsample': 0.47752431429381376, 'colsample_bytree': 0.8960762060747465, 'learning_rate': 0.29407458120981017, 'min_child_samples': 74, 'num_leaves': 78, 'max_depth': 24, 'max_bin': 917, 'cat_smooth': 16, 'cat_l2': 0.006712594921545039}. Best is trial 0 with value: 0.7144756146995378.[0m


Early stopping, best iteration is:
[66]	valid_0's rmse: 0.715173
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[36]	valid_0's rmse: 0.716238
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 14:51:46,559][0m Trial 1 finished with value: 0.7148411154381455 and parameters: {'reg_lambda': 23.228329738145455, 'reg_alpha': 15.578555086069409, 'subsample': 0.7697016718190468, 'colsample_bytree': 0.27225576696605913, 'learning_rate': 0.5281783173066487, 'min_child_samples': 42, 'num_leaves': 189, 'max_depth': 30, 'max_bin': 678, 'cat_smooth': 85, 'cat_l2': 0.0194932122458214}. Best is trial 0 with value: 0.7144756146995378.[0m


Early stopping, best iteration is:
[34]	valid_0's rmse: 0.716729
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.717176
[2000]	valid_0's rmse: 0.714629
[3000]	valid_0's rmse: 0.71387
[4000]	valid_0's rmse: 0.713512
[5000]	valid_0's rmse: 0.713399
Early stopping, best iteration is:
[5502]	valid_0's rmse: 0.713359
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.717222
[2000]	valid_0's rmse: 0.71457
[3000]	valid_0's rmse: 0.713766
[4000]	valid_0's rmse: 0.713481
[5000]	valid_0's rmse: 0.713401
Early stopping, best iteration is:
[4919]	valid_0's rmse: 0.713393


[32m[I 2021-08-23 14:54:38,653][0m Trial 2 finished with value: 0.7132650949389292 and parameters: {'reg_lambda': 28.724637450305423, 'reg_alpha': 8.237252813254061, 'subsample': 0.8342652462801474, 'colsample_bytree': 0.7884174555977527, 'learning_rate': 0.04221322279986934, 'min_child_samples': 73, 'num_leaves': 174, 'max_depth': 3, 'max_bin': 649, 'cat_smooth': 94, 'cat_l2': 0.007582993419544741}. Best is trial 2 with value: 0.7132650949389292.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[93]	valid_0's rmse: 0.715675
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 14:54:51,443][0m Trial 3 finished with value: 0.7146542571971023 and parameters: {'reg_lambda': 81.14045437269921, 'reg_alpha': 1.9171704252142114, 'subsample': 0.7677794589692766, 'colsample_bytree': 0.7180762941762456, 'learning_rate': 0.735928067253769, 'min_child_samples': 7, 'num_leaves': 22, 'max_depth': 4, 'max_bin': 552, 'cat_smooth': 20, 'cat_l2': 15.004566215212657}. Best is trial 2 with value: 0.7132650949389292.[0m


Early stopping, best iteration is:
[83]	valid_0's rmse: 0.716323
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[77]	valid_0's rmse: 0.715973
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[84]	valid_0's rmse: 0.715762


[32m[I 2021-08-23 14:55:09,865][0m Trial 4 finished with value: 0.7150679028889722 and parameters: {'reg_lambda': 26.841546383498468, 'reg_alpha': 9.427479854204208, 'subsample': 0.7328218145245291, 'colsample_bytree': 0.9559871327565951, 'learning_rate': 0.2852864807217327, 'min_child_samples': 35, 'num_leaves': 63, 'max_depth': 22, 'max_bin': 558, 'cat_smooth': 32, 'cat_l2': 2.7342986083868244}. Best is trial 2 with value: 0.7132650949389292.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[13]	valid_0's rmse: 0.720918
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 14:55:35,268][0m Trial 5 finished with value: 0.7173494480857486 and parameters: {'reg_lambda': 51.22816142593612, 'reg_alpha': 17.02485329998982, 'subsample': 0.5789534398033316, 'colsample_bytree': 0.6993003270243139, 'learning_rate': 0.9145970843957766, 'min_child_samples': 67, 'num_leaves': 179, 'max_depth': 14, 'max_bin': 742, 'cat_smooth': 99, 'cat_l2': 9.92274428106438}. Best is trial 2 with value: 0.7132650949389292.[0m


Early stopping, best iteration is:
[12]	valid_0's rmse: 0.720798
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[16]	valid_0's rmse: 0.719344
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 14:56:07,835][0m Trial 6 finished with value: 0.7175702069793399 and parameters: {'reg_lambda': 94.93766194284426, 'reg_alpha': 47.90907285546832, 'subsample': 0.24167634324419832, 'colsample_bytree': 0.9545518564665488, 'learning_rate': 0.7666993318335387, 'min_child_samples': 65, 'num_leaves': 172, 'max_depth': 25, 'max_bin': 871, 'cat_smooth': 14, 'cat_l2': 0.00517065163155389}. Best is trial 2 with value: 0.7132650949389292.[0m


Early stopping, best iteration is:
[22]	valid_0's rmse: 0.720341
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[171]	valid_0's rmse: 0.715402
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 14:56:26,124][0m Trial 7 finished with value: 0.7141621455990994 and parameters: {'reg_lambda': 10.266409756755314, 'reg_alpha': 8.641181187142795, 'subsample': 0.837687275623596, 'colsample_bytree': 0.4711632750651541, 'learning_rate': 0.9842516933408881, 'min_child_samples': 15, 'num_leaves': 146, 'max_depth': 3, 'max_bin': 900, 'cat_smooth': 15, 'cat_l2': 0.009197625814676291}. Best is trial 2 with value: 0.7132650949389292.[0m


Early stopping, best iteration is:
[167]	valid_0's rmse: 0.715456
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[34]	valid_0's rmse: 0.715639
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 14:56:44,515][0m Trial 8 finished with value: 0.7143757579829529 and parameters: {'reg_lambda': 60.53904487069175, 'reg_alpha': 22.36382207272888, 'subsample': 0.431859625794627, 'colsample_bytree': 0.44631027880216545, 'learning_rate': 0.6852600749910992, 'min_child_samples': 24, 'num_leaves': 63, 'max_depth': 27, 'max_bin': 992, 'cat_smooth': 14, 'cat_l2': 0.019621318693419806}. Best is trial 2 with value: 0.7132650949389292.[0m


Early stopping, best iteration is:
[42]	valid_0's rmse: 0.716455
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.713702
Early stopping, best iteration is:
[1227]	valid_0's rmse: 0.713653
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.713729
Early stopping, best iteration is:
[1057]	valid_0's rmse: 0.713695


[32m[I 2021-08-23 14:57:34,180][0m Trial 9 finished with value: 0.7134546621684625 and parameters: {'reg_lambda': 87.1872056685274, 'reg_alpha': 14.98089307483873, 'subsample': 0.5022897315112489, 'colsample_bytree': 0.9081661958906263, 'learning_rate': 0.1285046511755429, 'min_child_samples': 7, 'num_leaves': 153, 'max_depth': 4, 'max_bin': 658, 'cat_smooth': 22, 'cat_l2': 1.0346303645447643}. Best is trial 2 with value: 0.7132650949389292.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.722353
[2000]	valid_0's rmse: 0.718564
[3000]	valid_0's rmse: 0.71684
[4000]	valid_0's rmse: 0.715884
[5000]	valid_0's rmse: 0.715388
[6000]	valid_0's rmse: 0.715071
[7000]	valid_0's rmse: 0.714885
[8000]	valid_0's rmse: 0.714776
[9000]	valid_0's rmse: 0.714696
[10000]	valid_0's rmse: 0.714647
Early stopping, best iteration is:
[10656]	valid_0's rmse: 0.71462
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.722418
[2000]	valid_0's rmse: 0.718628
[3000]	valid_0's rmse: 0.716959
[4000]	valid_0's rmse: 0.716014
[5000]	valid_0's rmse: 0.715492
[6000]	valid_0's rmse: 0.715219
[7000]	valid_0's rmse: 0.715055
[8000]	valid_0's rmse: 0.714942
[9000]	valid_0's rmse: 0.714843
[10000]	valid_0's rmse: 0.714808
Early stopping, best iteration is:
[10721]	valid_0's rmse: 0.714772


[32m[I 2021-08-23 15:13:04,996][0m Trial 10 finished with value: 0.7145723387744957 and parameters: {'reg_lambda': 38.608173707253336, 'reg_alpha': 36.7293178212926, 'subsample': 0.9881069681441683, 'colsample_bytree': 0.6975559751232432, 'learning_rate': 0.004177060932049899, 'min_child_samples': 100, 'num_leaves': 127, 'max_depth': 9, 'max_bin': 302, 'cat_smooth': 63, 'cat_l2': 0.14181384129201596}. Best is trial 2 with value: 0.7132650949389292.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.722299
[2000]	valid_0's rmse: 0.718467
[3000]	valid_0's rmse: 0.716783
[4000]	valid_0's rmse: 0.715827
[5000]	valid_0's rmse: 0.715359
[6000]	valid_0's rmse: 0.715105
[7000]	valid_0's rmse: 0.714972
Early stopping, best iteration is:
[7577]	valid_0's rmse: 0.714947
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.722303
[2000]	valid_0's rmse: 0.718643
[3000]	valid_0's rmse: 0.716931
[4000]	valid_0's rmse: 0.715955
[5000]	valid_0's rmse: 0.715438
[6000]	valid_0's rmse: 0.715176
[7000]	valid_0's rmse: 0.715021
[8000]	valid_0's rmse: 0.714943
Early stopping, best iteration is:
[8719]	valid_0's rmse: 0.714904


[32m[I 2021-08-23 15:24:42,607][0m Trial 11 finished with value: 0.7147615017191958 and parameters: {'reg_lambda': 72.38985654993505, 'reg_alpha': 2.239367684818375, 'subsample': 0.3136051762690186, 'colsample_bytree': 0.8298965392494919, 'learning_rate': 0.004070705965308619, 'min_child_samples': 98, 'num_leaves': 145, 'max_depth': 8, 'max_bin': 430, 'cat_smooth': 51, 'cat_l2': 0.42931778334933557}. Best is trial 2 with value: 0.7132650949389292.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[336]	valid_0's rmse: 0.714358
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[298]	valid_0's rmse: 0.71489


[32m[I 2021-08-23 15:25:23,902][0m Trial 12 finished with value: 0.7142657135350746 and parameters: {'reg_lambda': 96.52713055240292, 'reg_alpha': 30.70129621881879, 'subsample': 0.9479967269427239, 'colsample_bytree': 0.8110306816734141, 'learning_rate': 0.13089462845201408, 'min_child_samples': 82, 'num_leaves': 112, 'max_depth': 8, 'max_bin': 765, 'cat_smooth': 49, 'cat_l2': 0.0010951896885448237}. Best is trial 2 with value: 0.7132650949389292.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.714061
[2000]	valid_0's rmse: 0.713093
Early stopping, best iteration is:
[2538]	valid_0's rmse: 0.713021
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.714208
[2000]	valid_0's rmse: 0.713229
Early stopping, best iteration is:
[1988]	valid_0's rmse: 0.713217


[32m[I 2021-08-23 15:26:56,216][0m Trial 13 finished with value: 0.7129671568808704 and parameters: {'reg_lambda': 36.86116989706787, 'reg_alpha': 8.868703818825622, 'subsample': 0.6095692653245135, 'colsample_bytree': 0.5738076013007622, 'learning_rate': 0.19839609159739033, 'min_child_samples': 51, 'num_leaves': 199, 'max_depth': 2, 'max_bin': 554, 'cat_smooth': 76, 'cat_l2': 0.39251082737096415}. Best is trial 13 with value: 0.7129671568808704.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[40]	valid_0's rmse: 0.717986
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 15:27:14,552][0m Trial 14 finished with value: 0.7162196169488023 and parameters: {'reg_lambda': 2.2903355290088854, 'reg_alpha': 0.5778833334674403, 'subsample': 0.6195810935432051, 'colsample_bytree': 0.5852132134272477, 'learning_rate': 0.30876472228180707, 'min_child_samples': 53, 'num_leaves': 196, 'max_depth': 15, 'max_bin': 502, 'cat_smooth': 79, 'cat_l2': 0.10999111054014993}. Best is trial 13 with value: 0.7129671568808704.[0m


Early stopping, best iteration is:
[37]	valid_0's rmse: 0.717811
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.715503
[2000]	valid_0's rmse: 0.713654
[3000]	valid_0's rmse: 0.713225
Early stopping, best iteration is:
[3033]	valid_0's rmse: 0.713208
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.71552
[2000]	valid_0's rmse: 0.713796
[3000]	valid_0's rmse: 0.713378
[4000]	valid_0's rmse: 0.713279
Early stopping, best iteration is:
[3988]	valid_0's rmse: 0.713275


[32m[I 2021-08-23 15:29:21,652][0m Trial 15 finished with value: 0.7130778826911622 and parameters: {'reg_lambda': 36.51298166228841, 'reg_alpha': 8.907652847301794, 'subsample': 0.632280960914277, 'colsample_bytree': 0.3066988818342807, 'learning_rate': 0.1371554935538694, 'min_child_samples': 54, 'num_leaves': 193, 'max_depth': 2, 'max_bin': 384, 'cat_smooth': 95, 'cat_l2': 89.02599664331731}. Best is trial 13 with value: 0.7129671568808704.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[140]	valid_0's rmse: 0.714536
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[154]	valid_0's rmse: 0.714874


[32m[I 2021-08-23 15:29:43,426][0m Trial 16 finished with value: 0.7139552834071271 and parameters: {'reg_lambda': 45.26541852839866, 'reg_alpha': 24.95992792896404, 'subsample': 0.6427683740002086, 'colsample_bytree': 0.16373548260596374, 'learning_rate': 0.4127327248222118, 'min_child_samples': 52, 'num_leaves': 90, 'max_depth': 11, 'max_bin': 301, 'cat_smooth': 71, 'cat_l2': 98.2662589716185}. Best is trial 13 with value: 0.7129671568808704.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.713884
Early stopping, best iteration is:
[879]	valid_0's rmse: 0.713842
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[713]	valid_0's rmse: 0.713666


[32m[I 2021-08-23 15:30:21,457][0m Trial 17 finished with value: 0.7133967142383741 and parameters: {'reg_lambda': 59.73863771926406, 'reg_alpha': 6.062799295770105, 'subsample': 0.38749318746418404, 'colsample_bytree': 0.3130914558669684, 'learning_rate': 0.13283692284407242, 'min_child_samples': 52, 'num_leaves': 19, 'max_depth': 18, 'max_bin': 393, 'cat_smooth': 89, 'cat_l2': 41.83735091222549}. Best is trial 13 with value: 0.7129671568808704.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.715612
[2000]	valid_0's rmse: 0.714061
[3000]	valid_0's rmse: 0.713544
[4000]	valid_0's rmse: 0.713308
[5000]	valid_0's rmse: 0.713201
[6000]	valid_0's rmse: 0.71315
[7000]	valid_0's rmse: 0.713115
[8000]	valid_0's rmse: 0.713093
[9000]	valid_0's rmse: 0.713084
Early stopping, best iteration is:
[9069]	valid_0's rmse: 0.713083
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.716578
[2000]	valid_0's rmse: 0.714566
[3000]	valid_0's rmse: 0.713745
[4000]	valid_0's rmse: 0.713514
[5000]	valid_0's rmse: 0.713377
[6000]	valid_0's rmse: 0.713333
[7000]	valid_0's rmse: 0.713287
[8000]	valid_0's rmse: 0.713266
Early stopping, best iteration is:
[7978]	valid_0's rmse: 0.713265


[32m[I 2021-08-23 15:33:51,306][0m Trial 18 finished with value: 0.7129867431910842 and parameters: {'reg_lambda': 41.0713476961404, 'reg_alpha': 31.94879085808348, 'subsample': 0.5394287255952266, 'colsample_bytree': 0.11101978103023588, 'learning_rate': 0.21177851244932577, 'min_child_samples': 36, 'num_leaves': 198, 'max_depth': 2, 'max_bin': 409, 'cat_smooth': 67, 'cat_l2': 2.6768295004454483}. Best is trial 13 with value: 0.7129671568808704.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[140]	valid_0's rmse: 0.715348
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[152]	valid_0's rmse: 0.715105


[32m[I 2021-08-23 15:34:12,564][0m Trial 19 finished with value: 0.7143150181516701 and parameters: {'reg_lambda': 55.454180219022305, 'reg_alpha': 39.65217413239088, 'subsample': 0.5281306160824033, 'colsample_bytree': 0.5667342712261696, 'learning_rate': 0.4615593974477828, 'min_child_samples': 39, 'num_leaves': 157, 'max_depth': 6, 'max_bin': 481, 'cat_smooth': 63, 'cat_l2': 3.094367145324927}. Best is trial 13 with value: 0.7129671568808704.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[303]	valid_0's rmse: 0.713659
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[291]	valid_0's rmse: 0.7137


[32m[I 2021-08-23 15:34:39,998][0m Trial 20 finished with value: 0.7132719554649679 and parameters: {'reg_lambda': 68.12567802326325, 'reg_alpha': 30.62010729964434, 'subsample': 0.38055424714182545, 'colsample_bytree': 0.12461580860404535, 'learning_rate': 0.22677872530949553, 'min_child_samples': 25, 'num_leaves': 128, 'max_depth': 12, 'max_bin': 594, 'cat_smooth': 42, 'cat_l2': 0.5132325392478948}. Best is trial 13 with value: 0.7129671568808704.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.714175
[2000]	valid_0's rmse: 0.713133
[3000]	valid_0's rmse: 0.712957
Early stopping, best iteration is:
[3107]	valid_0's rmse: 0.712931
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.714221
[2000]	valid_0's rmse: 0.713192
[3000]	valid_0's rmse: 0.713018
Early stopping, best iteration is:
[3039]	valid_0's rmse: 0.713006


[32m[I 2021-08-23 15:36:28,034][0m Trial 21 finished with value: 0.7128286882236133 and parameters: {'reg_lambda': 37.43914460247242, 'reg_alpha': 30.339069983167338, 'subsample': 0.6775270670661361, 'colsample_bytree': 0.2398515288244969, 'learning_rate': 0.20886675681928338, 'min_child_samples': 47, 'num_leaves': 196, 'max_depth': 2, 'max_bin': 371, 'cat_smooth': 76, 'cat_l2': 0.1264463864796752}. Best is trial 21 with value: 0.7128286882236133.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[192]	valid_0's rmse: 0.714095
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[216]	valid_0's rmse: 0.714361


[32m[I 2021-08-23 15:36:49,438][0m Trial 22 finished with value: 0.7136451438930904 and parameters: {'reg_lambda': 38.83586596492692, 'reg_alpha': 30.943457388410106, 'subsample': 0.6937062434077768, 'colsample_bytree': 0.1957795044442749, 'learning_rate': 0.3812140347451515, 'min_child_samples': 30, 'num_leaves': 193, 'max_depth': 6, 'max_bin': 363, 'cat_smooth': 73, 'cat_l2': 0.08886278441458219}. Best is trial 21 with value: 0.7128286882236133.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.713609
Early stopping, best iteration is:
[1332]	valid_0's rmse: 0.713542
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.713552
Early stopping, best iteration is:
[1135]	valid_0's rmse: 0.713474


[32m[I 2021-08-23 15:37:43,229][0m Trial 23 finished with value: 0.7132469522961162 and parameters: {'reg_lambda': 43.78891061338918, 'reg_alpha': 38.869720148499866, 'subsample': 0.5698523982338994, 'colsample_bytree': 0.393510113313295, 'learning_rate': 0.5621577549553702, 'min_child_samples': 44, 'num_leaves': 167, 'max_depth': 2, 'max_bin': 466, 'cat_smooth': 62, 'cat_l2': 1.2739317961616157}. Best is trial 21 with value: 0.7128286882236133.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[495]	valid_0's rmse: 0.713725
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[473]	valid_0's rmse: 0.71416


[32m[I 2021-08-23 15:38:19,782][0m Trial 24 finished with value: 0.7136084957685566 and parameters: {'reg_lambda': 19.082003546057567, 'reg_alpha': 45.37968433739627, 'subsample': 0.6910911799707885, 'colsample_bytree': 0.22637911009482387, 'learning_rate': 0.20942628916208975, 'min_child_samples': 48, 'num_leaves': 196, 'max_depth': 6, 'max_bin': 348, 'cat_smooth': 77, 'cat_l2': 0.2141728022238479}. Best is trial 21 with value: 0.7128286882236133.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[494]	valid_0's rmse: 0.713608
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[374]	valid_0's rmse: 0.713559


[32m[I 2021-08-23 15:38:50,409][0m Trial 25 finished with value: 0.7131560462333969 and parameters: {'reg_lambda': 30.76446026474719, 'reg_alpha': 34.39375752194798, 'subsample': 0.5534634577095413, 'colsample_bytree': 0.10314712450874251, 'learning_rate': 0.21502441491086263, 'min_child_samples': 62, 'num_leaves': 198, 'max_depth': 18, 'max_bin': 445, 'cat_smooth': 68, 'cat_l2': 0.050609817595108623}. Best is trial 21 with value: 0.7128286882236133.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.718948
[2000]	valid_0's rmse: 0.71589
[3000]	valid_0's rmse: 0.714493
[4000]	valid_0's rmse: 0.713639
[5000]	valid_0's rmse: 0.713173
[6000]	valid_0's rmse: 0.712972
[7000]	valid_0's rmse: 0.712863
[8000]	valid_0's rmse: 0.712813
[9000]	valid_0's rmse: 0.712784
Early stopping, best iteration is:
[9295]	valid_0's rmse: 0.712769
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.7191
[2000]	valid_0's rmse: 0.716015
[3000]	valid_0's rmse: 0.714656
[4000]	valid_0's rmse: 0.713849
[5000]	valid_0's rmse: 0.713451
[6000]	valid_0's rmse: 0.713218
[7000]	valid_0's rmse: 0.713099
[8000]	valid_0's rmse: 0.713046
Early stopping, best iteration is:
[8671]	valid_0's rmse: 0.71301


[32m[I 2021-08-23 15:42:40,544][0m Trial 26 finished with value: 0.7127896275680465 and parameters: {'reg_lambda': 50.73580972273695, 'reg_alpha': 20.511875015884407, 'subsample': 0.8746464245788418, 'colsample_bytree': 0.6267030518811912, 'learning_rate': 0.0602064011142347, 'min_child_samples': 34, 'num_leaves': 181, 'max_depth': 2, 'max_bin': 525, 'cat_smooth': 82, 'cat_l2': 5.73502470505396}. Best is trial 26 with value: 0.7127896275680465.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.713881
Early stopping, best iteration is:
[1578]	valid_0's rmse: 0.713629
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.713908
Early stopping, best iteration is:
[1498]	valid_0's rmse: 0.713688


[32m[I 2021-08-23 15:43:56,875][0m Trial 27 finished with value: 0.7134850393129377 and parameters: {'reg_lambda': 51.66942512231507, 'reg_alpha': 21.59281678603076, 'subsample': 0.8777500398878413, 'colsample_bytree': 0.6320924596630856, 'learning_rate': 0.06312231143364042, 'min_child_samples': 18, 'num_leaves': 181, 'max_depth': 5, 'max_bin': 529, 'cat_smooth': 84, 'cat_l2': 9.277396127948222}. Best is trial 26 with value: 0.7127896275680465.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[52]	valid_0's rmse: 0.716242
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[65]	valid_0's rmse: 0.716164


[32m[I 2021-08-23 15:44:21,915][0m Trial 28 finished with value: 0.7151188114076312 and parameters: {'reg_lambda': 33.15968286170836, 'reg_alpha': 25.655438766380442, 'subsample': 0.9176612436579279, 'colsample_bytree': 0.5033800224414263, 'learning_rate': 0.3582550090241852, 'min_child_samples': 60, 'num_leaves': 162, 'max_depth': 10, 'max_bin': 608, 'cat_smooth': 57, 'cat_l2': 0.044884434665890045}. Best is trial 26 with value: 0.7127896275680465.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[682]	valid_0's rmse: 0.713904
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[609]	valid_0's rmse: 0.713599


[32m[I 2021-08-23 15:45:26,556][0m Trial 29 finished with value: 0.7134422559364586 and parameters: {'reg_lambda': 12.353626408258972, 'reg_alpha': 19.85633961876123, 'subsample': 0.8063242728142914, 'colsample_bytree': 0.39413134132115335, 'learning_rate': 0.08072679667322709, 'min_child_samples': 84, 'num_leaves': 136, 'max_depth': 8, 'max_bin': 746, 'cat_smooth': 79, 'cat_l2': 1.0171110951620668}. Best is trial 26 with value: 0.7127896275680465.[0m


Number of finished trials: 30
Best trial: {'reg_lambda': 50.73580972273695, 'reg_alpha': 20.511875015884407, 'subsample': 0.8746464245788418, 'colsample_bytree': 0.6267030518811912, 'learning_rate': 0.0602064011142347, 'min_child_samples': 34, 'num_leaves': 181, 'max_depth': 2, 'max_bin': 525, 'cat_smooth': 82, 'cat_l2': 5.73502470505396}


In [15]:
study.best_params

{'reg_lambda': 50.73580972273695,
 'reg_alpha': 20.511875015884407,
 'subsample': 0.8746464245788418,
 'colsample_bytree': 0.6267030518811912,
 'learning_rate': 0.0602064011142347,
 'min_child_samples': 34,
 'num_leaves': 181,
 'max_depth': 2,
 'max_bin': 525,
 'cat_smooth': 82,
 'cat_l2': 5.73502470505396}

# Log

====== Ordinal encoding =========

0.7123367407151787 no noise ver1

0.7158040228380602 row-wise noise ver2

0.7157995952863279 random noise ver3

====== One-hot encoding =========

0.7203460513257339 no noise ver4

