In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import random
import os
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

import lightgbm as lgb
#import xgboost as xgb
#import catboost as ctb

import warnings
warnings.simplefilter('ignore')

# Parameters

In [2]:
target = 'target'

DEBUG = False

if DEBUG:
    N_ESTIMATORS = 1
    N_SPLITS = 2
    SEED = 17
    CVSEED = 17
    EARLY_STOPPING_ROUNDS = 1
    VERBOSE = 100
    #N_ITERS = 2
else:
    N_SPLITS = 10
    N_ESTIMATORS = 20000
    EARLY_STOPPING_ROUNDS = 200
    VERBOSE = 1000
    SEED = 17
    CVSEED = 17
    #N_ITERS = 10

In [3]:
def set_seed(seed=17):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
set_seed(SEED)

# Load data

In [4]:
# Load the training data
train = pd.read_csv("../input/30-days-of-ml/train.csv")
test = pd.read_csv("../input/30-days-of-ml/test.csv")

# Preview the data
train.head()

Unnamed: 0,id,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,C,B,B,A,E,C,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,A,A,B,D,A,F,A,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,A,C,B,D,A,D,A,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,A,C,B,D,A,E,C,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,A,C,B,D,A,E,A,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [5]:
# Low MI scores
for pp in ["cat2","cat3","cat4","cat6"]:
    train.pop(pp)
    test.pop(pp)
train.head()

Unnamed: 0,id,cat0,cat1,cat5,cat7,cat8,cat9,cont0,cont1,cont2,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,E,C,N,0.20147,-0.014822,0.669699,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,D,F,A,O,0.743068,0.367411,1.021605,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,D,D,A,F,0.742708,0.310383,-0.012673,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,D,E,C,K,0.429551,0.620998,0.577942,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,D,E,A,N,1.058291,0.367492,-0.052389,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [6]:
# Separate target from features
y = train['target']
features = train.drop(['id','target'], axis=1)

# Preview features
features.head()

Unnamed: 0,cat0,cat1,cat5,cat7,cat8,cat9,cont0,cont1,cont2,cont3,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,B,B,B,E,C,N,0.20147,-0.014822,0.669699,0.136278,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985
1,B,B,D,F,A,O,0.743068,0.367411,1.021605,0.365798,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,A,A,D,D,A,F,0.742708,0.310383,-0.012673,0.576957,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,B,B,D,E,C,K,0.429551,0.620998,0.577942,0.28061,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682
4,A,A,D,E,A,N,1.058291,0.367492,-0.052389,0.232407,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823


# Preprocessing

In [7]:

# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# ordinal-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()
ordinal_encoder = OrdinalEncoder()
X[object_cols] = ordinal_encoder.fit_transform(features[object_cols])
X_test[object_cols] = ordinal_encoder.transform(test[object_cols])

# Preview the ordinal-encoded features
X.head()


Unnamed: 0,cat0,cat1,cat5,cat7,cat8,cat9,cont0,cont1,cont2,cont3,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,1.0,1.0,1.0,4.0,2.0,13.0,0.20147,-0.014822,0.669699,0.136278,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985
1,1.0,1.0,3.0,5.0,0.0,14.0,0.743068,0.367411,1.021605,0.365798,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,0.0,0.0,3.0,3.0,0.0,5.0,0.742708,0.310383,-0.012673,0.576957,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,1.0,1.0,3.0,4.0,2.0,10.0,0.429551,0.620998,0.577942,0.28061,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682
4,0.0,0.0,3.0,4.0,0.0,13.0,1.058291,0.367492,-0.052389,0.232407,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823


In [8]:
'''
# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# one-hot-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()

oh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")
X_ohe = oh_encoder.fit_transform(features[object_cols])
X_test_ohe = oh_encoder.transform(test[object_cols])

X_ohe = pd.DataFrame(X_ohe, columns=[f"ohe_{i}" for i in range(X_ohe.shape[1])])
X_test_ohe = pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])

X = pd.concat([X, X_ohe], axis=1)
X_test = pd.concat([X_test, X_test_ohe], axis=1)
X = X.drop(object_cols, axis=1)
X_test = X_test.drop(object_cols, axis=1)
    
# Preview the one-hot-encoded features
X.head()
'''

'\n# List of categorical columns\nobject_cols = [col for col in features.columns if \'cat\' in col]\n\n# one-hot-encode categorical columns\nX = features.copy()\nX_test = test.drop([\'id\'], axis=1).copy()\n\noh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")\nX_ohe = oh_encoder.fit_transform(features[object_cols])\nX_test_ohe = oh_encoder.transform(test[object_cols])\n\nX_ohe = pd.DataFrame(X_ohe, columns=[f"ohe_{i}" for i in range(X_ohe.shape[1])])\nX_test_ohe = pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])\n\nX = pd.concat([X, X_ohe], axis=1)\nX_test = pd.concat([X_test, X_test_ohe], axis=1)\nX = X.drop(object_cols, axis=1)\nX_test = X_test.drop(object_cols, axis=1)\n    \n# Preview the one-hot-encoded features\nX.head()\n'

In [9]:
scale_features = [col for col in features.columns if 'cont' in col]

ss = StandardScaler()
X[scale_features] = ss.fit_transform(features[scale_features])
X_test[scale_features] = ss.transform(test[scale_features])

In [10]:
# Swap noise

# Random
def apply_noise_rn(df, p=.75):
    should_not_swap = np.random.binomial(1, p, df.shape)
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Row-wise
def apply_noise_row(df, p=.75):
    should_not_swap = np.zeros(df.shape)
    for i in range(df.shape[0]):
        for j in np.random.choice(df.shape[1],int(p*df.shape[1]),replace=False):
            should_not_swap[i,j]=1 
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Pseudo Label

In [11]:
pseudo = pd.read_csv("../input/30-days-pseudo/submission.csv")[target]
train_pseudo = pd.concat([X, y], axis=1)
test_pseudo = pd.concat([X_test, pseudo], axis=1)
all_pseudo = pd.concat([train_pseudo, test_pseudo]).reset_index(drop=True)

# Optuna

In [12]:
# Optuna for parameter search
!pip install -q optuna

import optuna
import pickle



In [13]:
# for the fixed learning rate, use the opt n iterations and tune the tree hyperparameters
def objective(trial, X=all_pseudo.iloc[:,:-1], y=all_pseudo[target]):
  """
  """
  param_space = {
               #'device':'gpu',  # Use GPU acceleration
               #'boosting_type': 'gbdt',
               'reg_lambda':trial.suggest_uniform('reg_lambda', 0.01, 100),
              'reg_alpha':trial.suggest_uniform('reg_alpha', 0.01, 50),
                'subsample': trial.suggest_uniform('subsample', 0.2, 1.0),
              'colsample_bytree':trial.suggest_uniform('colsample_bytree', 0.1, 1.0),
                #'subsample_freq': trial.suggest_int('subsample_freq', 1, 10),
               'learning_rate':trial.suggest_uniform('learning_rate', 1e-3, 1),
                 'min_child_samples':trial.suggest_int('min_child_samples', 5, 100),
              'num_leaves':trial.suggest_int('num_leaves', 10, 200),
              'max_depth':trial.suggest_int('max_depth', 2, 30),
              #'min_split_gain': trial.suggest_float('min_split_gain', 0.0, 0.005),
              #'class_weight':trial.suggest_categorical('class_weight',['balanced',None]),
               'n_estimators':N_ESTIMATORS,
                'n_jobs' : -1,
              'metric':'rmse',
              'max_bin':trial.suggest_int('max_bin', 300, 1000),
              'cat_smooth':trial.suggest_int('cat_smooth', 5, 100),
              'cat_l2':trial.suggest_loguniform('cat_l2', 1e-3, 100)
                }
            
  seed_list=[SEED, SEED+1]
  #kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=CVSEED)
  lgb_oof = np.zeros(train.shape[0])
  #lgb_pred = np.zeros(test.shape[0])
  X_tr, X_va, y_tr, y_va = train_test_split(X,y,test_size=.2,random_state=CVSEED)
  trn_idx = X_tr.index
  val_idx = X_va.index
  
  #for (trn_idx, val_idx) in enumerate(kf.split(X, y)):
  #print(f"===== fold {fold} =====")
  oof_idx = np.array([idx for idx in val_idx if idx < train.shape[0]])
  preds_idx = np.array([idx for idx in val_idx if idx >= train.shape[0]])

  X_train, y_train = all_pseudo.iloc[trn_idx, :-1], all_pseudo[target].iloc[trn_idx]
  #X_train = apply_noise_row(X_train)

  X_valid, y_valid = all_pseudo.iloc[oof_idx, :-1], all_pseudo[target].iloc[oof_idx]
  #X_test = all_pseudo[scale_features].iloc[preds_idx]
  
  #start = time.time()
  for inseed in seed_list:
    param_space['random_state'] = inseed

    model = lgb.LGBMRegressor(**param_space)
    model.fit(
        X_train, 
        y_train,
        eval_set=[(X_valid, y_valid)],
        eval_metric='rmse',
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        verbose=VERBOSE,
        categorical_feature=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        #callbacks=[optuna.integration.LightGBMPruningCallback(trial, metric='rmse')],
    )


    lgb_oof[oof_idx] += model.predict(X_valid) / len(seed_list)
    #lgb_pred[preds_idx-train.shape[0]] += model.predict(X_test) / len(seed_list)

  #elapsed = time.time() - start
  rmse = mean_squared_error(y_valid, lgb_oof[oof_idx], squared=False)
  #print(f"fold {fold} - lgb rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")            
  
  #print(f"oof lgb_rmse = {mean_squared_error(train[target], lgb_oof, squared=False)}")

  
  return rmse

In [14]:
study = optuna.create_study(direction='minimize')
study.optimize(objective,n_trials= 30)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-08-23 00:18:37,250][0m A new study created in memory with name: no-name-ddf3c641-b387-4a30-89a5-e49653820507[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[536]	valid_0's rmse: 0.713472
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[670]	valid_0's rmse: 0.713616


[32m[I 2021-08-23 00:19:40,660][0m Trial 0 finished with value: 0.7132462550941675 and parameters: {'reg_lambda': 71.22804518208092, 'reg_alpha': 20.914019373466296, 'subsample': 0.4398404020748512, 'colsample_bytree': 0.31734777406489234, 'learning_rate': 0.06163469479301172, 'min_child_samples': 17, 'num_leaves': 119, 'max_depth': 20, 'max_bin': 896, 'cat_smooth': 40, 'cat_l2': 1.3138040115205214}. Best is trial 0 with value: 0.7132462550941675.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.714465
[2000]	valid_0's rmse: 0.713356
[3000]	valid_0's rmse: 0.713088
[4000]	valid_0's rmse: 0.713006
Early stopping, best iteration is:
[4707]	valid_0's rmse: 0.712988
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.714331
[2000]	valid_0's rmse: 0.713243
[3000]	valid_0's rmse: 0.712928
[4000]	valid_0's rmse: 0.712845
Early stopping, best iteration is:
[4029]	valid_0's rmse: 0.712841


[32m[I 2021-08-23 00:23:27,138][0m Trial 1 finished with value: 0.7128322936038564 and parameters: {'reg_lambda': 28.354896871840413, 'reg_alpha': 41.160128543109266, 'subsample': 0.5137580786570335, 'colsample_bytree': 0.23967435576049964, 'learning_rate': 0.02690118616570491, 'min_child_samples': 79, 'num_leaves': 24, 'max_depth': 24, 'max_bin': 894, 'cat_smooth': 50, 'cat_l2': 0.006269585073940497}. Best is trial 1 with value: 0.7128322936038564.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[21]	valid_0's rmse: 0.720187
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 00:23:44,454][0m Trial 2 finished with value: 0.7171877064746541 and parameters: {'reg_lambda': 17.060210950373325, 'reg_alpha': 7.2162771704842505, 'subsample': 0.9648115501720349, 'colsample_bytree': 0.8508268221669721, 'learning_rate': 0.6230178563318562, 'min_child_samples': 31, 'num_leaves': 139, 'max_depth': 9, 'max_bin': 317, 'cat_smooth': 81, 'cat_l2': 0.1171394543192337}. Best is trial 1 with value: 0.7128322936038564.[0m


Early stopping, best iteration is:
[21]	valid_0's rmse: 0.718868
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[14]	valid_0's rmse: 0.720315
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 00:24:05,391][0m Trial 3 finished with value: 0.7172264043500538 and parameters: {'reg_lambda': 51.46581034101, 'reg_alpha': 8.531572444361245, 'subsample': 0.9375753454140665, 'colsample_bytree': 0.7677723900653577, 'learning_rate': 0.7445569010402524, 'min_child_samples': 36, 'num_leaves': 184, 'max_depth': 24, 'max_bin': 568, 'cat_smooth': 95, 'cat_l2': 6.952696848721614}. Best is trial 1 with value: 0.7128322936038564.[0m


Early stopping, best iteration is:
[14]	valid_0's rmse: 0.720651
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[251]	valid_0's rmse: 0.713871
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[243]	valid_0's rmse: 0.713887


[32m[I 2021-08-23 00:24:23,198][0m Trial 4 finished with value: 0.7132244608667255 and parameters: {'reg_lambda': 93.73482582065542, 'reg_alpha': 39.5438135298472, 'subsample': 0.725097946154055, 'colsample_bytree': 0.2974240330536204, 'learning_rate': 0.5891574236685329, 'min_child_samples': 22, 'num_leaves': 11, 'max_depth': 17, 'max_bin': 969, 'cat_smooth': 57, 'cat_l2': 4.902976586986592}. Best is trial 1 with value: 0.7128322936038564.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[631]	valid_0's rmse: 0.714277
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[574]	valid_0's rmse: 0.714271


[32m[I 2021-08-23 00:25:27,783][0m Trial 5 finished with value: 0.7140705520314125 and parameters: {'reg_lambda': 1.1175998728641905, 'reg_alpha': 48.685001028138316, 'subsample': 0.9149727242983487, 'colsample_bytree': 0.7148089889738509, 'learning_rate': 0.07422775920720738, 'min_child_samples': 72, 'num_leaves': 82, 'max_depth': 21, 'max_bin': 816, 'cat_smooth': 52, 'cat_l2': 24.837915099605105}. Best is trial 1 with value: 0.7128322936038564.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[28]	valid_0's rmse: 0.717826
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 00:25:50,245][0m Trial 6 finished with value: 0.7159515505947495 and parameters: {'reg_lambda': 4.913219628709567, 'reg_alpha': 40.10050472834298, 'subsample': 0.3041794541276379, 'colsample_bytree': 0.380015688403492, 'learning_rate': 0.7519927935115147, 'min_child_samples': 52, 'num_leaves': 199, 'max_depth': 22, 'max_bin': 729, 'cat_smooth': 51, 'cat_l2': 0.00390672428063351}. Best is trial 1 with value: 0.7128322936038564.[0m


Early stopping, best iteration is:
[21]	valid_0's rmse: 0.7182
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[12]	valid_0's rmse: 0.719871
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 00:26:09,893][0m Trial 7 finished with value: 0.7174768498059338 and parameters: {'reg_lambda': 71.11117177962966, 'reg_alpha': 24.794417419421382, 'subsample': 0.22865272348966015, 'colsample_bytree': 0.5896679888167241, 'learning_rate': 0.8705777795238969, 'min_child_samples': 61, 'num_leaves': 146, 'max_depth': 23, 'max_bin': 606, 'cat_smooth': 6, 'cat_l2': 0.005511639432033395}. Best is trial 1 with value: 0.7128322936038564.[0m


Early stopping, best iteration is:
[16]	valid_0's rmse: 0.720687
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[57]	valid_0's rmse: 0.715133
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 00:26:25,475][0m Trial 8 finished with value: 0.7143436004786342 and parameters: {'reg_lambda': 89.13725251112876, 'reg_alpha': 20.68285565280326, 'subsample': 0.7115535119030418, 'colsample_bytree': 0.41727355597870797, 'learning_rate': 0.48909772266066, 'min_child_samples': 81, 'num_leaves': 51, 'max_depth': 24, 'max_bin': 572, 'cat_smooth': 82, 'cat_l2': 0.11900961622495397}. Best is trial 1 with value: 0.7128322936038564.[0m


Early stopping, best iteration is:
[54]	valid_0's rmse: 0.71549
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[29]	valid_0's rmse: 0.718374
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 00:26:46,533][0m Trial 9 finished with value: 0.7167539059660357 and parameters: {'reg_lambda': 11.854073549299434, 'reg_alpha': 44.41905352830439, 'subsample': 0.970786407548289, 'colsample_bytree': 0.8287236663937738, 'learning_rate': 0.7253747210642744, 'min_child_samples': 93, 'num_leaves': 104, 'max_depth': 30, 'max_bin': 362, 'cat_smooth': 30, 'cat_l2': 7.316737141273215}. Best is trial 1 with value: 0.7128322936038564.[0m


Early stopping, best iteration is:
[33]	valid_0's rmse: 0.719293
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.71278
Early stopping, best iteration is:
[808]	valid_0's rmse: 0.712665
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.71293
Early stopping, best iteration is:
[849]	valid_0's rmse: 0.712893


[32m[I 2021-08-23 00:27:25,401][0m Trial 10 finished with value: 0.7125197894042219 and parameters: {'reg_lambda': 31.96159394963173, 'reg_alpha': 34.77324636947614, 'subsample': 0.5060755836496151, 'colsample_bytree': 0.14468936339449, 'learning_rate': 0.31091805860052085, 'min_child_samples': 99, 'num_leaves': 11, 'max_depth': 30, 'max_bin': 996, 'cat_smooth': 12, 'cat_l2': 0.0010766597770169293}. Best is trial 10 with value: 0.7125197894042219.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[582]	valid_0's rmse: 0.71266
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[717]	valid_0's rmse: 0.712963


[32m[I 2021-08-23 00:27:57,340][0m Trial 11 finished with value: 0.7125637676172063 and parameters: {'reg_lambda': 30.169617862134274, 'reg_alpha': 33.53322480707972, 'subsample': 0.5004877423710059, 'colsample_bytree': 0.14682780611191346, 'learning_rate': 0.33327967570295697, 'min_child_samples': 100, 'num_leaves': 10, 'max_depth': 30, 'max_bin': 992, 'cat_smooth': 5, 'cat_l2': 0.0010685153625499511}. Best is trial 10 with value: 0.7125197894042219.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[285]	valid_0's rmse: 0.713315
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[258]	valid_0's rmse: 0.713206


[32m[I 2021-08-23 00:28:23,672][0m Trial 12 finished with value: 0.7128311772090961 and parameters: {'reg_lambda': 35.68199276090073, 'reg_alpha': 32.02938955908844, 'subsample': 0.3938285317827363, 'colsample_bytree': 0.14795595473922962, 'learning_rate': 0.3089967802325722, 'min_child_samples': 97, 'num_leaves': 47, 'max_depth': 30, 'max_bin': 998, 'cat_smooth': 5, 'cat_l2': 0.0010402067243497253}. Best is trial 10 with value: 0.7125197894042219.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[798]	valid_0's rmse: 0.712813
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.712829
Early stopping, best iteration is:
[885]	valid_0's rmse: 0.712813


[32m[I 2021-08-23 00:28:47,623][0m Trial 13 finished with value: 0.7125942506725893 and parameters: {'reg_lambda': 43.727949828577245, 'reg_alpha': 31.688804898174016, 'subsample': 0.6169041594799058, 'colsample_bytree': 0.103316108948798, 'learning_rate': 0.2940515157593458, 'min_child_samples': 97, 'num_leaves': 11, 'max_depth': 30, 'max_bin': 996, 'cat_smooth': 18, 'cat_l2': 0.001163866828004529}. Best is trial 10 with value: 0.7125197894042219.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[367]	valid_0's rmse: 0.71328
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[353]	valid_0's rmse: 0.713223


[32m[I 2021-08-23 00:29:14,152][0m Trial 14 finished with value: 0.7128759219630435 and parameters: {'reg_lambda': 24.70789484316013, 'reg_alpha': 31.606856536471124, 'subsample': 0.6014423538364002, 'colsample_bytree': 0.10773007069065188, 'learning_rate': 0.2887265214410217, 'min_child_samples': 100, 'num_leaves': 48, 'max_depth': 8, 'max_bin': 757, 'cat_smooth': 19, 'cat_l2': 0.024821673022320242}. Best is trial 10 with value: 0.7125197894042219.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[68]	valid_0's rmse: 0.715874
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[101]	valid_0's rmse: 0.715604


[32m[I 2021-08-23 00:29:36,013][0m Trial 15 finished with value: 0.7147114740474484 and parameters: {'reg_lambda': 58.632097926392994, 'reg_alpha': 34.09343194132642, 'subsample': 0.47099818787874337, 'colsample_bytree': 0.5033023340976392, 'learning_rate': 0.41105464717703993, 'min_child_samples': 84, 'num_leaves': 72, 'max_depth': 28, 'max_bin': 443, 'cat_smooth': 14, 'cat_l2': 0.0230295337928035}. Best is trial 10 with value: 0.7125197894042219.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[441]	valid_0's rmse: 0.71306
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[475]	valid_0's rmse: 0.713098


[32m[I 2021-08-23 00:30:08,923][0m Trial 16 finished with value: 0.7127246499456431 and parameters: {'reg_lambda': 37.64398286154883, 'reg_alpha': 13.900993104055816, 'subsample': 0.7169666171541105, 'colsample_bytree': 0.2206411393186809, 'learning_rate': 0.1695957210084681, 'min_child_samples': 66, 'num_leaves': 30, 'max_depth': 12, 'max_bin': 911, 'cat_smooth': 30, 'cat_l2': 0.0010737300783094476}. Best is trial 10 with value: 0.7125197894042219.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.714881
[2000]	valid_0's rmse: 0.713327
[3000]	valid_0's rmse: 0.71293
Early stopping, best iteration is:
[3448]	valid_0's rmse: 0.712891
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.71501
[2000]	valid_0's rmse: 0.713457
[3000]	valid_0's rmse: 0.713021
Early stopping, best iteration is:
[3486]	valid_0's rmse: 0.712961


[32m[I 2021-08-23 00:31:53,601][0m Trial 17 finished with value: 0.7128423025828451 and parameters: {'reg_lambda': 20.42667912764319, 'reg_alpha': 49.128057870524216, 'subsample': 0.3411557639043098, 'colsample_bytree': 0.17475807957140638, 'learning_rate': 0.18475386966736976, 'min_child_samples': 49, 'num_leaves': 10, 'max_depth': 2, 'max_bin': 830, 'cat_smooth': 6, 'cat_l2': 0.025387515260068313}. Best is trial 10 with value: 0.7125197894042219.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[71]	valid_0's rmse: 0.715701
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[78]	valid_0's rmse: 0.715442


[32m[I 2021-08-23 00:32:11,088][0m Trial 18 finished with value: 0.7144455228983145 and parameters: {'reg_lambda': 30.904641274818427, 'reg_alpha': 26.99288318183595, 'subsample': 0.5389102438870237, 'colsample_bytree': 0.5989471461785592, 'learning_rate': 0.42758791546181896, 'min_child_samples': 86, 'num_leaves': 67, 'max_depth': 27, 'max_bin': 672, 'cat_smooth': 29, 'cat_l2': 99.749236337292}. Best is trial 10 with value: 0.7125197894042219.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[312]	valid_0's rmse: 0.713739
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[449]	valid_0's rmse: 0.713478


[32m[I 2021-08-23 00:32:40,652][0m Trial 19 finished with value: 0.713252279475325 and parameters: {'reg_lambda': 56.4694838441933, 'reg_alpha': 36.0627670768438, 'subsample': 0.8330956762697885, 'colsample_bytree': 0.44001340786507087, 'learning_rate': 0.19565690069403868, 'min_child_samples': 6, 'num_leaves': 30, 'max_depth': 27, 'max_bin': 947, 'cat_smooth': 14, 'cat_l2': 0.0028758524637212497}. Best is trial 10 with value: 0.7125197894042219.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[54]	valid_0's rmse: 0.716158
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-23 00:33:04,817][0m Trial 20 finished with value: 0.7152105426868423 and parameters: {'reg_lambda': 41.50513831586017, 'reg_alpha': 25.580718606701456, 'subsample': 0.2973189858725259, 'colsample_bytree': 0.9901356530116481, 'learning_rate': 0.3764153982744623, 'min_child_samples': 92, 'num_leaves': 94, 'max_depth': 17, 'max_bin': 856, 'cat_smooth': 40, 'cat_l2': 0.22726672513583512}. Best is trial 10 with value: 0.7125197894042219.[0m


Early stopping, best iteration is:
[55]	valid_0's rmse: 0.716292
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.712893
Early stopping, best iteration is:
[1604]	valid_0's rmse: 0.712829
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.71279
Early stopping, best iteration is:
[1364]	valid_0's rmse: 0.712693


[32m[I 2021-08-23 00:33:58,685][0m Trial 21 finished with value: 0.7124995237022378 and parameters: {'reg_lambda': 45.09994580439574, 'reg_alpha': 29.653225831399617, 'subsample': 0.6272858433082099, 'colsample_bytree': 0.10415878244926194, 'learning_rate': 0.2653785487729413, 'min_child_samples': 98, 'num_leaves': 10, 'max_depth': 30, 'max_bin': 986, 'cat_smooth': 19, 'cat_l2': 0.00110483287391013}. Best is trial 21 with value: 0.7124995237022378.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.712724
[2000]	valid_0's rmse: 0.712626
Early stopping, best iteration is:
[1981]	valid_0's rmse: 0.712606
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.712766
Early stopping, best iteration is:
[1560]	valid_0's rmse: 0.712574


[32m[I 2021-08-23 00:35:04,602][0m Trial 22 finished with value: 0.7123367407151787 and parameters: {'reg_lambda': 47.16380193795371, 'reg_alpha': 29.16964013102968, 'subsample': 0.6268349823731298, 'colsample_bytree': 0.11028261217734764, 'learning_rate': 0.20170403778571633, 'min_child_samples': 89, 'num_leaves': 12, 'max_depth': 27, 'max_bin': 979, 'cat_smooth': 21, 'cat_l2': 0.0010504043298797654}. Best is trial 22 with value: 0.7123367407151787.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[291]	valid_0's rmse: 0.713393
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[200]	valid_0's rmse: 0.713299


[32m[I 2021-08-23 00:35:29,385][0m Trial 23 finished with value: 0.7129500735361347 and parameters: {'reg_lambda': 64.79417464667881, 'reg_alpha': 17.694539493703584, 'subsample': 0.6522941477248214, 'colsample_bytree': 0.24513006758533884, 'learning_rate': 0.23482086001609387, 'min_child_samples': 74, 'num_leaves': 32, 'max_depth': 27, 'max_bin': 960, 'cat_smooth': 23, 'cat_l2': 0.009761424977159368}. Best is trial 22 with value: 0.7123367407151787.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[652]	valid_0's rmse: 0.713218
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[652]	valid_0's rmse: 0.713054


[32m[I 2021-08-23 00:36:13,288][0m Trial 24 finished with value: 0.7128439068052604 and parameters: {'reg_lambda': 46.76753585245165, 'reg_alpha': 27.478152298087174, 'subsample': 0.7880281504915227, 'colsample_bytree': 0.10639011054233816, 'learning_rate': 0.13093312315166944, 'min_child_samples': 89, 'num_leaves': 56, 'max_depth': 26, 'max_bin': 786, 'cat_smooth': 40, 'cat_l2': 0.0017875578487013069}. Best is trial 22 with value: 0.7123367407151787.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[146]	valid_0's rmse: 0.713859
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[138]	valid_0's rmse: 0.714234


[32m[I 2021-08-23 00:36:30,028][0m Trial 25 finished with value: 0.7132952452479279 and parameters: {'reg_lambda': 81.51924346822337, 'reg_alpha': 37.370507461082674, 'subsample': 0.5610288074846574, 'colsample_bytree': 0.3076530640656371, 'learning_rate': 0.5203982203616639, 'min_child_samples': 100, 'num_leaves': 22, 'max_depth': 30, 'max_bin': 919, 'cat_smooth': 12, 'cat_l2': 0.012374738658533963}. Best is trial 22 with value: 0.7123367407151787.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[405]	valid_0's rmse: 0.713046
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[321]	valid_0's rmse: 0.712977


[32m[I 2021-08-23 00:36:59,149][0m Trial 26 finished with value: 0.7126764305111786 and parameters: {'reg_lambda': 52.72605324297714, 'reg_alpha': 28.884306606871785, 'subsample': 0.665847679041292, 'colsample_bytree': 0.17269411751302655, 'learning_rate': 0.2419112809092898, 'min_child_samples': 61, 'num_leaves': 37, 'max_depth': 20, 'max_bin': 860, 'cat_smooth': 26, 'cat_l2': 0.002765121574486186}. Best is trial 22 with value: 0.7123367407151787.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.712955
[2000]	valid_0's rmse: 0.71263
Early stopping, best iteration is:
[2008]	valid_0's rmse: 0.712627
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.713013
[2000]	valid_0's rmse: 0.712712
Early stopping, best iteration is:
[2130]	valid_0's rmse: 0.712697


[32m[I 2021-08-23 00:38:13,674][0m Trial 27 finished with value: 0.7124960047196875 and parameters: {'reg_lambda': 64.2515999033586, 'reg_alpha': 44.70289222212386, 'subsample': 0.8175146373431239, 'colsample_bytree': 0.2191826488060004, 'learning_rate': 0.10425195954640588, 'min_child_samples': 76, 'num_leaves': 13, 'max_depth': 26, 'max_bin': 998, 'cat_smooth': 37, 'cat_l2': 0.05068860967401655}. Best is trial 22 with value: 0.7123367407151787.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[437]	valid_0's rmse: 0.714049
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[291]	valid_0's rmse: 0.713971


[32m[I 2021-08-23 00:38:49,110][0m Trial 28 finished with value: 0.713572553551387 and parameters: {'reg_lambda': 64.38397557065153, 'reg_alpha': 0.6929150386561105, 'subsample': 0.8473477434752169, 'colsample_bytree': 0.35824649130229813, 'learning_rate': 0.11198384162157407, 'min_child_samples': 76, 'num_leaves': 66, 'max_depth': 25, 'max_bin': 700, 'cat_smooth': 35, 'cat_l2': 0.8051077490759458}. Best is trial 22 with value: 0.7123367407151787.[0m


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.715322
[2000]	valid_0's rmse: 0.713636
[3000]	valid_0's rmse: 0.713357
Early stopping, best iteration is:
[3554]	valid_0's rmse: 0.713325
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.715306
[2000]	valid_0's rmse: 0.713652
[3000]	valid_0's rmse: 0.713353
Early stopping, best iteration is:
[3780]	valid_0's rmse: 0.713317


[32m[I 2021-08-23 00:44:24,434][0m Trial 29 finished with value: 0.7132515466940019 and parameters: {'reg_lambda': 79.8230262694479, 'reg_alpha': 44.4697838825761, 'subsample': 0.7845308779925164, 'colsample_bytree': 0.2672599865217198, 'learning_rate': 0.013981658523843199, 'min_child_samples': 66, 'num_leaves': 128, 'max_depth': 19, 'max_bin': 893, 'cat_smooth': 64, 'cat_l2': 0.060018874117894236}. Best is trial 22 with value: 0.7123367407151787.[0m


Number of finished trials: 30
Best trial: {'reg_lambda': 47.16380193795371, 'reg_alpha': 29.16964013102968, 'subsample': 0.6268349823731298, 'colsample_bytree': 0.11028261217734764, 'learning_rate': 0.20170403778571633, 'min_child_samples': 89, 'num_leaves': 12, 'max_depth': 27, 'max_bin': 979, 'cat_smooth': 21, 'cat_l2': 0.0010504043298797654}


In [15]:
study.best_params

{'reg_lambda': 47.16380193795371,
 'reg_alpha': 29.16964013102968,
 'subsample': 0.6268349823731298,
 'colsample_bytree': 0.11028261217734764,
 'learning_rate': 0.20170403778571633,
 'min_child_samples': 89,
 'num_leaves': 12,
 'max_depth': 27,
 'max_bin': 979,
 'cat_smooth': 21,
 'cat_l2': 0.0010504043298797654}

# Log

====== Ordinal encoding =========



====== One-hot encoding =========


