In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import random
import os
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

#import lightgbm as lgb
#import xgboost as xgb
import catboost as ctb

import warnings
warnings.simplefilter('ignore')

# Parameters

In [2]:
target = 'target'

DEBUG = False

if DEBUG:
    N_ESTIMATORS = 1
    N_SPLITS = 2
    SEED = 17
    CVSEED = 17
    EARLY_STOPPING_ROUNDS = 1
    VERBOSE = 100
    #N_ITERS = 2
else:
    N_SPLITS = 10
    N_ESTIMATORS = 20000
    EARLY_STOPPING_ROUNDS = 200
    VERBOSE = 1000
    SEED = 17
    CVSEED = 17
    #N_ITERS = 10

In [3]:
def set_seed(seed=17):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
set_seed(SEED)

# Load data

In [4]:
# Load the training data
train = pd.read_csv("../input/30-days-of-ml/train.csv")
test = pd.read_csv("../input/30-days-of-ml/test.csv")

# Preview the data
train.head()

Unnamed: 0,id,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,C,B,B,A,E,C,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,A,A,B,D,A,F,A,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,A,C,B,D,A,D,A,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,A,C,B,D,A,E,C,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,A,C,B,D,A,E,A,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [5]:
# Separate target from features
y = train['target']
features = train.drop(['id','target'], axis=1)

# Preview features
features.head()

Unnamed: 0,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9,...,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,B,B,B,C,B,B,A,E,C,N,...,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985
1,B,B,A,A,B,D,A,F,A,O,...,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,A,A,A,C,B,D,A,D,A,F,...,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,B,B,A,C,B,D,A,E,C,K,...,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682
4,A,A,A,C,B,D,A,E,A,N,...,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823


# Preprocessing

In [6]:
'''
# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# ordinal-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()
ordinal_encoder = OrdinalEncoder()
X[object_cols] = ordinal_encoder.fit_transform(features[object_cols])
X_test[object_cols] = ordinal_encoder.transform(test[object_cols])

# Preview the ordinal-encoded features
X.head()
'''

"\n# List of categorical columns\nobject_cols = [col for col in features.columns if 'cat' in col]\n\n# ordinal-encode categorical columns\nX = features.copy()\nX_test = test.drop(['id'], axis=1).copy()\nordinal_encoder = OrdinalEncoder()\nX[object_cols] = ordinal_encoder.fit_transform(features[object_cols])\nX_test[object_cols] = ordinal_encoder.transform(test[object_cols])\n\n# Preview the ordinal-encoded features\nX.head()\n"

In [7]:
# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# one-hot-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()

oh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")
X_ohe = oh_encoder.fit_transform(features[object_cols])
X_test_ohe = oh_encoder.transform(test[object_cols])

X_ohe = pd.DataFrame(X_ohe, columns=[f"ohe_{i}" for i in range(X_ohe.shape[1])])
X_test_ohe = pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])

X = pd.concat([X, X_ohe], axis=1)
X_test = pd.concat([X_test, X_test_ohe], axis=1)
X = X.drop(object_cols, axis=1)
X_test = X_test.drop(object_cols, axis=1)
    
# Preview the one-hot-encoded features
X.head()

Unnamed: 0,cont0,cont1,cont2,cont3,cont4,cont5,cont6,cont7,cont8,cont9,...,ohe_46,ohe_47,ohe_48,ohe_49,ohe_50,ohe_51,ohe_52,ohe_53,ohe_54,ohe_55
0,0.20147,-0.014822,0.669699,0.136278,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.743068,0.367411,1.021605,0.365798,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.742708,0.310383,-0.012673,0.576957,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.429551,0.620998,0.577942,0.28061,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,1.058291,0.367492,-0.052389,0.232407,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [8]:
scale_features = [col for col in features.columns if 'cont' in col]

ss = StandardScaler()
X[scale_features] = ss.fit_transform(features[scale_features])
X_test[scale_features] = ss.transform(test[scale_features])

In [9]:
# Swap noise

# Random
def apply_noise_rn(df, p=.75):
    should_not_swap = np.random.binomial(1, p, df.shape)
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Row-wise
def apply_noise_row(df, p=.75):
    should_not_swap = np.zeros(df.shape)
    for i in range(df.shape[0]):
        for j in np.random.choice(df.shape[1],int(p*df.shape[1]),replace=False):
            should_not_swap[i,j]=1 
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Pseudo Label

In [10]:
pseudo = pd.read_csv("../input/30-days-pseudo/submission.csv")[target]
train_pseudo = pd.concat([X, y], axis=1)
test_pseudo = pd.concat([X_test, pseudo], axis=1)
all_pseudo = pd.concat([train_pseudo, test_pseudo]).reset_index(drop=True)

# Optuna

In [11]:
# Optuna for parameter search
!pip install -q optuna

import optuna
import pickle



In [12]:
# for the fixed learning rate, use the opt n iterations and tune the tree hyperparameters
def objective(trial, X=all_pseudo.iloc[:,:-1], y=all_pseudo[target]):
  """
  """
  param_space = {
               'bootstrap_type': 'Poisson',
            'loss_function': 'RMSE',
            'eval_metric': 'RMSE',
          'task_type': 'GPU',
          'max_depth':trial.suggest_int('max_depth', 4, 6),
           'learning_rate':trial.suggest_uniform('learning_rate', 1.138e-2, 1.338e-2),
          'n_estimators':N_ESTIMATORS,
        'max_bin':trial.suggest_int('max_bin', 606, 667),
             'min_data_in_leaf':trial.suggest_int('min_data_in_leaf', 19, 43),
         'reg_lambda':trial.suggest_uniform('reg_lambda', 8, 16),
            'subsample': trial.suggest_uniform('subsample', 0.8664, 0.9664)
                }
            
  seed_list=[SEED, SEED+1]
  #kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=CVSEED)
  ctb_oof = np.zeros(train.shape[0])
  #lgb_pred = np.zeros(test.shape[0])
  X_tr, X_va, y_tr, y_va = train_test_split(X,y,test_size=.2,random_state=CVSEED)
  trn_idx = X_tr.index
  val_idx = X_va.index
  
  #for (trn_idx, val_idx) in enumerate(kf.split(X, y)):
  #print(f"===== fold {fold} =====")
  oof_idx = np.array([idx for idx in val_idx if idx < train.shape[0]])
  preds_idx = np.array([idx for idx in val_idx if idx >= train.shape[0]])

  X_train, y_train = all_pseudo.iloc[trn_idx, :-1], all_pseudo[target].iloc[trn_idx]
  #X_train = apply_noise_rn(X_train)

  X_valid, y_valid = all_pseudo.iloc[oof_idx, :-1], all_pseudo[target].iloc[oof_idx]
  #X_test = all_pseudo[scale_features].iloc[preds_idx]
  
  #start = time.time()
  for inseed in seed_list:
    param_space['random_seed'] = inseed

    model = ctb.CatBoostRegressor(**param_space)
    model.fit(
        X_train, 
        y_train,
        eval_set=[(X_valid, y_valid)],
        use_best_model=True,
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        verbose=VERBOSE
       
    )


    ctb_oof[oof_idx] += model.predict(X_valid) / len(seed_list)
    #lgb_pred[preds_idx-train.shape[0]] += model.predict(X_test) / len(seed_list)

  #elapsed = time.time() - start
  rmse = mean_squared_error(y_valid, ctb_oof[oof_idx], squared=False)
  #print(f"fold {fold} - lgb rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")            
  
  #print(f"oof lgb_rmse = {mean_squared_error(train[target], lgb_oof, squared=False)}")

  
  return rmse

In [13]:
study = optuna.create_study(direction='minimize')
study.optimize(objective,n_trials= 15)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-08-22 13:25:51,863][0m A new study created in memory with name: no-name-d7691837-6c4e-4b82-a5bb-d7580fbbae39[0m


0:	learn: 0.5938359	test: 0.7398290	best: 0.7398290 (0)	total: 14.4ms	remaining: 4m 48s
1000:	learn: 0.5716763	test: 0.7225666	best: 0.7225666 (1000)	total: 6.43s	remaining: 2m 1s
2000:	learn: 0.5666490	test: 0.7188411	best: 0.7188411 (2000)	total: 12.3s	remaining: 1m 51s
3000:	learn: 0.5636132	test: 0.7166979	best: 0.7166979 (3000)	total: 18.6s	remaining: 1m 45s
4000:	learn: 0.5614190	test: 0.7152359	best: 0.7152359 (4000)	total: 25.3s	remaining: 1m 41s
5000:	learn: 0.5597239	test: 0.7141524	best: 0.7141524 (5000)	total: 31.2s	remaining: 1m 33s
6000:	learn: 0.5583984	test: 0.7133502	best: 0.7133502 (6000)	total: 37.4s	remaining: 1m 27s
7000:	learn: 0.5573014	test: 0.7127483	best: 0.7127483 (7000)	total: 43.3s	remaining: 1m 20s
8000:	learn: 0.5563847	test: 0.7122987	best: 0.7122987 (8000)	total: 49.5s	remaining: 1m 14s
9000:	learn: 0.5555709	test: 0.7119351	best: 0.7119351 (9000)	total: 56.4s	remaining: 1m 8s
10000:	learn: 0.5548413	test: 0.7116404	best: 0.7116404 (10000)	total: 1m 2s	

[32m[I 2021-08-22 13:31:10,845][0m Trial 0 finished with value: 0.71037410373623 and parameters: {'max_depth': 4, 'learning_rate': 0.01269430234571873, 'max_bin': 653, 'min_data_in_leaf': 28, 'reg_lambda': 9.787218153714731, 'subsample': 0.9122591527181017}. Best is trial 0 with value: 0.71037410373623.[0m


0:	learn: 0.5938191	test: 0.7398162	best: 0.7398162 (0)	total: 8.27ms	remaining: 2m 45s
1000:	learn: 0.5700426	test: 0.7214214	best: 0.7214214 (1000)	total: 7.06s	remaining: 2m 14s
2000:	learn: 0.5647411	test: 0.7176170	best: 0.7176170 (2000)	total: 14s	remaining: 2m 6s
3000:	learn: 0.5615739	test: 0.7155143	best: 0.7155143 (3000)	total: 21.1s	remaining: 1m 59s
4000:	learn: 0.5593408	test: 0.7141476	best: 0.7141476 (4000)	total: 28.6s	remaining: 1m 54s
5000:	learn: 0.5576759	test: 0.7132426	best: 0.7132426 (5000)	total: 35.7s	remaining: 1m 47s
6000:	learn: 0.5562792	test: 0.7125920	best: 0.7125916 (5999)	total: 42.7s	remaining: 1m 39s
7000:	learn: 0.5551105	test: 0.7121245	best: 0.7121245 (6999)	total: 49.4s	remaining: 1m 31s
8000:	learn: 0.5540706	test: 0.7117541	best: 0.7117541 (8000)	total: 56.5s	remaining: 1m 24s
9000:	learn: 0.5531114	test: 0.7114652	best: 0.7114652 (9000)	total: 1m 4s	remaining: 1m 18s
10000:	learn: 0.5522312	test: 0.7112440	best: 0.7112440 (10000)	total: 1m 11s	

[32m[I 2021-08-22 13:36:05,515][0m Trial 1 finished with value: 0.7102981730210084 and parameters: {'max_depth': 5, 'learning_rate': 0.012168326966201095, 'max_bin': 617, 'min_data_in_leaf': 26, 'reg_lambda': 8.288848868268229, 'subsample': 0.9492105525714137}. Best is trial 1 with value: 0.7102981730210084.[0m


0:	learn: 0.5938144	test: 0.7398133	best: 0.7398133 (0)	total: 8.52ms	remaining: 2m 50s
1000:	learn: 0.5699427	test: 0.7213649	best: 0.7213649 (1000)	total: 6.98s	remaining: 2m 12s
2000:	learn: 0.5646409	test: 0.7175631	best: 0.7175631 (2000)	total: 14.2s	remaining: 2m 7s
3000:	learn: 0.5614495	test: 0.7154255	best: 0.7154255 (3000)	total: 21.3s	remaining: 2m
4000:	learn: 0.5592585	test: 0.7140904	best: 0.7140904 (4000)	total: 28.3s	remaining: 1m 53s
5000:	learn: 0.5575908	test: 0.7131863	best: 0.7131863 (5000)	total: 36.2s	remaining: 1m 48s
6000:	learn: 0.5562351	test: 0.7125496	best: 0.7125496 (6000)	total: 43.2s	remaining: 1m 40s
7000:	learn: 0.5550963	test: 0.7120770	best: 0.7120770 (7000)	total: 50.1s	remaining: 1m 33s
8000:	learn: 0.5540701	test: 0.7117417	best: 0.7117417 (8000)	total: 57.3s	remaining: 1m 25s
9000:	learn: 0.5531555	test: 0.7114504	best: 0.7114502 (8988)	total: 1m 5s	remaining: 1m 19s
10000:	learn: 0.5522987	test: 0.7112287	best: 0.7112287 (9999)	total: 1m 12s	rem

[32m[I 2021-08-22 13:41:01,635][0m Trial 2 finished with value: 0.7102299949781282 and parameters: {'max_depth': 5, 'learning_rate': 0.012382193346867496, 'max_bin': 622, 'min_data_in_leaf': 28, 'reg_lambda': 14.147385811572525, 'subsample': 0.9631065791273526}. Best is trial 2 with value: 0.7102299949781282.[0m


0:	learn: 0.5938466	test: 0.7398384	best: 0.7398384 (0)	total: 7.09ms	remaining: 2m 21s
1000:	learn: 0.5721809	test: 0.7229488	best: 0.7229488 (1000)	total: 7.04s	remaining: 2m 13s
2000:	learn: 0.5671667	test: 0.7192001	best: 0.7192001 (2000)	total: 13.4s	remaining: 2m
3000:	learn: 0.5641142	test: 0.7170444	best: 0.7170444 (3000)	total: 19.4s	remaining: 1m 49s
4000:	learn: 0.5618663	test: 0.7154973	best: 0.7154973 (4000)	total: 25.7s	remaining: 1m 42s
5000:	learn: 0.5601780	test: 0.7144037	best: 0.7144037 (5000)	total: 31.6s	remaining: 1m 34s
6000:	learn: 0.5588563	test: 0.7136134	best: 0.7136134 (6000)	total: 38.8s	remaining: 1m 30s
7000:	learn: 0.5577784	test: 0.7130099	best: 0.7130099 (7000)	total: 45.1s	remaining: 1m 23s
8000:	learn: 0.5568603	test: 0.7125245	best: 0.7125245 (8000)	total: 51s	remaining: 1m 16s
9000:	learn: 0.5560621	test: 0.7121364	best: 0.7121364 (8999)	total: 57.2s	remaining: 1m 9s
10000:	learn: 0.5553429	test: 0.7118158	best: 0.7118158 (10000)	total: 1m 3s	remai

[32m[I 2021-08-22 13:45:21,727][0m Trial 3 finished with value: 0.7104474904271959 and parameters: {'max_depth': 4, 'learning_rate': 0.011932427742417116, 'max_bin': 624, 'min_data_in_leaf': 26, 'reg_lambda': 13.135582159979535, 'subsample': 0.9159691162660194}. Best is trial 2 with value: 0.7102299949781282.[0m


0:	learn: 0.5937840	test: 0.7397888	best: 0.7397888 (0)	total: 8.85ms	remaining: 2m 57s
1000:	learn: 0.5681361	test: 0.7201894	best: 0.7201894 (1000)	total: 8.49s	remaining: 2m 41s
2000:	learn: 0.5625818	test: 0.7163682	best: 0.7163682 (2000)	total: 16.2s	remaining: 2m 25s
3000:	learn: 0.5593382	test: 0.7143979	best: 0.7143979 (3000)	total: 23.6s	remaining: 2m 13s
4000:	learn: 0.5571017	test: 0.7132986	best: 0.7132986 (4000)	total: 31.3s	remaining: 2m 5s
5000:	learn: 0.5552929	test: 0.7125855	best: 0.7125855 (5000)	total: 39.9s	remaining: 1m 59s
6000:	learn: 0.5537486	test: 0.7120915	best: 0.7120915 (6000)	total: 47.4s	remaining: 1m 50s
7000:	learn: 0.5523582	test: 0.7117126	best: 0.7117126 (7000)	total: 55.2s	remaining: 1m 42s
8000:	learn: 0.5510556	test: 0.7114185	best: 0.7114182 (7999)	total: 1m 3s	remaining: 1m 34s
9000:	learn: 0.5498491	test: 0.7112166	best: 0.7112166 (9000)	total: 1m 11s	remaining: 1m 27s
10000:	learn: 0.5487003	test: 0.7110536	best: 0.7110536 (10000)	total: 1m 1

[32m[I 2021-08-22 13:50:08,955][0m Trial 4 finished with value: 0.7104207261054428 and parameters: {'max_depth': 6, 'learning_rate': 0.012723642012095892, 'max_bin': 660, 'min_data_in_leaf': 36, 'reg_lambda': 10.85321691602322, 'subsample': 0.8744870760932077}. Best is trial 2 with value: 0.7102299949781282.[0m


0:	learn: 0.5938292	test: 0.7398228	best: 0.7398228 (0)	total: 7.17ms	remaining: 2m 23s
1000:	learn: 0.5714392	test: 0.7223595	best: 0.7223595 (1000)	total: 6.39s	remaining: 2m 1s
2000:	learn: 0.5664533	test: 0.7186890	best: 0.7186890 (2000)	total: 12.5s	remaining: 1m 52s
3000:	learn: 0.5632858	test: 0.7164377	best: 0.7164377 (3000)	total: 19.6s	remaining: 1m 51s
4000:	learn: 0.5610866	test: 0.7149744	best: 0.7149744 (4000)	total: 25.9s	remaining: 1m 43s
5000:	learn: 0.5594232	test: 0.7139419	best: 0.7139419 (5000)	total: 31.8s	remaining: 1m 35s
6000:	learn: 0.5581184	test: 0.7131664	best: 0.7131664 (6000)	total: 38s	remaining: 1m 28s
7000:	learn: 0.5570582	test: 0.7125846	best: 0.7125846 (7000)	total: 44s	remaining: 1m 21s
8000:	learn: 0.5561525	test: 0.7121678	best: 0.7121678 (8000)	total: 51.2s	remaining: 1m 16s
9000:	learn: 0.5553521	test: 0.7118205	best: 0.7118205 (8999)	total: 57.1s	remaining: 1m 9s
10000:	learn: 0.5546377	test: 0.7115648	best: 0.7115648 (10000)	total: 1m 3s	rema

[32m[I 2021-08-22 13:54:28,627][0m Trial 5 finished with value: 0.7103264974182542 and parameters: {'max_depth': 4, 'learning_rate': 0.013266281620937282, 'max_bin': 629, 'min_data_in_leaf': 27, 'reg_lambda': 9.999754243499133, 'subsample': 0.892210129133089}. Best is trial 2 with value: 0.7102299949781282.[0m


0:	learn: 0.5938268	test: 0.7398236	best: 0.7398236 (0)	total: 7.97ms	remaining: 2m 39s
1000:	learn: 0.5703823	test: 0.7216793	best: 0.7216793 (1000)	total: 6.89s	remaining: 2m 10s
2000:	learn: 0.5652132	test: 0.7179442	best: 0.7179442 (2000)	total: 14s	remaining: 2m 5s
3000:	learn: 0.5620497	test: 0.7157976	best: 0.7157976 (3000)	total: 22s	remaining: 2m 4s
4000:	learn: 0.5598172	test: 0.7143764	best: 0.7143764 (4000)	total: 28.7s	remaining: 1m 54s
5000:	learn: 0.5581547	test: 0.7134330	best: 0.7134330 (5000)	total: 35.8s	remaining: 1m 47s
6000:	learn: 0.5568365	test: 0.7127744	best: 0.7127744 (6000)	total: 42.7s	remaining: 1m 39s
7000:	learn: 0.5556784	test: 0.7122421	best: 0.7122421 (7000)	total: 49.5s	remaining: 1m 31s
8000:	learn: 0.5546826	test: 0.7118882	best: 0.7118882 (8000)	total: 57.2s	remaining: 1m 25s
9000:	learn: 0.5537811	test: 0.7116018	best: 0.7116018 (9000)	total: 1m 4s	remaining: 1m 18s
10000:	learn: 0.5529619	test: 0.7113793	best: 0.7113792 (9997)	total: 1m 11s	rema

[32m[I 2021-08-22 13:59:22,527][0m Trial 6 finished with value: 0.7103416570139254 and parameters: {'max_depth': 5, 'learning_rate': 0.011624079385799173, 'max_bin': 615, 'min_data_in_leaf': 37, 'reg_lambda': 15.101372146567666, 'subsample': 0.9245219122953967}. Best is trial 2 with value: 0.7102299949781282.[0m


0:	learn: 0.5938191	test: 0.7398164	best: 0.7398164 (0)	total: 8.51ms	remaining: 2m 50s
1000:	learn: 0.5701819	test: 0.7215045	best: 0.7215045 (1000)	total: 6.88s	remaining: 2m 10s
2000:	learn: 0.5648307	test: 0.7176499	best: 0.7176499 (2000)	total: 13.8s	remaining: 2m 4s
3000:	learn: 0.5616706	test: 0.7155299	best: 0.7155299 (3000)	total: 20.5s	remaining: 1m 55s
4000:	learn: 0.5594692	test: 0.7141856	best: 0.7141856 (4000)	total: 28.3s	remaining: 1m 53s
5000:	learn: 0.5578523	test: 0.7133129	best: 0.7133129 (5000)	total: 35.4s	remaining: 1m 46s
6000:	learn: 0.5565046	test: 0.7126564	best: 0.7126564 (6000)	total: 42s	remaining: 1m 37s
7000:	learn: 0.5553854	test: 0.7122032	best: 0.7122032 (6998)	total: 48.9s	remaining: 1m 30s
8000:	learn: 0.5544070	test: 0.7118733	best: 0.7118733 (8000)	total: 56.5s	remaining: 1m 24s
9000:	learn: 0.5535087	test: 0.7116249	best: 0.7116249 (9000)	total: 1m 3s	remaining: 1m 17s
10000:	learn: 0.5526552	test: 0.7113853	best: 0.7113853 (10000)	total: 1m 10s	

[32m[I 2021-08-22 14:04:14,058][0m Trial 7 finished with value: 0.7103629656406496 and parameters: {'max_depth': 5, 'learning_rate': 0.012159238476839763, 'max_bin': 616, 'min_data_in_leaf': 24, 'reg_lambda': 12.222968940284636, 'subsample': 0.8718133809749237}. Best is trial 2 with value: 0.7102299949781282.[0m


0:	learn: 0.5938032	test: 0.7398035	best: 0.7398035 (0)	total: 7.65ms	remaining: 2m 33s
1000:	learn: 0.5695308	test: 0.7210418	best: 0.7210418 (1000)	total: 6.99s	remaining: 2m 12s
2000:	learn: 0.5642264	test: 0.7172562	best: 0.7172562 (2000)	total: 13.6s	remaining: 2m 2s
3000:	learn: 0.5610696	test: 0.7151767	best: 0.7151767 (3000)	total: 20.6s	remaining: 1m 56s
4000:	learn: 0.5588950	test: 0.7138533	best: 0.7138533 (4000)	total: 27.6s	remaining: 1m 50s
5000:	learn: 0.5572570	test: 0.7129882	best: 0.7129882 (5000)	total: 34.7s	remaining: 1m 44s
6000:	learn: 0.5559238	test: 0.7124043	best: 0.7124043 (6000)	total: 41.6s	remaining: 1m 37s
7000:	learn: 0.5547865	test: 0.7119482	best: 0.7119482 (6999)	total: 48.1s	remaining: 1m 29s
8000:	learn: 0.5537772	test: 0.7116418	best: 0.7116418 (8000)	total: 55.3s	remaining: 1m 22s
9000:	learn: 0.5528553	test: 0.7113486	best: 0.7113480 (8999)	total: 1m 3s	remaining: 1m 17s
10000:	learn: 0.5519956	test: 0.7111136	best: 0.7111136 (10000)	total: 1m 9s

[32m[I 2021-08-22 14:09:01,048][0m Trial 8 finished with value: 0.7102463816113925 and parameters: {'max_depth': 5, 'learning_rate': 0.013228938994242734, 'max_bin': 658, 'min_data_in_leaf': 38, 'reg_lambda': 14.07869196523593, 'subsample': 0.8753590462654917}. Best is trial 2 with value: 0.7102299949781282.[0m


0:	learn: 0.5938457	test: 0.7398366	best: 0.7398366 (0)	total: 7.1ms	remaining: 2m 21s
1000:	learn: 0.5721174	test: 0.7228881	best: 0.7228881 (1000)	total: 6.52s	remaining: 2m 3s
2000:	learn: 0.5671270	test: 0.7191951	best: 0.7191951 (2000)	total: 13.3s	remaining: 1m 59s
3000:	learn: 0.5640859	test: 0.7170151	best: 0.7170151 (3000)	total: 19.6s	remaining: 1m 50s
4000:	learn: 0.5618701	test: 0.7155109	best: 0.7155109 (4000)	total: 25.5s	remaining: 1m 42s
5000:	learn: 0.5601777	test: 0.7144124	best: 0.7144124 (5000)	total: 31.7s	remaining: 1m 35s
6000:	learn: 0.5588488	test: 0.7135953	best: 0.7135952 (5999)	total: 38.2s	remaining: 1m 29s
7000:	learn: 0.5577832	test: 0.7130008	best: 0.7130008 (7000)	total: 44.8s	remaining: 1m 23s
8000:	learn: 0.5568558	test: 0.7125014	best: 0.7125014 (8000)	total: 51s	remaining: 1m 16s
9000:	learn: 0.5560743	test: 0.7121390	best: 0.7121390 (9000)	total: 56.8s	remaining: 1m 9s
10000:	learn: 0.5553587	test: 0.7118398	best: 0.7118398 (10000)	total: 1m 3s	rem

[32m[I 2021-08-22 14:13:21,459][0m Trial 9 finished with value: 0.7104515864332539 and parameters: {'max_depth': 4, 'learning_rate': 0.011966292839120778, 'max_bin': 637, 'min_data_in_leaf': 29, 'reg_lambda': 11.237565135455633, 'subsample': 0.8983541721655969}. Best is trial 2 with value: 0.7102299949781282.[0m


0:	learn: 0.5937841	test: 0.7397897	best: 0.7397897 (0)	total: 9.16ms	remaining: 3m 3s
1000:	learn: 0.5681421	test: 0.7201858	best: 0.7201858 (1000)	total: 8.08s	remaining: 2m 33s
2000:	learn: 0.5625844	test: 0.7163705	best: 0.7163705 (2000)	total: 16.2s	remaining: 2m 26s
3000:	learn: 0.5593117	test: 0.7143750	best: 0.7143750 (3000)	total: 24.1s	remaining: 2m 16s
4000:	learn: 0.5570569	test: 0.7132025	best: 0.7132025 (4000)	total: 32s	remaining: 2m 8s
5000:	learn: 0.5552864	test: 0.7124904	best: 0.7124904 (5000)	total: 39.8s	remaining: 1m 59s
6000:	learn: 0.5537566	test: 0.7119522	best: 0.7119522 (6000)	total: 48.4s	remaining: 1m 52s
7000:	learn: 0.5523787	test: 0.7115850	best: 0.7115848 (6998)	total: 56.4s	remaining: 1m 44s
8000:	learn: 0.5511055	test: 0.7113026	best: 0.7113024 (7999)	total: 1m 4s	remaining: 1m 36s
9000:	learn: 0.5498962	test: 0.7110862	best: 0.7110862 (8999)	total: 1m 12s	remaining: 1m 28s
10000:	learn: 0.5487539	test: 0.7109033	best: 0.7109033 (10000)	total: 1m 20s	

[32m[I 2021-08-22 14:18:09,976][0m Trial 10 finished with value: 0.7102686115149305 and parameters: {'max_depth': 6, 'learning_rate': 0.012654953006366428, 'max_bin': 642, 'min_data_in_leaf': 19, 'reg_lambda': 15.999071087402015, 'subsample': 0.9624706529055638}. Best is trial 2 with value: 0.7102299949781282.[0m


0:	learn: 0.5938033	test: 0.7398047	best: 0.7398047 (0)	total: 8.06ms	remaining: 2m 41s
1000:	learn: 0.5694933	test: 0.7209806	best: 0.7209806 (1000)	total: 7.2s	remaining: 2m 16s
2000:	learn: 0.5641758	test: 0.7172003	best: 0.7172003 (2000)	total: 14.1s	remaining: 2m 7s
3000:	learn: 0.5610253	test: 0.7151065	best: 0.7151065 (3000)	total: 21.9s	remaining: 2m 4s
4000:	learn: 0.5588280	test: 0.7137900	best: 0.7137900 (4000)	total: 29.2s	remaining: 1m 56s
5000:	learn: 0.5571579	test: 0.7128863	best: 0.7128863 (5000)	total: 35.9s	remaining: 1m 47s
6000:	learn: 0.5558201	test: 0.7123047	best: 0.7123047 (6000)	total: 43.1s	remaining: 1m 40s
7000:	learn: 0.5546658	test: 0.7118613	best: 0.7118613 (7000)	total: 50.4s	remaining: 1m 33s
8000:	learn: 0.5536390	test: 0.7115429	best: 0.7115429 (8000)	total: 57.7s	remaining: 1m 26s
9000:	learn: 0.5526975	test: 0.7112786	best: 0.7112786 (9000)	total: 1m 4s	remaining: 1m 19s
10000:	learn: 0.5518165	test: 0.7110517	best: 0.7110517 (10000)	total: 1m 11s	

[32m[I 2021-08-22 14:23:00,216][0m Trial 11 finished with value: 0.7101568290815834 and parameters: {'max_depth': 5, 'learning_rate': 0.013197859116004091, 'max_bin': 649, 'min_data_in_leaf': 42, 'reg_lambda': 14.390010371698583, 'subsample': 0.9404996480656921}. Best is trial 11 with value: 0.7101568290815834.[0m


0:	learn: 0.5937794	test: 0.7397859	best: 0.7397859 (0)	total: 9.16ms	remaining: 3m 3s
1000:	learn: 0.5679256	test: 0.7200366	best: 0.7200366 (1000)	total: 8.04s	remaining: 2m 32s
2000:	learn: 0.5623447	test: 0.7162203	best: 0.7162203 (2000)	total: 16s	remaining: 2m 23s
3000:	learn: 0.5591184	test: 0.7142552	best: 0.7142552 (3000)	total: 24s	remaining: 2m 15s
4000:	learn: 0.5568558	test: 0.7131335	best: 0.7131335 (4000)	total: 32.2s	remaining: 2m 8s
5000:	learn: 0.5550401	test: 0.7124125	best: 0.7124125 (5000)	total: 40.2s	remaining: 2m
6000:	learn: 0.5535082	test: 0.7119198	best: 0.7119193 (5999)	total: 48s	remaining: 1m 51s
7000:	learn: 0.5521104	test: 0.7115640	best: 0.7115640 (7000)	total: 55.7s	remaining: 1m 43s
8000:	learn: 0.5508208	test: 0.7113117	best: 0.7113117 (8000)	total: 1m 4s	remaining: 1m 36s
9000:	learn: 0.5495819	test: 0.7111016	best: 0.7111016 (9000)	total: 1m 12s	remaining: 1m 28s
10000:	learn: 0.5484328	test: 0.7109471	best: 0.7109471 (10000)	total: 1m 20s	remainin

[32m[I 2021-08-22 14:28:09,056][0m Trial 12 finished with value: 0.710324333119345 and parameters: {'max_depth': 6, 'learning_rate': 0.013012689944344458, 'max_bin': 649, 'min_data_in_leaf': 42, 'reg_lambda': 14.409709606841531, 'subsample': 0.9448477487956682}. Best is trial 11 with value: 0.7101568290815834.[0m


0:	learn: 0.5938279	test: 0.7398245	best: 0.7398245 (0)	total: 12.6ms	remaining: 4m 11s
1000:	learn: 0.5705254	test: 0.7217957	best: 0.7217957 (1000)	total: 7.25s	remaining: 2m 17s
2000:	learn: 0.5652069	test: 0.7179268	best: 0.7179268 (2000)	total: 15.2s	remaining: 2m 17s
3000:	learn: 0.5620408	test: 0.7157770	best: 0.7157770 (3000)	total: 22.3s	remaining: 2m 6s
4000:	learn: 0.5598185	test: 0.7143836	best: 0.7143836 (4000)	total: 29.2s	remaining: 1m 56s
5000:	learn: 0.5581642	test: 0.7134341	best: 0.7134341 (5000)	total: 36.3s	remaining: 1m 48s
6000:	learn: 0.5568189	test: 0.7127579	best: 0.7127579 (6000)	total: 43.6s	remaining: 1m 41s
7000:	learn: 0.5556886	test: 0.7122687	best: 0.7122687 (7000)	total: 50.9s	remaining: 1m 34s
8000:	learn: 0.5546755	test: 0.7118612	best: 0.7118607 (7998)	total: 58.1s	remaining: 1m 27s
9000:	learn: 0.5537718	test: 0.7115663	best: 0.7115663 (9000)	total: 1m 4s	remaining: 1m 19s
10000:	learn: 0.5529309	test: 0.7113293	best: 0.7113284 (9997)	total: 1m 12s

[32m[I 2021-08-22 14:33:06,974][0m Trial 13 finished with value: 0.7102881474313177 and parameters: {'max_depth': 5, 'learning_rate': 0.011477127962264397, 'max_bin': 606, 'min_data_in_leaf': 33, 'reg_lambda': 15.977518905229068, 'subsample': 0.9639579999293086}. Best is trial 11 with value: 0.7101568290815834.[0m


0:	learn: 0.5938068	test: 0.7398077	best: 0.7398077 (0)	total: 8.35ms	remaining: 2m 47s
1000:	learn: 0.5695923	test: 0.7211045	best: 0.7211045 (1000)	total: 6.92s	remaining: 2m 11s
2000:	learn: 0.5642366	test: 0.7172653	best: 0.7172653 (2000)	total: 15.1s	remaining: 2m 16s
3000:	learn: 0.5611184	test: 0.7152046	best: 0.7152046 (3000)	total: 22.2s	remaining: 2m 5s
4000:	learn: 0.5589245	test: 0.7138581	best: 0.7138581 (4000)	total: 28.8s	remaining: 1m 55s
5000:	learn: 0.5572740	test: 0.7129920	best: 0.7129920 (5000)	total: 36s	remaining: 1m 47s
6000:	learn: 0.5559366	test: 0.7123866	best: 0.7123866 (5998)	total: 44.1s	remaining: 1m 42s
7000:	learn: 0.5548023	test: 0.7119625	best: 0.7119625 (7000)	total: 50.9s	remaining: 1m 34s
8000:	learn: 0.5537729	test: 0.7116305	best: 0.7116303 (7999)	total: 57.9s	remaining: 1m 26s
9000:	learn: 0.5528318	test: 0.7113620	best: 0.7113620 (9000)	total: 1m 4s	remaining: 1m 19s
10000:	learn: 0.5519672	test: 0.7111839	best: 0.7111836 (9999)	total: 1m 11s	r

[32m[I 2021-08-22 14:37:43,567][0m Trial 14 finished with value: 0.7102731504585664 and parameters: {'max_depth': 5, 'learning_rate': 0.012971315015920802, 'max_bin': 666, 'min_data_in_leaf': 43, 'reg_lambda': 13.130901249838644, 'subsample': 0.9383525362850756}. Best is trial 11 with value: 0.7101568290815834.[0m


Number of finished trials: 15
Best trial: {'max_depth': 5, 'learning_rate': 0.013197859116004091, 'max_bin': 649, 'min_data_in_leaf': 42, 'reg_lambda': 14.390010371698583, 'subsample': 0.9404996480656921}


In [14]:
study.best_params

{'max_depth': 5,
 'learning_rate': 0.013197859116004091,
 'max_bin': 649,
 'min_data_in_leaf': 42,
 'reg_lambda': 14.390010371698583,
 'subsample': 0.9404996480656921}

# Log

====== Ordinal encoding =========

 0.7106017728195324 no noise ver2

 0.71414426290903 row-wise noise ver1

 0.7144009762376538 random noise ver3
 
 ====== One-hot encoding =========
 
 0.7104558237409161 no noise ver7
 
 0.71027414342552 no noise ver8 (narrow space)
 
 
 