In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import random
import os
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

#import lightgbm as lgb
#import xgboost as xgb
import catboost as ctb

import warnings
warnings.simplefilter('ignore')

# Parameters

In [2]:
# Name of the label column.
target = 'target'

# Flip to True for a quick smoke run with a minimal training budget.
DEBUG = False

# The seeds are identical in both modes, so they are set once here.
SEED = 17
CVSEED = 17

if DEBUG:
    # Smallest possible budget: one tree, two folds, stop immediately.
    N_SPLITS, N_ESTIMATORS = 2, 1
    EARLY_STOPPING_ROUNDS, VERBOSE = 1, 100
else:
    # Full run: large iteration cap, with early stopping deciding the real count.
    N_SPLITS, N_ESTIMATORS = 10, 20000
    EARLY_STOPPING_ROUNDS, VERBOSE = 200, 1000

In [3]:
def set_seed(seed=17):
    """Seed the global random number generators used by this notebook.

    Seeds Python's ``random`` module and NumPy's legacy global RNG, and writes
    ``seed`` to the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Integer seed applied to every generator (default 17).
    """
    # Note: Python reads PYTHONHASHSEED at interpreter start-up, so this only
    # reaches processes launched after this call.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)
    
# Seed the random number generators once, up front, with the notebook-wide SEED.
set_seed(SEED)

# Load data

In [4]:
# Load the training and test data
train = pd.read_csv("../input/30-days-of-ml/train.csv")
test = pd.read_csv("../input/30-days-of-ml/test.csv")

# Preview the first rows of the training data
train.head()

Unnamed: 0,id,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,C,B,B,A,E,C,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,A,A,B,D,A,F,A,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,A,C,B,D,A,D,A,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,A,C,B,D,A,E,C,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,A,C,B,D,A,E,A,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [5]:
# Separate the label from the features. The label column is addressed through
# the `target` constant so that every cell agrees on its name.
y = train[target]
features = train.drop(['id', target], axis=1)

# Preview features
features.head()

Unnamed: 0,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9,...,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,B,B,B,C,B,B,A,E,C,N,...,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985
1,B,B,A,A,B,D,A,F,A,O,...,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,A,A,A,C,B,D,A,D,A,F,...,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,B,B,A,C,B,D,A,E,C,K,...,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682
4,A,A,A,C,B,D,A,E,A,N,...,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823


# Preprocessing

In [6]:
# Disabled alternative: ordinal encoding of the categorical columns.
# The code below is wrapped in a string literal, so none of it executes;
# the cell only echoes the text back as its output.
'''
# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# ordinal-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()
ordinal_encoder = OrdinalEncoder()
X[object_cols] = ordinal_encoder.fit_transform(features[object_cols])
X_test[object_cols] = ordinal_encoder.transform(test[object_cols])

# Preview the ordinal-encoded features
X.head()
'''

"\n# List of categorical columns\nobject_cols = [col for col in features.columns if 'cat' in col]\n\n# ordinal-encode categorical columns\nX = features.copy()\nX_test = test.drop(['id'], axis=1).copy()\nordinal_encoder = OrdinalEncoder()\nX[object_cols] = ordinal_encoder.fit_transform(features[object_cols])\nX_test[object_cols] = ordinal_encoder.transform(test[object_cols])\n\n# Preview the ordinal-encoded features\nX.head()\n"

In [7]:
# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# one-hot-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()

# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4 removed
# the old name. Try the current spelling first and fall back for older releases.
try:
    oh_encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
except TypeError:
    oh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")

# Fit on the training rows only; categories unseen at fit time encode as all zeros.
X_ohe = oh_encoder.fit_transform(features[object_cols])
X_test_ohe = oh_encoder.transform(test[object_cols])

# Wrap the dense arrays in frames that carry the index of the frame they will be
# joined to, so the column-wise concat below aligns rows explicitly rather than
# relying on both sides happening to have a default RangeIndex.
ohe_cols = [f"ohe_{i}" for i in range(X_ohe.shape[1])]
X_ohe = pd.DataFrame(X_ohe, columns=ohe_cols, index=X.index)
X_test_ohe = pd.DataFrame(X_test_ohe, columns=ohe_cols, index=X_test.index)

# Replace the raw categorical columns with their one-hot expansion
# (the continuous columns stay first, followed by the ohe_* columns).
X = pd.concat([X.drop(object_cols, axis=1), X_ohe], axis=1)
X_test = pd.concat([X_test.drop(object_cols, axis=1), X_test_ohe], axis=1)

# Preview the one-hot-encoded features
X.head()

Unnamed: 0,cont0,cont1,cont2,cont3,cont4,cont5,cont6,cont7,cont8,cont9,...,ohe_46,ohe_47,ohe_48,ohe_49,ohe_50,ohe_51,ohe_52,ohe_53,ohe_54,ohe_55
0,0.20147,-0.014822,0.669699,0.136278,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.743068,0.367411,1.021605,0.365798,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.742708,0.310383,-0.012673,0.576957,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.429551,0.620998,0.577942,0.28061,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,1.058291,0.367492,-0.052389,0.232407,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [8]:
# Continuous columns to standardise
scale_features = [col for col in features.columns if 'cont' in col]

# Fit the scaler on the training rows only, then apply the same fitted
# transformation to the test rows. Inputs are read from `features`/`test`
# (not from X), so re-running this cell does not scale the data twice.
ss = StandardScaler()
X[scale_features] = ss.fit_transform(features[scale_features])
X_test[scale_features] = ss.transform(test[scale_features])

In [9]:
# Swap noise

# Random
def apply_noise_rn(df, p=.75):
    """Return a copy of ``df`` in which random cells are replaced by swap noise.

    Every cell is kept independently with probability ``p``. A cell that is not
    kept takes the value found at the same position in a row-shuffled copy of
    ``df``, i.e. a value from the same column of some (possibly the same) row.

    Args:
        df: DataFrame to corrupt; it is not modified.
        p: Probability that a cell keeps its original value.

    Returns:
        DataFrame with the same index and columns as ``df``.
    """
    # Draw order matters for reproducibility: the keep mask first, then the shuffle.
    keep_mask = np.random.binomial(1, p, df.shape) == 1
    shuffled_rows = np.random.permutation(df)
    return df.where(keep_mask, shuffled_rows)

# Row-wise
def apply_noise_row(df, p=.75):
    """Return a copy of ``df`` with a fixed number of cells per row swapped.

    In every row exactly ``int(p * n_columns)`` randomly chosen columns keep
    their original value. The remaining cells take the value found at the same
    position in a row-shuffled copy of ``df``.

    Args:
        df: DataFrame to corrupt; it is not modified.
        p: Fraction of columns kept unchanged in each row.

    Returns:
        DataFrame with the same index and columns as ``df``.
    """
    n_rows = df.shape[0]
    n_cols = df.shape[1]
    n_keep = int(p * n_cols)

    keep_mask = np.zeros((n_rows, n_cols), dtype=bool)
    for row in range(n_rows):
        # One draw per row: the columns of this row that stay untouched.
        kept_cols = np.random.choice(n_cols, n_keep, replace=False)
        keep_mask[row, kept_cols] = True

    return df.where(keep_mask, np.random.permutation(df))

# Pseudo Label

In [10]:
# Pseudo labels for the test rows: the `target` column of an earlier submission file.
pseudo = pd.read_csv("../input/30dml-pseudo2/xgb_submission_k.txt")[target]
# Training features with their true labels, and test features with their pseudo labels.
train_pseudo = pd.concat([X, y], axis=1)
test_pseudo = pd.concat([X_test, pseudo], axis=1)
# Stack the training rows first, then the test rows, and renumber from 0.
# The tuning objective relies on this layout: an index below train.shape[0]
# identifies a row that has a true label.
all_pseudo = pd.concat([train_pseudo, test_pseudo]).reset_index(drop=True)

# Optuna

In [11]:
# Optuna for parameter search
!pip install -q optuna

import optuna
import pickle



In [12]:
# Tune the tree hyperparameters (the learning rate is searched only within a narrow
# band); the number of boosting iterations is capped at N_ESTIMATORS and chosen by
# early stopping.
def objective(trial, X=all_pseudo.iloc[:,:-1], y=all_pseudo[target]):
    """Optuna objective: hold-out RMSE of a seed-averaged CatBoost regressor.

    ``X``/``y`` are split 80/20 with ``CVSEED``. The models are trained on the
    full 80% part (true and pseudo-labelled rows alike), while early stopping
    and the returned score use only those hold-out rows whose index is below
    ``train.shape[0]``, i.e. rows with a true label in the ``all_pseudo`` layout.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X: Feature frame. Its index must follow the ``all_pseudo`` layout
            (training rows first, numbered from 0). Defaults to every column
            of ``all_pseudo`` except the last.
        y: Labels aligned with ``X``. Defaults to ``all_pseudo[target]``.

    Returns:
        RMSE on the truly labelled hold-out rows of the prediction averaged
        over the seeds ``SEED`` and ``SEED + 1``.

    Raises:
        ValueError: If the hold-out part contains no truly labelled row.
    """
    # 'task_type': 'GPU' means CatBoost is asked to train on a GPU.
    param_space = {
        'bootstrap_type': 'Poisson',
        'loss_function': 'RMSE',
        'eval_metric': 'RMSE',
        'task_type': 'GPU',
        'max_depth': trial.suggest_int('max_depth', 5, 7),
        # suggest_float replaces the deprecated suggest_uniform (same uniform range).
        'learning_rate': trial.suggest_float('learning_rate', 1.1e-2, 1.2e-2),
        'n_estimators': N_ESTIMATORS,
        'max_bin': trial.suggest_int('max_bin', 623, 653),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 46, 58),
        'reg_lambda': trial.suggest_float('reg_lambda', 39.85, 42.85),
        'subsample': trial.suggest_float('subsample', 0.903, 0.953),
    }

    seed_list = [SEED, SEED + 1]
    n_labelled = train.shape[0]

    # Use the split parts directly, so the X/y arguments are honoured instead of
    # re-reading the global frame through the split indices.
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        X, y, test_size=.2, random_state=CVSEED
    )

    # Validate only on rows with a true label; pseudo-labelled rows are dropped.
    is_labelled = X_holdout.index < n_labelled
    X_valid, y_valid = X_holdout.loc[is_labelled], y_holdout.loc[is_labelled]
    if X_valid.empty:
        raise ValueError("hold-out split contains no truly labelled rows")

    # Average the hold-out predictions over the model seeds.
    valid_pred = np.zeros(len(X_valid))
    for inseed in seed_list:
        param_space['random_seed'] = inseed

        model = ctb.CatBoostRegressor(**param_space)
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_valid, y_valid)],
            use_best_model=True,
            early_stopping_rounds=EARLY_STOPPING_ROUNDS,
            verbose=VERBOSE,
        )

        valid_pred += model.predict(X_valid) / len(seed_list)

    # sqrt(MSE) instead of mean_squared_error(..., squared=False): the `squared`
    # keyword no longer exists in recent scikit-learn releases.
    return float(np.sqrt(mean_squared_error(y_valid, valid_pred)))

In [13]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable.
# TPESampler is the sampler create_study uses by default for a single objective,
# so only the seeding changes.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=15)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-08-24 06:39:13,158][0m A new study created in memory with name: no-name-7b0a9758-4bf2-41ba-8d5c-e8588927c18f[0m


0:	learn: 0.5942798	test: 0.7398017	best: 0.7398017 (0)	total: 17.5ms	remaining: 5m 49s
1000:	learn: 0.5692836	test: 0.7205958	best: 0.7205958 (1000)	total: 8.13s	remaining: 2m 34s
2000:	learn: 0.5637780	test: 0.7167496	best: 0.7167496 (2000)	total: 16s	remaining: 2m 24s
3000:	learn: 0.5605262	test: 0.7146951	best: 0.7146951 (3000)	total: 23.9s	remaining: 2m 15s
4000:	learn: 0.5582991	test: 0.7134437	best: 0.7134437 (4000)	total: 32.5s	remaining: 2m 9s
5000:	learn: 0.5565759	test: 0.7126693	best: 0.7126693 (5000)	total: 40.5s	remaining: 2m 1s
6000:	learn: 0.5551464	test: 0.7121203	best: 0.7121203 (6000)	total: 48.4s	remaining: 1m 52s
7000:	learn: 0.5538835	test: 0.7117369	best: 0.7117369 (7000)	total: 56.1s	remaining: 1m 44s
8000:	learn: 0.5527257	test: 0.7114324	best: 0.7114324 (8000)	total: 1m 4s	remaining: 1m 37s
9000:	learn: 0.5516437	test: 0.7112033	best: 0.7112033 (9000)	total: 1m 12s	remaining: 1m 28s
10000:	learn: 0.5506397	test: 0.7110146	best: 0.7110146 (9999)	total: 1m 20s	r

[32m[I 2021-08-24 06:45:40,900][0m Trial 0 finished with value: 0.7102646490888381 and parameters: {'max_depth': 6, 'learning_rate': 0.011996253080464788, 'max_bin': 636, 'min_data_in_leaf': 55, 'reg_lambda': 40.430411453301886, 'subsample': 0.9260377054133831}. Best is trial 0 with value: 0.7102646490888381.[0m


0:	learn: 0.5942820	test: 0.7398058	best: 0.7398058 (0)	total: 10.7ms	remaining: 3m 34s
1000:	learn: 0.5684277	test: 0.7201629	best: 0.7201629 (1000)	total: 9.9s	remaining: 3m 7s
2000:	learn: 0.5627494	test: 0.7163207	best: 0.7163207 (2000)	total: 18.9s	remaining: 2m 50s
3000:	learn: 0.5593745	test: 0.7143312	best: 0.7143312 (3000)	total: 27.8s	remaining: 2m 37s
4000:	learn: 0.5570249	test: 0.7131998	best: 0.7131994 (3999)	total: 37.4s	remaining: 2m 29s
5000:	learn: 0.5551215	test: 0.7125030	best: 0.7125030 (5000)	total: 46.2s	remaining: 2m 18s
6000:	learn: 0.5534499	test: 0.7120225	best: 0.7120225 (6000)	total: 55.3s	remaining: 2m 8s
7000:	learn: 0.5519351	test: 0.7116662	best: 0.7116662 (6999)	total: 1m 4s	remaining: 1m 59s
8000:	learn: 0.5505283	test: 0.7114183	best: 0.7114183 (7999)	total: 1m 14s	remaining: 1m 51s
9000:	learn: 0.5491958	test: 0.7112208	best: 0.7112208 (9000)	total: 1m 23s	remaining: 1m 41s
10000:	learn: 0.5479016	test: 0.7110548	best: 0.7110548 (10000)	total: 1m 32

[32m[I 2021-08-24 06:50:52,904][0m Trial 1 finished with value: 0.7104730675863905 and parameters: {'max_depth': 7, 'learning_rate': 0.011133405266163772, 'max_bin': 634, 'min_data_in_leaf': 47, 'reg_lambda': 41.18286457716994, 'subsample': 0.926180914399277}. Best is trial 0 with value: 0.7102646490888381.[0m


0:	learn: 0.5942713	test: 0.7397973	best: 0.7397973 (0)	total: 19.4ms	remaining: 6m 28s
1000:	learn: 0.5679930	test: 0.7198190	best: 0.7198190 (1000)	total: 9.17s	remaining: 2m 54s
2000:	learn: 0.5622692	test: 0.7159752	best: 0.7159752 (2000)	total: 18.5s	remaining: 2m 46s
3000:	learn: 0.5589553	test: 0.7140977	best: 0.7140977 (3000)	total: 27.8s	remaining: 2m 37s
4000:	learn: 0.5566202	test: 0.7130516	best: 0.7130516 (4000)	total: 36.7s	remaining: 2m 26s
5000:	learn: 0.5546923	test: 0.7123851	best: 0.7123849 (4999)	total: 45.8s	remaining: 2m 17s
6000:	learn: 0.5530018	test: 0.7119363	best: 0.7119363 (6000)	total: 55.7s	remaining: 2m 10s
7000:	learn: 0.5514635	test: 0.7116197	best: 0.7116197 (7000)	total: 1m 4s	remaining: 1m 59s
8000:	learn: 0.5500129	test: 0.7113734	best: 0.7113734 (8000)	total: 1m 13s	remaining: 1m 50s
9000:	learn: 0.5486307	test: 0.7111705	best: 0.7111705 (9000)	total: 1m 23s	remaining: 1m 41s
10000:	learn: 0.5473121	test: 0.7109968	best: 0.7109967 (9998)	total: 1m 

[32m[I 2021-08-24 06:55:36,512][0m Trial 2 finished with value: 0.710528553506454 and parameters: {'max_depth': 7, 'learning_rate': 0.011707817693489025, 'max_bin': 624, 'min_data_in_leaf': 48, 'reg_lambda': 42.055765713753786, 'subsample': 0.9285694468309899}. Best is trial 0 with value: 0.7102646490888381.[0m


0:	learn: 0.5943175	test: 0.7398308	best: 0.7398308 (0)	total: 8.1ms	remaining: 2m 42s
1000:	learn: 0.5714029	test: 0.7220436	best: 0.7220436 (1000)	total: 7.24s	remaining: 2m 17s
2000:	learn: 0.5662011	test: 0.7182426	best: 0.7182426 (2000)	total: 14.3s	remaining: 2m 8s
3000:	learn: 0.5629810	test: 0.7160201	best: 0.7160201 (3000)	total: 21s	remaining: 1m 59s
4000:	learn: 0.5607204	test: 0.7145938	best: 0.7145938 (4000)	total: 28.3s	remaining: 1m 53s
5000:	learn: 0.5590429	test: 0.7136165	best: 0.7136165 (5000)	total: 35.4s	remaining: 1m 46s
6000:	learn: 0.5577385	test: 0.7129315	best: 0.7129315 (6000)	total: 42.2s	remaining: 1m 38s
7000:	learn: 0.5566093	test: 0.7123848	best: 0.7123848 (7000)	total: 49.3s	remaining: 1m 31s
8000:	learn: 0.5556569	test: 0.7119750	best: 0.7119750 (8000)	total: 55.9s	remaining: 1m 23s
9000:	learn: 0.5548095	test: 0.7116739	best: 0.7116739 (9000)	total: 1m 3s	remaining: 1m 17s
10000:	learn: 0.5540225	test: 0.7114226	best: 0.7114226 (9999)	total: 1m 10s	re

[32m[I 2021-08-24 07:00:27,985][0m Trial 3 finished with value: 0.7102994012381144 and parameters: {'max_depth': 5, 'learning_rate': 0.011116565513044801, 'max_bin': 628, 'min_data_in_leaf': 52, 'reg_lambda': 39.877811314547984, 'subsample': 0.9144847284555144}. Best is trial 0 with value: 0.7102646490888381.[0m


0:	learn: 0.5943102	test: 0.7398242	best: 0.7398242 (0)	total: 8.12ms	remaining: 2m 42s
1000:	learn: 0.5710323	test: 0.7217546	best: 0.7217546 (1000)	total: 6.82s	remaining: 2m 9s
2000:	learn: 0.5657777	test: 0.7179182	best: 0.7179182 (2000)	total: 13.8s	remaining: 2m 3s
3000:	learn: 0.5626045	test: 0.7157572	best: 0.7157572 (3000)	total: 21.4s	remaining: 2m 1s
4000:	learn: 0.5603658	test: 0.7143668	best: 0.7143668 (4000)	total: 28.1s	remaining: 1m 52s
5000:	learn: 0.5586978	test: 0.7133952	best: 0.7133952 (5000)	total: 35s	remaining: 1m 45s
6000:	learn: 0.5573751	test: 0.7126933	best: 0.7126933 (6000)	total: 41.9s	remaining: 1m 37s
7000:	learn: 0.5562687	test: 0.7121885	best: 0.7121885 (7000)	total: 49.5s	remaining: 1m 31s
8000:	learn: 0.5553031	test: 0.7118002	best: 0.7118002 (8000)	total: 56.5s	remaining: 1m 24s
9000:	learn: 0.5544170	test: 0.7114955	best: 0.7114955 (8999)	total: 1m 3s	remaining: 1m 17s
10000:	learn: 0.5536296	test: 0.7112495	best: 0.7112495 (10000)	total: 1m 10s	re

[32m[I 2021-08-24 07:05:20,027][0m Trial 4 finished with value: 0.7102143884809948 and parameters: {'max_depth': 5, 'learning_rate': 0.01161577552614408, 'max_bin': 651, 'min_data_in_leaf': 58, 'reg_lambda': 42.59797543622488, 'subsample': 0.9490038091829924}. Best is trial 4 with value: 0.7102143884809948.[0m


0:	learn: 0.5942695	test: 0.7397964	best: 0.7397964 (0)	total: 10.6ms	remaining: 3m 32s
1000:	learn: 0.5679672	test: 0.7198525	best: 0.7198525 (1000)	total: 8.96s	remaining: 2m 50s
2000:	learn: 0.5622359	test: 0.7160150	best: 0.7160150 (2000)	total: 17.7s	remaining: 2m 39s
3000:	learn: 0.5588985	test: 0.7141053	best: 0.7141052 (2999)	total: 27.2s	remaining: 2m 33s
4000:	learn: 0.5564951	test: 0.7129955	best: 0.7129955 (4000)	total: 36.1s	remaining: 2m 24s
5000:	learn: 0.5545777	test: 0.7123268	best: 0.7123268 (5000)	total: 44.7s	remaining: 2m 13s
6000:	learn: 0.5528813	test: 0.7118645	best: 0.7118635 (5996)	total: 54.4s	remaining: 2m 6s
7000:	learn: 0.5513138	test: 0.7115172	best: 0.7115172 (7000)	total: 1m 3s	remaining: 1m 57s
8000:	learn: 0.5498542	test: 0.7112537	best: 0.7112537 (8000)	total: 1m 12s	remaining: 1m 48s
9000:	learn: 0.5484770	test: 0.7110391	best: 0.7110389 (8998)	total: 1m 21s	remaining: 1m 39s
10000:	learn: 0.5471497	test: 0.7108787	best: 0.7108785 (9999)	total: 1m 3

[32m[I 2021-08-24 07:10:24,352][0m Trial 5 finished with value: 0.7104297578866465 and parameters: {'max_depth': 7, 'learning_rate': 0.011827123369630056, 'max_bin': 640, 'min_data_in_leaf': 53, 'reg_lambda': 42.7848830561804, 'subsample': 0.9426527725507905}. Best is trial 4 with value: 0.7102143884809948.[0m


0:	learn: 0.5943169	test: 0.7398307	best: 0.7398307 (0)	total: 8.23ms	remaining: 2m 44s
1000:	learn: 0.5713619	test: 0.7219992	best: 0.7219992 (1000)	total: 7.26s	remaining: 2m 17s
2000:	learn: 0.5661159	test: 0.7181546	best: 0.7181546 (2000)	total: 14.6s	remaining: 2m 11s
3000:	learn: 0.5629724	test: 0.7160165	best: 0.7160165 (3000)	total: 21.7s	remaining: 2m 2s
4000:	learn: 0.5607357	test: 0.7145878	best: 0.7145878 (4000)	total: 28.7s	remaining: 1m 54s
5000:	learn: 0.5590602	test: 0.7136048	best: 0.7136048 (5000)	total: 35.4s	remaining: 1m 46s
6000:	learn: 0.5577288	test: 0.7128934	best: 0.7128934 (6000)	total: 43s	remaining: 1m 40s
7000:	learn: 0.5566360	test: 0.7123786	best: 0.7123786 (7000)	total: 50s	remaining: 1m 32s
8000:	learn: 0.5556804	test: 0.7119808	best: 0.7119808 (8000)	total: 56.7s	remaining: 1m 25s
9000:	learn: 0.5548057	test: 0.7116619	best: 0.7116619 (9000)	total: 1m 3s	remaining: 1m 17s
10000:	learn: 0.5540270	test: 0.7114030	best: 0.7114030 (10000)	total: 1m 10s	re

[32m[I 2021-08-24 07:15:17,256][0m Trial 6 finished with value: 0.7102685063129754 and parameters: {'max_depth': 5, 'learning_rate': 0.011139434795786572, 'max_bin': 634, 'min_data_in_leaf': 47, 'reg_lambda': 41.958314868186726, 'subsample': 0.921449201201816}. Best is trial 4 with value: 0.7102143884809948.[0m


0:	learn: 0.5943092	test: 0.7398232	best: 0.7398232 (0)	total: 8.55ms	remaining: 2m 50s
1000:	learn: 0.5709968	test: 0.7217335	best: 0.7217335 (1000)	total: 6.74s	remaining: 2m 7s
2000:	learn: 0.5658099	test: 0.7179604	best: 0.7179604 (2000)	total: 13.9s	remaining: 2m 4s
3000:	learn: 0.5625660	test: 0.7157356	best: 0.7157354 (2999)	total: 20.8s	remaining: 1m 57s
4000:	learn: 0.5603390	test: 0.7143403	best: 0.7143403 (4000)	total: 28.2s	remaining: 1m 52s
5000:	learn: 0.5586367	test: 0.7133675	best: 0.7133674 (4999)	total: 35.3s	remaining: 1m 45s
6000:	learn: 0.5573096	test: 0.7126926	best: 0.7126924 (5998)	total: 42s	remaining: 1m 38s
7000:	learn: 0.5561974	test: 0.7122011	best: 0.7122009 (6999)	total: 49s	remaining: 1m 30s
8000:	learn: 0.5552099	test: 0.7118213	best: 0.7118213 (8000)	total: 56.9s	remaining: 1m 25s
9000:	learn: 0.5543606	test: 0.7115200	best: 0.7115200 (9000)	total: 1m 3s	remaining: 1m 17s
10000:	learn: 0.5535640	test: 0.7112883	best: 0.7112883 (9999)	total: 1m 10s	rema

[32m[I 2021-08-24 07:20:09,778][0m Trial 7 finished with value: 0.7101963050529576 and parameters: {'max_depth': 5, 'learning_rate': 0.011676167278242769, 'max_bin': 644, 'min_data_in_leaf': 49, 'reg_lambda': 40.18064303964313, 'subsample': 0.9522202466287447}. Best is trial 7 with value: 0.7101963050529576.[0m


0:	learn: 0.5942797	test: 0.7398014	best: 0.7398014 (0)	total: 8.96ms	remaining: 2m 59s
1000:	learn: 0.5692023	test: 0.7205124	best: 0.7205124 (1000)	total: 7.99s	remaining: 2m 31s
2000:	learn: 0.5637175	test: 0.7166749	best: 0.7166749 (2000)	total: 15.9s	remaining: 2m 22s
3000:	learn: 0.5605252	test: 0.7146551	best: 0.7146551 (3000)	total: 23.5s	remaining: 2m 13s
4000:	learn: 0.5583017	test: 0.7134457	best: 0.7134457 (4000)	total: 32.1s	remaining: 2m 8s
5000:	learn: 0.5565887	test: 0.7126453	best: 0.7126453 (5000)	total: 39.9s	remaining: 1m 59s
6000:	learn: 0.5551648	test: 0.7121107	best: 0.7121107 (6000)	total: 47.8s	remaining: 1m 51s
7000:	learn: 0.5539168	test: 0.7117223	best: 0.7117223 (6999)	total: 55.4s	remaining: 1m 42s
8000:	learn: 0.5527786	test: 0.7114496	best: 0.7114496 (8000)	total: 1m 4s	remaining: 1m 36s
9000:	learn: 0.5517064	test: 0.7112123	best: 0.7112123 (9000)	total: 1m 11s	remaining: 1m 27s
10000:	learn: 0.5507036	test: 0.7110321	best: 0.7110321 (10000)	total: 1m 1

[32m[I 2021-08-24 07:25:04,404][0m Trial 8 finished with value: 0.7102929948736298 and parameters: {'max_depth': 6, 'learning_rate': 0.011982684434242769, 'max_bin': 637, 'min_data_in_leaf': 46, 'reg_lambda': 41.978254984418946, 'subsample': 0.9315574842791983}. Best is trial 7 with value: 0.7101963050529576.[0m


0:	learn: 0.5943114	test: 0.7398264	best: 0.7398264 (0)	total: 8.33ms	remaining: 2m 46s
1000:	learn: 0.5711389	test: 0.7218228	best: 0.7218228 (1000)	total: 7.84s	remaining: 2m 28s
2000:	learn: 0.5658717	test: 0.7180226	best: 0.7180226 (2000)	total: 14.6s	remaining: 2m 11s
3000:	learn: 0.5627005	test: 0.7158661	best: 0.7158661 (3000)	total: 21.5s	remaining: 2m 1s
4000:	learn: 0.5604516	test: 0.7144429	best: 0.7144429 (4000)	total: 28.6s	remaining: 1m 54s
5000:	learn: 0.5587723	test: 0.7134472	best: 0.7134472 (5000)	total: 35.2s	remaining: 1m 45s
6000:	learn: 0.5574488	test: 0.7127471	best: 0.7127470 (5999)	total: 42.8s	remaining: 1m 39s
7000:	learn: 0.5563543	test: 0.7122446	best: 0.7122439 (6998)	total: 49.7s	remaining: 1m 32s
8000:	learn: 0.5553856	test: 0.7118739	best: 0.7118739 (8000)	total: 56.4s	remaining: 1m 24s
9000:	learn: 0.5545185	test: 0.7115740	best: 0.7115736 (8999)	total: 1m 3s	remaining: 1m 17s
10000:	learn: 0.5537220	test: 0.7113141	best: 0.7113141 (10000)	total: 1m 10

[32m[I 2021-08-24 07:29:55,576][0m Trial 9 finished with value: 0.7102484614531206 and parameters: {'max_depth': 5, 'learning_rate': 0.011513438955500457, 'max_bin': 645, 'min_data_in_leaf': 50, 'reg_lambda': 41.97069902454211, 'subsample': 0.9358390831201401}. Best is trial 7 with value: 0.7101963050529576.[0m


0:	learn: 0.5942908	test: 0.7398102	best: 0.7398102 (0)	total: 8.59ms	remaining: 2m 51s
1000:	learn: 0.5697294	test: 0.7209004	best: 0.7209004 (1000)	total: 8.04s	remaining: 2m 32s
2000:	learn: 0.5642191	test: 0.7170222	best: 0.7170222 (2000)	total: 16s	remaining: 2m 23s
3000:	learn: 0.5609395	test: 0.7149054	best: 0.7149054 (3000)	total: 24.3s	remaining: 2m 17s
4000:	learn: 0.5587016	test: 0.7136297	best: 0.7136297 (4000)	total: 31.7s	remaining: 2m 6s
5000:	learn: 0.5570259	test: 0.7128479	best: 0.7128479 (5000)	total: 39.5s	remaining: 1m 58s
6000:	learn: 0.5556187	test: 0.7122997	best: 0.7122997 (6000)	total: 47.4s	remaining: 1m 50s
7000:	learn: 0.5543773	test: 0.7118795	best: 0.7118792 (6996)	total: 55.9s	remaining: 1m 43s
8000:	learn: 0.5532451	test: 0.7115621	best: 0.7115617 (7999)	total: 1m 3s	remaining: 1m 35s
9000:	learn: 0.5522036	test: 0.7113143	best: 0.7113143 (9000)	total: 1m 11s	remaining: 1m 27s
10000:	learn: 0.5512074	test: 0.7111037	best: 0.7111035 (9997)	total: 1m 19s	

[32m[I 2021-08-24 07:35:22,212][0m Trial 10 finished with value: 0.7102732000578144 and parameters: {'max_depth': 6, 'learning_rate': 0.011351779072365082, 'max_bin': 651, 'min_data_in_leaf': 50, 'reg_lambda': 39.96030089353779, 'subsample': 0.9040164608750998}. Best is trial 7 with value: 0.7101963050529576.[0m


0:	learn: 0.5943103	test: 0.7398241	best: 0.7398241 (0)	total: 8.47ms	remaining: 2m 49s
1000:	learn: 0.5711018	test: 0.7218308	best: 0.7218308 (1000)	total: 7.88s	remaining: 2m 29s
2000:	learn: 0.5657903	test: 0.7179883	best: 0.7179883 (2000)	total: 14.8s	remaining: 2m 13s
3000:	learn: 0.5625789	test: 0.7157902	best: 0.7157902 (3000)	total: 21.7s	remaining: 2m 2s
4000:	learn: 0.5603466	test: 0.7143733	best: 0.7143733 (4000)	total: 28.8s	remaining: 1m 55s
5000:	learn: 0.5586681	test: 0.7134058	best: 0.7134058 (5000)	total: 35.5s	remaining: 1m 46s
6000:	learn: 0.5573577	test: 0.7127479	best: 0.7127479 (6000)	total: 42.6s	remaining: 1m 39s
7000:	learn: 0.5562398	test: 0.7122256	best: 0.7122256 (7000)	total: 49.8s	remaining: 1m 32s
8000:	learn: 0.5552745	test: 0.7118424	best: 0.7118421 (7999)	total: 57.5s	remaining: 1m 26s
9000:	learn: 0.5544104	test: 0.7115425	best: 0.7115425 (9000)	total: 1m 4s	remaining: 1m 18s
10000:	learn: 0.5536011	test: 0.7112962	best: 0.7112962 (10000)	total: 1m 11

[32m[I 2021-08-24 07:40:16,234][0m Trial 11 finished with value: 0.7102037390824761 and parameters: {'max_depth': 5, 'learning_rate': 0.011605762966224602, 'max_bin': 653, 'min_data_in_leaf': 58, 'reg_lambda': 40.83137486821077, 'subsample': 0.9519007178711322}. Best is trial 7 with value: 0.7101963050529576.[0m


0:	learn: 0.5943135	test: 0.7398269	best: 0.7398269 (0)	total: 8.18ms	remaining: 2m 43s
1000:	learn: 0.5711430	test: 0.7218373	best: 0.7218373 (1000)	total: 6.99s	remaining: 2m 12s
2000:	learn: 0.5659101	test: 0.7180453	best: 0.7180453 (2000)	total: 13.8s	remaining: 2m 3s
3000:	learn: 0.5627323	test: 0.7158577	best: 0.7158577 (3000)	total: 20.7s	remaining: 1m 57s
4000:	learn: 0.5605222	test: 0.7144418	best: 0.7144418 (4000)	total: 27.9s	remaining: 1m 51s
5000:	learn: 0.5588325	test: 0.7134556	best: 0.7134556 (5000)	total: 35.1s	remaining: 1m 45s
6000:	learn: 0.5575154	test: 0.7127671	best: 0.7127671 (6000)	total: 42s	remaining: 1m 38s
7000:	learn: 0.5563970	test: 0.7122532	best: 0.7122532 (7000)	total: 48.7s	remaining: 1m 30s
8000:	learn: 0.5554309	test: 0.7118706	best: 0.7118706 (8000)	total: 55.7s	remaining: 1m 23s
9000:	learn: 0.5545688	test: 0.7115666	best: 0.7115666 (9000)	total: 1m 3s	remaining: 1m 18s
10000:	learn: 0.5537765	test: 0.7113152	best: 0.7113143 (9997)	total: 1m 10s	r

[32m[I 2021-08-24 07:45:09,228][0m Trial 12 finished with value: 0.7102089947978685 and parameters: {'max_depth': 5, 'learning_rate': 0.011365984379537742, 'max_bin': 646, 'min_data_in_leaf': 57, 'reg_lambda': 40.70504959362708, 'subsample': 0.9522014269389555}. Best is trial 7 with value: 0.7101963050529576.[0m


0:	learn: 0.5943071	test: 0.7398228	best: 0.7398228 (0)	total: 14.9ms	remaining: 4m 58s
1000:	learn: 0.5709461	test: 0.7216886	best: 0.7216886 (1000)	total: 6.91s	remaining: 2m 11s
2000:	learn: 0.5656577	test: 0.7178384	best: 0.7178384 (2000)	total: 14s	remaining: 2m 5s
3000:	learn: 0.5625061	test: 0.7157088	best: 0.7157088 (3000)	total: 20.8s	remaining: 1m 57s
4000:	learn: 0.5602459	test: 0.7142933	best: 0.7142933 (4000)	total: 27.8s	remaining: 1m 51s
5000:	learn: 0.5585687	test: 0.7133359	best: 0.7133359 (5000)	total: 35.5s	remaining: 1m 46s
6000:	learn: 0.5572569	test: 0.7126435	best: 0.7126435 (6000)	total: 42.1s	remaining: 1m 38s
7000:	learn: 0.5561406	test: 0.7121510	best: 0.7121510 (7000)	total: 49s	remaining: 1m 30s
8000:	learn: 0.5551870	test: 0.7117806	best: 0.7117806 (8000)	total: 56.2s	remaining: 1m 24s
9000:	learn: 0.5543207	test: 0.7114888	best: 0.7114882 (8999)	total: 1m 2s	remaining: 1m 16s
10000:	learn: 0.5535276	test: 0.7112567	best: 0.7112567 (10000)	total: 1m 10s	re

[32m[I 2021-08-24 07:50:01,055][0m Trial 13 finished with value: 0.7101608935751483 and parameters: {'max_depth': 5, 'learning_rate': 0.011784358899847431, 'max_bin': 645, 'min_data_in_leaf': 50, 'reg_lambda': 40.56973418956961, 'subsample': 0.9437469388661971}. Best is trial 13 with value: 0.7101608935751483.[0m


0:	learn: 0.5942823	test: 0.7398035	best: 0.7398035 (0)	total: 9.34ms	remaining: 3m 6s
1000:	learn: 0.5693213	test: 0.7206087	best: 0.7206087 (1000)	total: 8.01s	remaining: 2m 32s
2000:	learn: 0.5638260	test: 0.7167689	best: 0.7167689 (2000)	total: 15.5s	remaining: 2m 19s
3000:	learn: 0.5605489	test: 0.7146927	best: 0.7146927 (3000)	total: 23.3s	remaining: 2m 12s
4000:	learn: 0.5583352	test: 0.7134645	best: 0.7134645 (4000)	total: 31.6s	remaining: 2m 6s
5000:	learn: 0.5566078	test: 0.7126458	best: 0.7126458 (5000)	total: 39.9s	remaining: 1m 59s
6000:	learn: 0.5551653	test: 0.7120805	best: 0.7120805 (6000)	total: 47.4s	remaining: 1m 50s
7000:	learn: 0.5539095	test: 0.7117082	best: 0.7117082 (7000)	total: 55.3s	remaining: 1m 42s
8000:	learn: 0.5527729	test: 0.7114149	best: 0.7114149 (8000)	total: 1m 3s	remaining: 1m 34s
9000:	learn: 0.5517121	test: 0.7111754	best: 0.7111754 (9000)	total: 1m 12s	remaining: 1m 28s
10000:	learn: 0.5506907	test: 0.7110075	best: 0.7110075 (10000)	total: 1m 19

[32m[I 2021-08-24 07:55:31,594][0m Trial 14 finished with value: 0.7101872679255907 and parameters: {'max_depth': 6, 'learning_rate': 0.011852080280955829, 'max_bin': 644, 'min_data_in_leaf': 50, 'reg_lambda': 40.260881271495656, 'subsample': 0.9437685940601936}. Best is trial 13 with value: 0.7101608935751483.[0m


Number of finished trials: 15
Best trial: {'max_depth': 5, 'learning_rate': 0.011784358899847431, 'max_bin': 645, 'min_data_in_leaf': 50, 'reg_lambda': 40.56973418956961, 'subsample': 0.9437469388661971}


In [14]:
# Display the hyperparameters of the best trial found by the study.
study.best_params

{'max_depth': 5,
 'learning_rate': 0.011784358899847431,
 'max_bin': 645,
 'min_data_in_leaf': 50,
 'reg_lambda': 40.56973418956961,
 'subsample': 0.9437469388661971}

# Log

====== Ordinal encoding =========

 0.7106017728195324 no noise ver2

 0.71414426290903 row-wise noise ver1

 0.7144009762376538 random noise ver3
 
 ====== One-hot encoding =========
 
 0.7104558237409161 no noise ver7
 
 0.71027414342552 no noise ver8 (narrow space)
 
 0.7101568290815834 no noise ver9 (narrow space) final
 
 ========================================== 
 =========== Changed pseudo labels ===============

====== One-hot encoding =========

0.7106317384292977 no noise ver12
 
0.7103170281381075 no noise ver13 (narrow space)

0.7102441417639822 no noise ver14 (narrow space)