In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import random
import os
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error

#import lightgbm as lgb
#import xgboost as xgb
import catboost as ctb

import warnings
warnings.simplefilter('ignore')

# Parameters

In [2]:
# Name of the column the models are trained to predict.
target = 'loss'

# Set to True for a quick smoke run with a minimal training budget.
DEBUG = False

# The same seeds are used in both modes.
SEED = CVSEED = 17

# Training budget per mode, given in the order
# (N_SPLITS, N_ESTIMATORS, EARLY_STOPPING_ROUNDS, VERBOSE).
N_SPLITS, N_ESTIMATORS, EARLY_STOPPING_ROUNDS, VERBOSE = (
    (2, 1, 1, 100) if DEBUG else (10, 10000, 200, 1000)
)

In [3]:
def set_seed(seed=17):
    """Seed the random number sources used by this notebook.

    Seeds NumPy's global generator and Python's ``random`` module, and
    exports ``PYTHONHASHSEED`` into the process environment.

    Args:
        seed: Integer seed applied to every source (default 17).
    """
    # NOTE(review): PYTHONHASHSEED is presumably only honoured at interpreter
    # start-up, so this likely affects child processes only -- confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)
    
# Seed the global RNGs once, before any data is loaded or sampled.
set_seed(SEED)

# Load data

In [4]:
# Directory holding the competition CSV files.
INPUT = Path("../input/tabular-playground-series-aug-2021")

# Read the three competition files in a fixed order: train, test, sample submission.
train, test, submission = (
    pd.read_csv(INPUT / csv_name)
    for csv_name in ("train.csv", "test.csv", "sample_submission.csv")
)

# Preprocessing

In [5]:
# Feature columns are taken from the test frame: every column whose name contains 'f'.
scale_features = [name for name in test.columns if 'f' in name]

# Learn the scaling statistics on the training rows only, then apply the
# same transformation to both frames (overwriting the raw feature values).
ss = StandardScaler().fit(train[scale_features])
train[scale_features] = ss.transform(train[scale_features])
test[scale_features] = ss.transform(test[scale_features])

In [6]:
# Swap noise

# Random
def apply_noise_rn(df, p=.75):
    """Return a copy of ``df`` with randomly chosen cells swapped out.

    Each cell is kept independently with probability ``p``. A cell that is
    not kept takes the value at the same position in a row-permuted copy of
    ``df`` (one permutation of the rows is shared by all columns).

    Args:
        df: DataFrame to corrupt; it is not modified.
        p: Probability of keeping a cell unchanged.

    Returns:
        A new DataFrame with the same shape, index and columns as ``df``.
    """
    # Draw order matters for reproducibility: keep-mask first, permutation second.
    keep_mask = np.random.binomial(1, p, df.shape) == 1
    donor_values = np.random.permutation(df)
    return df.where(keep_mask, donor_values)

# Row-wise
def apply_noise_row(df, p=.75):
    """Return a copy of ``df`` where every row has the same number of swapped cells.

    In each row, ``int(p * n_columns)`` randomly chosen columns are kept; the
    remaining cells take the value at the same position in a row-permuted copy
    of ``df`` (one permutation of the rows is shared by all columns).

    Args:
        df: Two-dimensional DataFrame to corrupt; it is not modified.
        p: Fraction of columns kept unchanged in every row.

    Returns:
        A new DataFrame with the same shape, index and columns as ``df``.
    """
    n_rows, n_cols = df.shape
    keep_mask = np.zeros((n_rows, n_cols), dtype=bool)
    for row in range(n_rows):
        # One draw per row, without replacement, selects the columns to keep.
        kept_cols = np.random.choice(n_cols, int(p * n_cols), replace=False)
        keep_mask[row, kept_cols] = True
    return df.where(keep_mask, np.random.permutation(df))

# Pseudo Label

In [7]:
# Predictions from an earlier submission are used as pseudo labels for the test rows.
pseudo = pd.read_csv("../input/tps-aug-2021-lgbm-xgb-catboost/submission.csv")[target]

# Attach the pseudo labels to the test features as an extra column.
test_pseudo = pd.concat([test, pseudo], axis=1)

# Stack the real training rows first and the pseudo-labelled test rows after
# them, renumbering the index from zero.
all_pseudo = pd.concat([train, test_pseudo], ignore_index=True)

# Optuna

In [8]:
# Optuna for parameter search
# NOTE(review): the install is unpinned, so the Optuna version (and with it the
# available Trial/sampler API) can change between runs; consider pinning a version.
!pip install -q optuna

import optuna
import pickle



In [9]:
# Tune the CatBoost tree hyperparameters together with the learning rate (searched in a
# narrow band); the number of boosting rounds is capped at N_ESTIMATORS and picked by
# early stopping on the validation rows.
def objective(trial, X=all_pseudo[scale_features], y=all_pseudo[target]):
    """Optuna objective: hold-out RMSE of a seed-averaged CatBoost regressor.

    The data is split 80/20 with ``train_test_split`` (seeded by ``CVSEED``).
    Models are fitted on the training part, which may contain pseudo-labelled
    rows. Only validation rows whose index label is below ``len(train)`` are
    used for early stopping and scoring, so the score is computed on rows with
    real labels.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X: Feature frame. Its index labels are assumed to be the row positions
            in ``all_pseudo`` (real training rows first), as is the case for
            the default argument.
        y: Target series aligned with ``X``.

    Returns:
        RMSE on the real-label validation rows of the prediction averaged over
        the seeds ``SEED`` and ``SEED + 1``.

    Raises:
        ValueError: If the validation split contains no real-label rows.
    """
    # The suggest_* calls are kept in their original order so that a seeded
    # sampler produces the same sequence of values.
    param_space = {
        'bootstrap_type': 'Poisson',
        'loss_function': 'RMSE',
        'eval_metric': 'RMSE',
        'task_type': 'GPU',
        'max_depth': trial.suggest_int('max_depth', 9, 13),
        # suggest_float replaces the deprecated suggest_uniform (same sampling range).
        'learning_rate': trial.suggest_float('learning_rate', 6e-3, 8e-3),
        'n_estimators': N_ESTIMATORS,
        'max_bin': trial.suggest_int('max_bin', 407, 487),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 57, 77),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.04, 0.24),
        'subsample': trial.suggest_float('subsample', 0.468, 0.588),
    }

    seed_list = [SEED, SEED + 1]
    n_real_rows = train.shape[0]

    X_train, X_holdout, y_train, y_holdout = train_test_split(
        X, y, test_size=.2, random_state=CVSEED
    )

    # Keep only hold-out rows that come from the real training data; the
    # pseudo-labelled test rows (index label >= len(train)) are not scored.
    is_real = np.asarray(X_holdout.index) < n_real_rows
    if not is_real.any():
        raise ValueError("validation split contains no rows with real labels")
    X_valid, y_valid = X_holdout.loc[is_real], y_holdout.loc[is_real]

    # Swap noise (apply_noise_rn / apply_noise_row) was tried on X_train at this
    # point in earlier versions; it is not applied here.

    valid_pred = np.zeros(len(y_valid))
    for inseed in seed_list:
        model = ctb.CatBoostRegressor(**{**param_space, 'random_seed': inseed})
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_valid, y_valid)],
            use_best_model=True,
            early_stopping_rounds=EARLY_STOPPING_ROUNDS,
            verbose=VERBOSE,
        )
        # Average the predictions of the per-seed models.
        valid_pred += model.predict(X_valid) / len(seed_list)

    # Take the square root explicitly: the `squared=False` argument of
    # mean_squared_error is deprecated and absent from recent scikit-learn releases.
    return float(np.sqrt(mean_squared_error(y_valid, valid_pred)))

In [10]:
# Pass an explicitly seeded TPE sampler (TPE is also Optuna's default sampler) so the
# sequence of suggested hyperparameters is repeatable across runs; an unseeded sampler
# does not follow the seeds set by set_seed().
# NOTE(review): GPU training may still introduce small run-to-run differences in the
# scores -- confirm.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=15)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-08-21 10:34:45,257][0m A new study created in memory with name: no-name-9d35a29d-812a-4f9a-8700-409ce8e81cf1[0m


0:	learn: 6.3396414	test: 7.8583929	best: 7.8583929 (0)	total: 168ms	remaining: 27m 58s
1000:	learn: 5.7255046	test: 7.7896302	best: 7.7896302 (1000)	total: 2m 34s	remaining: 23m 10s
2000:	learn: 5.2774944	test: 7.7780582	best: 7.7780470 (1999)	total: 5m 6s	remaining: 20m 26s
3000:	learn: 4.8852618	test: 7.7725835	best: 7.7725717 (2993)	total: 7m 37s	remaining: 17m 48s
4000:	learn: 4.5160588	test: 7.7700361	best: 7.7699652 (3974)	total: 10m 10s	remaining: 15m 14s
5000:	learn: 4.1663284	test: 7.7683423	best: 7.7682287 (4982)	total: 12m 43s	remaining: 12m 43s
bestTest = 7.767339299
bestIteration = 5474
Shrink model to first 5475 iterations.
0:	learn: 6.3396084	test: 7.8584233	best: 7.8584233 (0)	total: 156ms	remaining: 26m 3s
1000:	learn: 5.7230081	test: 7.7901554	best: 7.7901554 (1000)	total: 2m 39s	remaining: 23m 54s
2000:	learn: 5.2756285	test: 7.7787080	best: 7.7787080 (2000)	total: 5m 16s	remaining: 21m 4s
3000:	learn: 4.8867034	test: 7.7737228	best: 7.7737228 (3000)	total: 7m 52s	r

[32m[I 2021-08-21 11:06:23,780][0m Trial 0 finished with value: 7.7646494405604765 and parameters: {'max_depth': 13, 'learning_rate': 0.0077858696054715045, 'max_bin': 486, 'min_data_in_leaf': 72, 'reg_lambda': 0.051424000171051004, 'subsample': 0.5559560875184321}. Best is trial 0 with value: 7.7646494405604765.[0m


0:	learn: 6.3400660	test: 7.8584220	best: 7.8584220 (0)	total: 34ms	remaining: 5m 39s
1000:	learn: 6.1367464	test: 7.7921680	best: 7.7921680 (1000)	total: 33.6s	remaining: 5m 1s
2000:	learn: 6.0120299	test: 7.7793358	best: 7.7793358 (2000)	total: 1m 6s	remaining: 4m 26s
3000:	learn: 5.9009533	test: 7.7730913	best: 7.7730913 (3000)	total: 1m 39s	remaining: 3m 51s
4000:	learn: 5.7967192	test: 7.7688533	best: 7.7688533 (4000)	total: 2m 11s	remaining: 3m 17s
5000:	learn: 5.6945533	test: 7.7659804	best: 7.7659804 (5000)	total: 2m 44s	remaining: 2m 44s
6000:	learn: 5.5961965	test: 7.7644694	best: 7.7644694 (6000)	total: 3m 17s	remaining: 2m 11s
bestTest = 7.762944292
bestIteration = 6783
Shrink model to first 6784 iterations.
0:	learn: 6.3400522	test: 7.8584465	best: 7.8584465 (0)	total: 36.2ms	remaining: 6m 1s
1000:	learn: 6.1380280	test: 7.7915798	best: 7.7915798 (1000)	total: 33.8s	remaining: 5m 3s
2000:	learn: 6.0135056	test: 7.7781954	best: 7.7781954 (2000)	total: 1m 7s	remaining: 4m 27

[32m[I 2021-08-21 11:16:01,632][0m Trial 1 finished with value: 7.758298954986045 and parameters: {'max_depth': 10, 'learning_rate': 0.007831840041450305, 'max_bin': 468, 'min_data_in_leaf': 60, 'reg_lambda': 0.15526777559231913, 'subsample': 0.5336435347914744}. Best is trial 1 with value: 7.758298954986045.[0m


0:	learn: 6.3398746	test: 7.8584839	best: 7.8584839 (0)	total: 153ms	remaining: 25m 32s
1000:	learn: 5.8368428	test: 7.7935498	best: 7.7935498 (1000)	total: 2m 29s	remaining: 22m 23s
2000:	learn: 5.4646355	test: 7.7802572	best: 7.7802572 (2000)	total: 4m 56s	remaining: 19m 43s
3000:	learn: 5.1451193	test: 7.7740599	best: 7.7740532 (2970)	total: 7m 21s	remaining: 17m 9s
4000:	learn: 4.8430269	test: 7.7707122	best: 7.7706760 (3953)	total: 9m 46s	remaining: 14m 39s
5000:	learn: 4.5546285	test: 7.7681061	best: 7.7680930 (4987)	total: 12m 12s	remaining: 12m 12s
6000:	learn: 4.2726167	test: 7.7656173	best: 7.7656067 (5999)	total: 14m 40s	remaining: 9m 46s
bestTest = 7.764626748
bestIteration = 6466
Shrink model to first 6467 iterations.
0:	learn: 6.3397747	test: 7.8584576	best: 7.8584576 (0)	total: 148ms	remaining: 24m 35s
1000:	learn: 5.8362769	test: 7.7935332	best: 7.7935207 (998)	total: 2m 29s	remaining: 22m 27s
2000:	learn: 5.4646212	test: 7.7809055	best: 7.7809055 (2000)	total: 4m 57s	r

[32m[I 2021-08-21 11:52:04,017][0m Trial 2 finished with value: 7.761587661465915 and parameters: {'max_depth': 13, 'learning_rate': 0.006391580599278894, 'max_bin': 439, 'min_data_in_leaf': 57, 'reg_lambda': 0.22591874383267782, 'subsample': 0.5227366378846107}. Best is trial 1 with value: 7.758298954986045.[0m


0:	learn: 6.3399879	test: 7.8584769	best: 7.8584769 (0)	total: 83.9ms	remaining: 13m 58s
1000:	learn: 5.9856276	test: 7.7938217	best: 7.7938217 (1000)	total: 1m 28s	remaining: 13m 12s
2000:	learn: 5.7369605	test: 7.7812701	best: 7.7812701 (2000)	total: 2m 54s	remaining: 11m 36s
3000:	learn: 5.5121017	test: 7.7752917	best: 7.7752840 (2997)	total: 4m 19s	remaining: 10m 4s
4000:	learn: 5.3023450	test: 7.7715137	best: 7.7715137 (4000)	total: 5m 44s	remaining: 8m 36s
5000:	learn: 5.0981040	test: 7.7686819	best: 7.7686729 (4992)	total: 7m 10s	remaining: 7m 10s
bestTest = 7.767489523
bestIteration = 5600
Shrink model to first 5601 iterations.
0:	learn: 6.3399751	test: 7.8584941	best: 7.8584941 (0)	total: 94.4ms	remaining: 15m 44s
1000:	learn: 5.9842410	test: 7.7937024	best: 7.7936995 (999)	total: 1m 29s	remaining: 13m 20s
2000:	learn: 5.7306361	test: 7.7802998	best: 7.7802998 (2000)	total: 2m 56s	remaining: 11m 44s
3000:	learn: 5.5123024	test: 7.7741353	best: 7.7741353 (3000)	total: 4m 22s	re

[32m[I 2021-08-21 12:14:45,262][0m Trial 3 finished with value: 7.761981176998604 and parameters: {'max_depth': 12, 'learning_rate': 0.006548734431162558, 'max_bin': 468, 'min_data_in_leaf': 69, 'reg_lambda': 0.14538121154337252, 'subsample': 0.5372354190443043}. Best is trial 1 with value: 7.758298954986045.[0m


0:	learn: 6.3399832	test: 7.8584040	best: 7.8584040 (0)	total: 53.7ms	remaining: 8m 56s
1000:	learn: 6.0583950	test: 7.7912285	best: 7.7912285 (1000)	total: 52.3s	remaining: 7m 50s
2000:	learn: 5.8692461	test: 7.7786102	best: 7.7786025 (1999)	total: 1m 42s	remaining: 6m 50s
3000:	learn: 5.7000211	test: 7.7719230	best: 7.7718980 (2989)	total: 2m 33s	remaining: 5m 58s
4000:	learn: 5.5365691	test: 7.7681080	best: 7.7680792 (3987)	total: 3m 24s	remaining: 5m 6s
5000:	learn: 5.3799756	test: 7.7650875	best: 7.7650875 (5000)	total: 4m 15s	remaining: 4m 15s
6000:	learn: 5.2244928	test: 7.7632924	best: 7.7632754 (5998)	total: 5m 6s	remaining: 3m 24s
7000:	learn: 5.0727159	test: 7.7609711	best: 7.7609637 (6999)	total: 5m 57s	remaining: 2m 33s
8000:	learn: 4.9237078	test: 7.7593456	best: 7.7593389 (7997)	total: 6m 49s	remaining: 1m 42s
9000:	learn: 4.7763903	test: 7.7588383	best: 7.7587582 (8890)	total: 7m 41s	remaining: 51.3s
bestTest = 7.758758245
bestIteration = 8890
Shrink model to first 8891

[32m[I 2021-08-21 12:31:20,995][0m Trial 4 finished with value: 7.756711432376151 and parameters: {'max_depth': 11, 'learning_rate': 0.007698581630599048, 'max_bin': 472, 'min_data_in_leaf': 71, 'reg_lambda': 0.045358981110052994, 'subsample': 0.5023987878338134}. Best is trial 4 with value: 7.756711432376151.[0m


0:	learn: 6.3400855	test: 7.8584401	best: 7.8584401 (0)	total: 32.8ms	remaining: 5m 28s
1000:	learn: 6.1400410	test: 7.7930757	best: 7.7930757 (1000)	total: 32.3s	remaining: 4m 50s
2000:	learn: 6.0154403	test: 7.7795217	best: 7.7795105 (1996)	total: 1m 4s	remaining: 4m 16s
3000:	learn: 5.9064171	test: 7.7730387	best: 7.7730316 (2997)	total: 1m 35s	remaining: 3m 43s
4000:	learn: 5.8032499	test: 7.7687917	best: 7.7687917 (4000)	total: 2m 7s	remaining: 3m 10s
5000:	learn: 5.7008497	test: 7.7657554	best: 7.7657441 (4994)	total: 2m 38s	remaining: 2m 38s
6000:	learn: 5.6015110	test: 7.7633384	best: 7.7633268 (5996)	total: 3m 10s	remaining: 2m 7s
7000:	learn: 5.5028612	test: 7.7614488	best: 7.7614211 (6981)	total: 3m 42s	remaining: 1m 35s
8000:	learn: 5.4056307	test: 7.7600323	best: 7.7599979 (7984)	total: 4m 14s	remaining: 1m 3s
9000:	learn: 5.3101121	test: 7.7592386	best: 7.7591969 (8980)	total: 4m 46s	remaining: 31.8s
9999:	learn: 5.2165206	test: 7.7580300	best: 7.7580223 (9997)	total: 5m 

[32m[I 2021-08-21 12:41:42,373][0m Trial 5 finished with value: 7.7571825171571325 and parameters: {'max_depth': 10, 'learning_rate': 0.007513435782072527, 'max_bin': 423, 'min_data_in_leaf': 64, 'reg_lambda': 0.06468541358977385, 'subsample': 0.5198426299234927}. Best is trial 4 with value: 7.756711432376151.[0m


0:	learn: 6.3402243	test: 7.8585169	best: 7.8585169 (0)	total: 23.2ms	remaining: 3m 51s
1000:	learn: 6.2027305	test: 7.7975643	best: 7.7975573 (999)	total: 22.9s	remaining: 3m 26s
2000:	learn: 6.1279215	test: 7.7835576	best: 7.7835576 (2000)	total: 45.2s	remaining: 3m
3000:	learn: 6.0639279	test: 7.7765749	best: 7.7765723 (2999)	total: 1m 7s	remaining: 2m 36s
4000:	learn: 6.0038337	test: 7.7720911	best: 7.7720857 (3999)	total: 1m 29s	remaining: 2m 14s
5000:	learn: 5.9463387	test: 7.7690728	best: 7.7690728 (5000)	total: 1m 52s	remaining: 1m 51s
6000:	learn: 5.8910748	test: 7.7667554	best: 7.7667483 (5996)	total: 2m 13s	remaining: 1m 29s
7000:	learn: 5.8367818	test: 7.7647786	best: 7.7647712 (6996)	total: 2m 36s	remaining: 1m 6s
8000:	learn: 5.7831998	test: 7.7629889	best: 7.7629889 (8000)	total: 2m 58s	remaining: 44.6s
9000:	learn: 5.7296270	test: 7.7616322	best: 7.7616322 (8998)	total: 3m 20s	remaining: 22.3s
9999:	learn: 5.6773891	test: 7.7598961	best: 7.7598855 (9998)	total: 3m 42s	r

[32m[I 2021-08-21 12:49:23,415][0m Trial 6 finished with value: 7.759223270722371 and parameters: {'max_depth': 9, 'learning_rate': 0.0065893729734609234, 'max_bin': 433, 'min_data_in_leaf': 71, 'reg_lambda': 0.20975510674245768, 'subsample': 0.5246361567678419}. Best is trial 4 with value: 7.756711432376151.[0m


0:	learn: 6.3401634	test: 7.8584782	best: 7.8584782 (0)	total: 23.4ms	remaining: 3m 53s
1000:	learn: 6.1876336	test: 7.7944698	best: 7.7944698 (1000)	total: 23.1s	remaining: 3m 27s
2000:	learn: 6.1022230	test: 7.7803697	best: 7.7803697 (2000)	total: 45.9s	remaining: 3m 3s
3000:	learn: 6.0277789	test: 7.7735118	best: 7.7735118 (3000)	total: 1m 7s	remaining: 2m 37s
4000:	learn: 5.9575021	test: 7.7694916	best: 7.7694884 (3997)	total: 1m 29s	remaining: 2m 14s
5000:	learn: 5.8904706	test: 7.7670045	best: 7.7670026 (4999)	total: 1m 52s	remaining: 1m 52s
6000:	learn: 5.8245150	test: 7.7652384	best: 7.7652384 (6000)	total: 2m 14s	remaining: 1m 29s
7000:	learn: 5.7597106	test: 7.7634530	best: 7.7634312 (6983)	total: 2m 36s	remaining: 1m 7s
8000:	learn: 5.6962630	test: 7.7618866	best: 7.7618821 (7999)	total: 2m 59s	remaining: 44.9s
9000:	learn: 5.6330839	test: 7.7605444	best: 7.7605441 (8979)	total: 3m 21s	remaining: 22.4s
9999:	learn: 5.5706348	test: 7.7591027	best: 7.7590664 (9923)	total: 3m 4

[32m[I 2021-08-21 12:57:09,276][0m Trial 7 finished with value: 7.757494531210339 and parameters: {'max_depth': 9, 'learning_rate': 0.007538745237146731, 'max_bin': 418, 'min_data_in_leaf': 59, 'reg_lambda': 0.06407428073334664, 'subsample': 0.5799470766637287}. Best is trial 4 with value: 7.756711432376151.[0m


0:	learn: 6.3401420	test: 7.8584446	best: 7.8584446 (0)	total: 23ms	remaining: 3m 49s
1000:	learn: 6.1868207	test: 7.7938831	best: 7.7938831 (1000)	total: 22.6s	remaining: 3m 23s
2000:	learn: 6.1027732	test: 7.7802588	best: 7.7802588 (2000)	total: 45.2s	remaining: 3m
3000:	learn: 6.0296979	test: 7.7735625	best: 7.7735625 (3000)	total: 1m 6s	remaining: 2m 35s
4000:	learn: 5.9615636	test: 7.7694329	best: 7.7694088 (3995)	total: 1m 29s	remaining: 2m 13s
5000:	learn: 5.8958388	test: 7.7665050	best: 7.7664877 (4994)	total: 1m 51s	remaining: 1m 51s
6000:	learn: 5.8315706	test: 7.7642286	best: 7.7642199 (5998)	total: 2m 13s	remaining: 1m 29s
7000:	learn: 5.7681968	test: 7.7626433	best: 7.7626270 (6981)	total: 2m 35s	remaining: 1m 6s
8000:	learn: 5.7049044	test: 7.7611147	best: 7.7610864 (7979)	total: 2m 57s	remaining: 44.4s
9000:	learn: 5.6429735	test: 7.7599844	best: 7.7599658 (8999)	total: 3m 20s	remaining: 22.2s
9999:	learn: 5.5820289	test: 7.7587046	best: 7.7587046 (9998)	total: 3m 42s	re

[32m[I 2021-08-21 13:04:50,725][0m Trial 8 finished with value: 7.757432812617756 and parameters: {'max_depth': 9, 'learning_rate': 0.007820689835681733, 'max_bin': 440, 'min_data_in_leaf': 66, 'reg_lambda': 0.20063642755396546, 'subsample': 0.5131878804957468}. Best is trial 4 with value: 7.756711432376151.[0m


0:	learn: 6.3401328	test: 7.8584725	best: 7.8584725 (0)	total: 34.1ms	remaining: 5m 40s
1000:	learn: 6.1510225	test: 7.7954745	best: 7.7954745 (1000)	total: 33s	remaining: 4m 56s
2000:	learn: 6.0348001	test: 7.7813442	best: 7.7813442 (2000)	total: 1m 5s	remaining: 4m 21s
3000:	learn: 5.9328987	test: 7.7754427	best: 7.7754427 (3000)	total: 1m 36s	remaining: 3m 46s
4000:	learn: 5.8369149	test: 7.7711139	best: 7.7711139 (4000)	total: 2m 7s	remaining: 3m 11s
5000:	learn: 5.7442202	test: 7.7678859	best: 7.7678859 (5000)	total: 2m 39s	remaining: 2m 39s
6000:	learn: 5.6545526	test: 7.7655528	best: 7.7655528 (6000)	total: 3m 11s	remaining: 2m 7s
7000:	learn: 5.5646544	test: 7.7637604	best: 7.7637604 (7000)	total: 3m 43s	remaining: 1m 35s
8000:	learn: 5.4753796	test: 7.7624599	best: 7.7624448 (7989)	total: 4m 15s	remaining: 1m 3s
9000:	learn: 5.3869034	test: 7.7612300	best: 7.7612001 (8993)	total: 4m 47s	remaining: 31.9s
9999:	learn: 5.2996798	test: 7.7599751	best: 7.7599353 (9973)	total: 5m 19

[32m[I 2021-08-21 13:15:48,242][0m Trial 9 finished with value: 7.7579018226670655 and parameters: {'max_depth': 10, 'learning_rate': 0.00697738291977645, 'max_bin': 461, 'min_data_in_leaf': 76, 'reg_lambda': 0.04646993497903901, 'subsample': 0.4745284527730163}. Best is trial 4 with value: 7.756711432376151.[0m


0:	learn: 6.3400596	test: 7.8585242	best: 7.8585242 (0)	total: 54.7ms	remaining: 9m 7s
1000:	learn: 6.0753025	test: 7.7932072	best: 7.7932056 (999)	total: 52s	remaining: 7m 47s
2000:	learn: 5.8986307	test: 7.7798871	best: 7.7798871 (2000)	total: 1m 43s	remaining: 6m 53s
3000:	learn: 5.7429084	test: 7.7732485	best: 7.7732263 (2996)	total: 2m 33s	remaining: 5m 58s
4000:	learn: 5.5934378	test: 7.7688080	best: 7.7688080 (4000)	total: 3m 24s	remaining: 5m 6s
5000:	learn: 5.4493050	test: 7.7660652	best: 7.7660610 (4998)	total: 4m 15s	remaining: 4m 15s
6000:	learn: 5.3075350	test: 7.7641139	best: 7.7639704 (5976)	total: 5m 6s	remaining: 3m 24s
7000:	learn: 5.1677603	test: 7.7621683	best: 7.7621214 (6986)	total: 5m 57s	remaining: 2m 33s
8000:	learn: 5.0286302	test: 7.7606713	best: 7.7606180 (7983)	total: 6m 49s	remaining: 1m 42s
bestTest = 7.76061799
bestIteration = 7983
Shrink model to first 7984 iterations.
0:	learn: 6.3400320	test: 7.8584531	best: 7.8584531 (0)	total: 54.4ms	remaining: 9m 3

[32m[I 2021-08-21 13:30:01,542][0m Trial 10 finished with value: 7.759338579562405 and parameters: {'max_depth': 11, 'learning_rate': 0.007138323980886674, 'max_bin': 485, 'min_data_in_leaf': 77, 'reg_lambda': 0.10633075056344596, 'subsample': 0.4794599070068866}. Best is trial 4 with value: 7.756711432376151.[0m


0:	learn: 6.3400069	test: 7.8584449	best: 7.8584449 (0)	total: 48.9ms	remaining: 8m 9s
1000:	learn: 6.0700510	test: 7.7926194	best: 7.7926194 (1000)	total: 47.4s	remaining: 7m 6s
2000:	learn: 5.8865368	test: 7.7797076	best: 7.7797076 (2000)	total: 1m 34s	remaining: 6m 16s
3000:	learn: 5.7204502	test: 7.7732132	best: 7.7732093 (2973)	total: 2m 20s	remaining: 5m 27s
4000:	learn: 5.5638189	test: 7.7689916	best: 7.7689916 (4000)	total: 3m 6s	remaining: 4m 39s
5000:	learn: 5.4112620	test: 7.7663615	best: 7.7662700 (4974)	total: 3m 52s	remaining: 3m 52s
6000:	learn: 5.2617072	test: 7.7642954	best: 7.7642610 (5987)	total: 4m 39s	remaining: 3m 6s
7000:	learn: 5.1165514	test: 7.7626022	best: 7.7626022 (7000)	total: 5m 26s	remaining: 2m 19s
8000:	learn: 4.9728386	test: 7.7611063	best: 7.7610877 (7993)	total: 6m 13s	remaining: 1m 33s
9000:	learn: 4.8314757	test: 7.7595992	best: 7.7595670 (8959)	total: 7m	remaining: 46.7s
9999:	learn: 4.6958674	test: 7.7587345	best: 7.7587129 (9998)	total: 7m 47s	

[32m[I 2021-08-21 13:46:05,514][0m Trial 11 finished with value: 7.756000258000434 and parameters: {'max_depth': 11, 'learning_rate': 0.0073229907325453696, 'max_bin': 408, 'min_data_in_leaf': 64, 'reg_lambda': 0.09207259245356939, 'subsample': 0.49677374687682424}. Best is trial 11 with value: 7.756000258000434.[0m


0:	learn: 6.3400158	test: 7.8584439	best: 7.8584439 (0)	total: 53.4ms	remaining: 8m 53s
1000:	learn: 6.0738116	test: 7.7923027	best: 7.7923027 (1000)	total: 51.2s	remaining: 7m 40s
2000:	learn: 5.8914180	test: 7.7787054	best: 7.7787054 (2000)	total: 1m 41s	remaining: 6m 47s
3000:	learn: 5.7311285	test: 7.7724764	best: 7.7724729 (2998)	total: 2m 32s	remaining: 5m 54s
4000:	learn: 5.5819811	test: 7.7687679	best: 7.7687679 (4000)	total: 3m 21s	remaining: 5m 2s
5000:	learn: 5.4336334	test: 7.7656703	best: 7.7656703 (5000)	total: 4m 12s	remaining: 4m 12s
6000:	learn: 5.2896160	test: 7.7638895	best: 7.7637742 (5900)	total: 5m 3s	remaining: 3m 21s
7000:	learn: 5.1462198	test: 7.7624529	best: 7.7624529 (7000)	total: 5m 53s	remaining: 2m 31s
8000:	learn: 5.0077570	test: 7.7605226	best: 7.7605184 (7985)	total: 6m 44s	remaining: 1m 41s
9000:	learn: 4.8721360	test: 7.7594520	best: 7.7593582 (8928)	total: 7m 36s	remaining: 50.6s
bestTest = 7.758820265
bestIteration = 9475
Shrink model to first 9476

[32m[I 2021-08-21 14:01:51,052][0m Trial 12 finished with value: 7.758353034154606 and parameters: {'max_depth': 11, 'learning_rate': 0.007219802824299046, 'max_bin': 455, 'min_data_in_leaf': 63, 'reg_lambda': 0.09254117231087726, 'subsample': 0.4951782299280497}. Best is trial 11 with value: 7.756000258000434.[0m


0:	learn: 6.3398657	test: 7.8584116	best: 7.8584116 (0)	total: 83.2ms	remaining: 13m 51s
1000:	learn: 5.9452568	test: 7.7912963	best: 7.7912963 (1000)	total: 1m 20s	remaining: 12m 1s
2000:	learn: 5.6628910	test: 7.7781134	best: 7.7781134 (2000)	total: 2m 38s	remaining: 10m 33s
3000:	learn: 5.4070275	test: 7.7721197	best: 7.7721149 (2994)	total: 3m 56s	remaining: 9m 11s
4000:	learn: 5.1661924	test: 7.7686360	best: 7.7685863 (3966)	total: 5m 14s	remaining: 7m 52s
5000:	learn: 4.9322234	test: 7.7667631	best: 7.7666697 (4954)	total: 6m 33s	remaining: 6m 33s
6000:	learn: 4.7074791	test: 7.7641425	best: 7.7641425 (6000)	total: 7m 52s	remaining: 5m 14s
bestTest = 7.763239447
bestIteration = 6484
Shrink model to first 6485 iterations.
0:	learn: 6.3398746	test: 7.8584414	best: 7.8584414 (0)	total: 85ms	remaining: 14m 9s
1000:	learn: 5.9420819	test: 7.7922108	best: 7.7922108 (1000)	total: 1m 20s	remaining: 12m 6s
2000:	learn: 5.6578148	test: 7.7792641	best: 7.7792641 (2000)	total: 2m 39s	remaini

[32m[I 2021-08-21 14:20:20,400][0m Trial 13 finished with value: 7.761581302818221 and parameters: {'max_depth': 12, 'learning_rate': 0.007447018051033308, 'max_bin': 411, 'min_data_in_leaf': 73, 'reg_lambda': 0.10337842419960527, 'subsample': 0.4974502845004783}. Best is trial 11 with value: 7.756000258000434.[0m


0:	learn: 6.3399381	test: 7.8584623	best: 7.8584623 (0)	total: 90.9ms	remaining: 15m 8s
1000:	learn: 5.9708956	test: 7.7940546	best: 7.7940546 (1000)	total: 1m 27s	remaining: 13m 4s
2000:	learn: 5.7071802	test: 7.7819180	best: 7.7819180 (2000)	total: 2m 52s	remaining: 11m 27s
3000:	learn: 5.4729540	test: 7.7762937	best: 7.7762937 (2997)	total: 4m 16s	remaining: 9m 58s
4000:	learn: 5.2534703	test: 7.7724751	best: 7.7724751 (4000)	total: 5m 41s	remaining: 8m 31s
5000:	learn: 5.0386336	test: 7.7698183	best: 7.7697319 (4947)	total: 7m 6s	remaining: 7m 6s
6000:	learn: 4.8312397	test: 7.7675431	best: 7.7675406 (5842)	total: 8m 31s	remaining: 5m 40s
7000:	learn: 4.6293954	test: 7.7658761	best: 7.7658135 (6982)	total: 9m 57s	remaining: 4m 15s
bestTest = 7.765653899
bestIteration = 7087
Shrink model to first 7088 iterations.
0:	learn: 6.3399280	test: 7.8584658	best: 7.8584658 (0)	total: 93.9ms	remaining: 15m 38s
1000:	learn: 5.9713880	test: 7.7924380	best: 7.7924380 (1000)	total: 1m 28s	remaini

[32m[I 2021-08-21 14:44:15,271][0m Trial 14 finished with value: 7.760211716920735 and parameters: {'max_depth': 12, 'learning_rate': 0.0068278579473649405, 'max_bin': 476, 'min_data_in_leaf': 67, 'reg_lambda': 0.08568310524141731, 'subsample': 0.49304030726218606}. Best is trial 11 with value: 7.756000258000434.[0m


Number of finished trials: 15
Best trial: {'max_depth': 11, 'learning_rate': 0.0073229907325453696, 'max_bin': 408, 'min_data_in_leaf': 64, 'reg_lambda': 0.09207259245356939, 'subsample': 0.49677374687682424}


In [11]:
# Display the hyperparameters of the best trial found by the study.
study.best_params

{'max_depth': 11,
 'learning_rate': 0.0073229907325453696,
 'max_bin': 408,
 'min_data_in_leaf': 64,
 'reg_lambda': 0.09207259245356939,
 'subsample': 0.49677374687682424}

# Log

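Best score noted for each notebook version (presumably the objective's hold-out RMSE; lower is better):

- ver4, row-wise noise: 7.768172118850735
- ver5, random noise: 7.776578973194598
- ver7, no noise: 7.757181558352988
- ver8, no noise (narrow space): 7.756073089003697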
