In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import random
import os
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

#import lightgbm as lgb
#import xgboost as xgb
import catboost as ctb

import warnings
warnings.simplefilter('ignore')

# Parameters

In [2]:
target = 'target'

DEBUG = False

if DEBUG:
    N_ESTIMATORS = 1
    N_SPLITS = 2
    SEED = 17
    CVSEED = 17
    EARLY_STOPPING_ROUNDS = 1
    VERBOSE = 100
    #N_ITERS = 2
else:
    N_SPLITS = 10
    N_ESTIMATORS = 20000
    EARLY_STOPPING_ROUNDS = 200
    VERBOSE = 1000
    SEED = 17
    CVSEED = 17
    #N_ITERS = 10

In [3]:
def set_seed(seed=17):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
set_seed(SEED)

# Load data

In [4]:
# Load the training data
train = pd.read_csv("../input/30-days-of-ml/train.csv")
test = pd.read_csv("../input/30-days-of-ml/test.csv")

# Preview the data
train.head()

Unnamed: 0,id,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,C,B,B,A,E,C,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,A,A,B,D,A,F,A,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,A,C,B,D,A,D,A,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,A,C,B,D,A,E,C,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,A,C,B,D,A,E,A,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [5]:
# Low MI scores
for pp in ["cat2","cat3","cat4","cat6"]:
    train.pop(pp)
    test.pop(pp)
train.head()

Unnamed: 0,id,cat0,cat1,cat5,cat7,cat8,cat9,cont0,cont1,cont2,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,E,C,N,0.20147,-0.014822,0.669699,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,D,F,A,O,0.743068,0.367411,1.021605,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,D,D,A,F,0.742708,0.310383,-0.012673,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,D,E,C,K,0.429551,0.620998,0.577942,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,D,E,A,N,1.058291,0.367492,-0.052389,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [6]:
# Separate target from features
y = train['target']
features = train.drop(['id','target'], axis=1)

# Preview features
features.head()

Unnamed: 0,cat0,cat1,cat5,cat7,cat8,cat9,cont0,cont1,cont2,cont3,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,B,B,B,E,C,N,0.20147,-0.014822,0.669699,0.136278,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985
1,B,B,D,F,A,O,0.743068,0.367411,1.021605,0.365798,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,A,A,D,D,A,F,0.742708,0.310383,-0.012673,0.576957,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,B,B,D,E,C,K,0.429551,0.620998,0.577942,0.28061,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682
4,A,A,D,E,A,N,1.058291,0.367492,-0.052389,0.232407,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823


# Preprocessing

In [7]:

# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# ordinal-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()
ordinal_encoder = OrdinalEncoder()
X[object_cols] = ordinal_encoder.fit_transform(features[object_cols])
X_test[object_cols] = ordinal_encoder.transform(test[object_cols])

# Preview the ordinal-encoded features
X.head()


Unnamed: 0,cat0,cat1,cat5,cat7,cat8,cat9,cont0,cont1,cont2,cont3,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,1.0,1.0,1.0,4.0,2.0,13.0,0.20147,-0.014822,0.669699,0.136278,0.610706,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985
1,1.0,1.0,3.0,5.0,0.0,14.0,0.743068,0.367411,1.021605,0.365798,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,0.0,0.0,3.0,3.0,0.0,5.0,0.742708,0.310383,-0.012673,0.576957,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,1.0,1.0,3.0,4.0,2.0,10.0,0.429551,0.620998,0.577942,0.28061,0.284667,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682
4,0.0,0.0,3.0,4.0,0.0,13.0,1.058291,0.367492,-0.052389,0.232407,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823


In [8]:
'''
# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# one-hot-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()

oh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")
X_ohe = oh_encoder.fit_transform(features[object_cols])
X_test_ohe = oh_encoder.transform(test[object_cols])

X_ohe = pd.DataFrame(X_ohe, columns=[f"ohe_{i}" for i in range(X_ohe.shape[1])])
X_test_ohe = pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])

X = pd.concat([X, X_ohe], axis=1)
X_test = pd.concat([X_test, X_test_ohe], axis=1)
X = X.drop(object_cols, axis=1)
X_test = X_test.drop(object_cols, axis=1)
    
# Preview the one-hot-encoded features
X.head()
'''

'\n# List of categorical columns\nobject_cols = [col for col in features.columns if \'cat\' in col]\n\n# one-hot-encode categorical columns\nX = features.copy()\nX_test = test.drop([\'id\'], axis=1).copy()\n\noh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")\nX_ohe = oh_encoder.fit_transform(features[object_cols])\nX_test_ohe = oh_encoder.transform(test[object_cols])\n\nX_ohe = pd.DataFrame(X_ohe, columns=[f"ohe_{i}" for i in range(X_ohe.shape[1])])\nX_test_ohe = pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])\n\nX = pd.concat([X, X_ohe], axis=1)\nX_test = pd.concat([X_test, X_test_ohe], axis=1)\nX = X.drop(object_cols, axis=1)\nX_test = X_test.drop(object_cols, axis=1)\n    \n# Preview the one-hot-encoded features\nX.head()\n'

In [9]:
scale_features = [col for col in features.columns if 'cont' in col]

ss = StandardScaler()
X[scale_features] = ss.fit_transform(features[scale_features])
X_test[scale_features] = ss.transform(test[scale_features])

In [10]:
# Swap noise

# Random
def apply_noise_rn(df, p=.75):
    should_not_swap = np.random.binomial(1, p, df.shape)
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Row-wise
def apply_noise_row(df, p=.75):
    should_not_swap = np.zeros(df.shape)
    for i in range(df.shape[0]):
        for j in np.random.choice(df.shape[1],int(p*df.shape[1]),replace=False):
            should_not_swap[i,j]=1 
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Pseudo Label

In [11]:
pseudo = pd.read_csv("../input/30-days-pseudo/submission.csv")[target]
train_pseudo = pd.concat([X, y], axis=1)
test_pseudo = pd.concat([X_test, pseudo], axis=1)
all_pseudo = pd.concat([train_pseudo, test_pseudo]).reset_index(drop=True)

# Optuna

In [12]:
# Optuna for parameter search
!pip install -q optuna

import optuna
import pickle



In [13]:
# for the fixed learning rate, use the opt n iterations and tune the tree hyperparameters
def objective(trial, X=all_pseudo.iloc[:,:-1], y=all_pseudo[target]):
  """
  """
  param_space = {
               'bootstrap_type': 'Poisson',
            'loss_function': 'RMSE',
            'eval_metric': 'RMSE',
          'task_type': 'GPU',
          'max_depth':trial.suggest_int('max_depth', 1, 3),
           'learning_rate':trial.suggest_uniform('learning_rate', 3.81e-2, 4.01e-2),
          'n_estimators':N_ESTIMATORS,
        'max_bin':trial.suggest_int('max_bin', 702, 714),
             'min_data_in_leaf':trial.suggest_int('min_data_in_leaf', 155, 163),
         'reg_lambda':trial.suggest_uniform('reg_lambda', 30.5, 33),
            'subsample': trial.suggest_uniform('subsample', 0.97, 1)
                }
            
  seed_list=[SEED, SEED+1]
  #kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=CVSEED)
  ctb_oof = np.zeros(train.shape[0])
  #lgb_pred = np.zeros(test.shape[0])
  X_tr, X_va, y_tr, y_va = train_test_split(X,y,test_size=.2,random_state=CVSEED)
  trn_idx = X_tr.index
  val_idx = X_va.index
  
  #for (trn_idx, val_idx) in enumerate(kf.split(X, y)):
  #print(f"===== fold {fold} =====")
  oof_idx = np.array([idx for idx in val_idx if idx < train.shape[0]])
  preds_idx = np.array([idx for idx in val_idx if idx >= train.shape[0]])

  X_train, y_train = all_pseudo.iloc[trn_idx, :-1], all_pseudo[target].iloc[trn_idx]
  #X_train = apply_noise_row(X_train)

  X_valid, y_valid = all_pseudo.iloc[oof_idx, :-1], all_pseudo[target].iloc[oof_idx]
  #X_test = all_pseudo[scale_features].iloc[preds_idx]
  
  #start = time.time()
  for inseed in seed_list:
    param_space['random_seed'] = inseed

    model = ctb.CatBoostRegressor(**param_space)
    model.fit(
        X_train, 
        y_train,
        eval_set=[(X_valid, y_valid)],
        use_best_model=True,
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        verbose=VERBOSE
       
    )


    ctb_oof[oof_idx] += model.predict(X_valid) / len(seed_list)
    #lgb_pred[preds_idx-train.shape[0]] += model.predict(X_test) / len(seed_list)

  #elapsed = time.time() - start
  rmse = mean_squared_error(y_valid, ctb_oof[oof_idx], squared=False)
  #print(f"fold {fold} - lgb rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")            
  
  #print(f"oof lgb_rmse = {mean_squared_error(train[target], lgb_oof, squared=False)}")

  
  return rmse

In [14]:
study = optuna.create_study(direction='minimize')
study.optimize(objective,n_trials= 15)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-08-23 09:32:27,528][0m A new study created in memory with name: no-name-22d33f7f-b301-4345-a87d-dcf7a9668380[0m


0:	learn: 0.5936750	test: 0.7396882	best: 0.7396882 (0)	total: 13.3ms	remaining: 4m 26s
1000:	learn: 0.5699922	test: 0.7212405	best: 0.7212405 (1000)	total: 4.99s	remaining: 1m 34s
2000:	learn: 0.5652288	test: 0.7177440	best: 0.7177440 (2000)	total: 10.1s	remaining: 1m 30s
3000:	learn: 0.5621845	test: 0.7156277	best: 0.7156271 (2998)	total: 15s	remaining: 1m 25s
4000:	learn: 0.5601181	test: 0.7141786	best: 0.7141782 (3999)	total: 19.8s	remaining: 1m 19s
5000:	learn: 0.5585889	test: 0.7131613	best: 0.7131612 (4998)	total: 24.8s	remaining: 1m 14s
6000:	learn: 0.5574054	test: 0.7123903	best: 0.7123903 (6000)	total: 29.5s	remaining: 1m 8s
7000:	learn: 0.5565157	test: 0.7118855	best: 0.7118849 (6999)	total: 34.6s	remaining: 1m 4s
8000:	learn: 0.5557899	test: 0.7115065	best: 0.7115060 (7995)	total: 39.7s	remaining: 59.5s
9000:	learn: 0.5551444	test: 0.7111857	best: 0.7111857 (9000)	total: 45s	remaining: 55s
10000:	learn: 0.5546318	test: 0.7109431	best: 0.7109431 (9999)	total: 49.7s	remaining

[32m[I 2021-08-23 09:36:43,573][0m Trial 0 finished with value: 0.7099791464101343 and parameters: {'max_depth': 2, 'learning_rate': 0.03992686599010366, 'max_bin': 713, 'min_data_in_leaf': 156, 'reg_lambda': 31.904112168897683, 'subsample': 0.9804663418450672}. Best is trial 0 with value: 0.7099791464101343.[0m


0:	learn: 0.5937740	test: 0.7397706	best: 0.7397706 (0)	total: 4.58ms	remaining: 1m 31s
1000:	learn: 0.5763423	test: 0.7260146	best: 0.7260146 (1000)	total: 3.56s	remaining: 1m 7s
2000:	learn: 0.5730251	test: 0.7234051	best: 0.7234051 (2000)	total: 7.44s	remaining: 1m 6s
3000:	learn: 0.5713599	test: 0.7221322	best: 0.7221322 (3000)	total: 11s	remaining: 1m 2s
4000:	learn: 0.5701537	test: 0.7212135	best: 0.7212135 (4000)	total: 14.9s	remaining: 59.6s
5000:	learn: 0.5692035	test: 0.7205002	best: 0.7205002 (5000)	total: 19.6s	remaining: 58.7s
6000:	learn: 0.5684258	test: 0.7199279	best: 0.7199279 (6000)	total: 23.5s	remaining: 54.7s
7000:	learn: 0.5677742	test: 0.7194488	best: 0.7194488 (7000)	total: 27.6s	remaining: 51.2s
8000:	learn: 0.5672112	test: 0.7190390	best: 0.7190390 (8000)	total: 31.4s	remaining: 47.1s
9000:	learn: 0.5667178	test: 0.7186791	best: 0.7186791 (9000)	total: 34.9s	remaining: 42.7s
10000:	learn: 0.5662807	test: 0.7183671	best: 0.7183669 (9999)	total: 38.9s	remaining:

[32m[I 2021-08-23 09:39:26,863][0m Trial 1 finished with value: 0.7162813067611202 and parameters: {'max_depth': 1, 'learning_rate': 0.03994305832383072, 'max_bin': 711, 'min_data_in_leaf': 156, 'reg_lambda': 32.237581866047876, 'subsample': 0.986035796530317}. Best is trial 0 with value: 0.7099791464101343.[0m


0:	learn: 0.5935732	test: 0.7396053	best: 0.7396053 (0)	total: 6.6ms	remaining: 2m 12s
1000:	learn: 0.5661714	test: 0.7185014	best: 0.7185014 (1000)	total: 5.37s	remaining: 1m 41s
2000:	learn: 0.5607529	test: 0.7146995	best: 0.7146995 (2000)	total: 11.1s	remaining: 1m 40s
3000:	learn: 0.5579516	test: 0.7129375	best: 0.7129369 (2999)	total: 17.5s	remaining: 1m 39s
4000:	learn: 0.5561465	test: 0.7119586	best: 0.7119586 (4000)	total: 22.8s	remaining: 1m 30s
5000:	learn: 0.5548370	test: 0.7113289	best: 0.7113289 (5000)	total: 28.5s	remaining: 1m 25s
6000:	learn: 0.5537522	test: 0.7109486	best: 0.7109486 (6000)	total: 34s	remaining: 1m 19s
7000:	learn: 0.5528191	test: 0.7106552	best: 0.7106552 (6999)	total: 39.6s	remaining: 1m 13s
8000:	learn: 0.5520067	test: 0.7104668	best: 0.7104668 (8000)	total: 45s	remaining: 1m 7s
9000:	learn: 0.5512638	test: 0.7103108	best: 0.7103106 (8998)	total: 50.8s	remaining: 1m 2s
10000:	learn: 0.5505859	test: 0.7102000	best: 0.7101942 (9979)	total: 56.8s	remain

[32m[I 2021-08-23 09:41:46,839][0m Trial 2 finished with value: 0.7100947650641626 and parameters: {'max_depth': 3, 'learning_rate': 0.03919027534165944, 'max_bin': 703, 'min_data_in_leaf': 163, 'reg_lambda': 31.22737993246908, 'subsample': 0.9779975999423983}. Best is trial 0 with value: 0.7099791464101343.[0m


0:	learn: 0.5937819	test: 0.7397753	best: 0.7397753 (0)	total: 4.82ms	remaining: 1m 36s
1000:	learn: 0.5765587	test: 0.7261967	best: 0.7261967 (1000)	total: 3.84s	remaining: 1m 12s
2000:	learn: 0.5731957	test: 0.7235261	best: 0.7235261 (2000)	total: 7.69s	remaining: 1m 9s
3000:	learn: 0.5715227	test: 0.7222376	best: 0.7222376 (3000)	total: 11.9s	remaining: 1m 7s
4000:	learn: 0.5703199	test: 0.7213328	best: 0.7213328 (4000)	total: 16.5s	remaining: 1m 6s
5000:	learn: 0.5693693	test: 0.7206139	best: 0.7206139 (5000)	total: 20.7s	remaining: 1m 1s
6000:	learn: 0.5685923	test: 0.7200440	best: 0.7200440 (6000)	total: 24.3s	remaining: 56.6s
7000:	learn: 0.5679378	test: 0.7195645	best: 0.7195645 (7000)	total: 28.1s	remaining: 52.2s
8000:	learn: 0.5673782	test: 0.7191550	best: 0.7191550 (8000)	total: 32.2s	remaining: 48.2s
9000:	learn: 0.5668823	test: 0.7187972	best: 0.7187972 (9000)	total: 36s	remaining: 44s
10000:	learn: 0.5664425	test: 0.7184727	best: 0.7184727 (10000)	total: 39.9s	remaining:

[32m[I 2021-08-23 09:44:31,584][0m Trial 3 finished with value: 0.716391431551238 and parameters: {'max_depth': 1, 'learning_rate': 0.038410409201674205, 'max_bin': 704, 'min_data_in_leaf': 162, 'reg_lambda': 32.32357258472797, 'subsample': 0.9906964891414973}. Best is trial 0 with value: 0.7099791464101343.[0m


0:	learn: 0.5936877	test: 0.7396984	best: 0.7396984 (0)	total: 5.91ms	remaining: 1m 58s
1000:	learn: 0.5703402	test: 0.7215309	best: 0.7215309 (1000)	total: 4.69s	remaining: 1m 29s
2000:	learn: 0.5653992	test: 0.7179079	best: 0.7179079 (2000)	total: 9.83s	remaining: 1m 28s
3000:	learn: 0.5622824	test: 0.7156821	best: 0.7156821 (3000)	total: 15s	remaining: 1m 24s
4000:	learn: 0.5601779	test: 0.7141928	best: 0.7141928 (4000)	total: 19.8s	remaining: 1m 18s
5000:	learn: 0.5586588	test: 0.7131989	best: 0.7131989 (5000)	total: 24.7s	remaining: 1m 14s
6000:	learn: 0.5575264	test: 0.7124851	best: 0.7124847 (5999)	total: 29.3s	remaining: 1m 8s
7000:	learn: 0.5566284	test: 0.7119644	best: 0.7119644 (7000)	total: 34.4s	remaining: 1m 3s
8000:	learn: 0.5559199	test: 0.7115807	best: 0.7115799 (7997)	total: 39.1s	remaining: 58.6s
9000:	learn: 0.5552813	test: 0.7112588	best: 0.7112587 (8998)	total: 44.7s	remaining: 54.6s
10000:	learn: 0.5547376	test: 0.7110199	best: 0.7110199 (10000)	total: 49.7s	rema

[32m[I 2021-08-23 09:47:55,703][0m Trial 4 finished with value: 0.7099765584461286 and parameters: {'max_depth': 2, 'learning_rate': 0.038339035389420566, 'max_bin': 709, 'min_data_in_leaf': 162, 'reg_lambda': 31.28570384256541, 'subsample': 0.9961389884057417}. Best is trial 4 with value: 0.7099765584461286.[0m


0:	learn: 0.5936788	test: 0.7396902	best: 0.7396902 (0)	total: 5.67ms	remaining: 1m 53s
1000:	learn: 0.5701084	test: 0.7213187	best: 0.7213187 (1000)	total: 5.07s	remaining: 1m 36s
2000:	learn: 0.5653020	test: 0.7178032	best: 0.7178032 (2000)	total: 10.4s	remaining: 1m 33s
3000:	learn: 0.5622309	test: 0.7156345	best: 0.7156345 (3000)	total: 15.3s	remaining: 1m 26s
4000:	learn: 0.5601478	test: 0.7141261	best: 0.7141261 (4000)	total: 20s	remaining: 1m 20s
5000:	learn: 0.5586510	test: 0.7131567	best: 0.7131567 (5000)	total: 25s	remaining: 1m 14s
6000:	learn: 0.5575300	test: 0.7125129	best: 0.7125129 (6000)	total: 29.8s	remaining: 1m 9s
7000:	learn: 0.5566452	test: 0.7119804	best: 0.7119800 (6998)	total: 34.5s	remaining: 1m 4s
8000:	learn: 0.5559170	test: 0.7115994	best: 0.7115994 (8000)	total: 40.2s	remaining: 1m
9000:	learn: 0.5552913	test: 0.7112907	best: 0.7112900 (8995)	total: 44.7s	remaining: 54.6s
10000:	learn: 0.5547359	test: 0.7110069	best: 0.7110059 (9994)	total: 49.7s	remaining:

[32m[I 2021-08-23 09:51:17,708][0m Trial 5 finished with value: 0.709980260957846 and parameters: {'max_depth': 2, 'learning_rate': 0.039495103296372736, 'max_bin': 704, 'min_data_in_leaf': 162, 'reg_lambda': 32.885519077825826, 'subsample': 0.9748264399823461}. Best is trial 4 with value: 0.7099765584461286.[0m


0:	learn: 0.5937778	test: 0.7397721	best: 0.7397721 (0)	total: 4.53ms	remaining: 1m 30s
1000:	learn: 0.5764577	test: 0.7261325	best: 0.7261325 (1000)	total: 3.64s	remaining: 1m 9s
2000:	learn: 0.5731177	test: 0.7234764	best: 0.7234764 (2000)	total: 7.28s	remaining: 1m 5s
3000:	learn: 0.5714430	test: 0.7221943	best: 0.7221943 (3000)	total: 11.4s	remaining: 1m 4s
4000:	learn: 0.5702375	test: 0.7212766	best: 0.7212766 (4000)	total: 15s	remaining: 1m
5000:	learn: 0.5692857	test: 0.7205634	best: 0.7205634 (5000)	total: 18.7s	remaining: 56.2s
6000:	learn: 0.5685067	test: 0.7199836	best: 0.7199836 (6000)	total: 22.7s	remaining: 53s
7000:	learn: 0.5678552	test: 0.7195056	best: 0.7195056 (7000)	total: 26.3s	remaining: 48.8s
8000:	learn: 0.5672939	test: 0.7190970	best: 0.7190970 (8000)	total: 29.9s	remaining: 44.9s
9000:	learn: 0.5667977	test: 0.7187333	best: 0.7187333 (9000)	total: 34.6s	remaining: 42.3s
10000:	learn: 0.5663583	test: 0.7184193	best: 0.7184193 (10000)	total: 38.1s	remaining: 38.

[32m[I 2021-08-23 09:54:01,047][0m Trial 6 finished with value: 0.7163253586529963 and parameters: {'max_depth': 1, 'learning_rate': 0.039091434516654006, 'max_bin': 713, 'min_data_in_leaf': 161, 'reg_lambda': 31.634361109975575, 'subsample': 0.9962862801205974}. Best is trial 4 with value: 0.7099765584461286.[0m


0:	learn: 0.5936797	test: 0.7396915	best: 0.7396915 (0)	total: 11.1ms	remaining: 3m 42s
1000:	learn: 0.5701170	test: 0.7213195	best: 0.7213195 (1000)	total: 5.49s	remaining: 1m 44s
2000:	learn: 0.5653037	test: 0.7178058	best: 0.7178058 (2000)	total: 10.2s	remaining: 1m 31s
3000:	learn: 0.5622495	test: 0.7156299	best: 0.7156290 (2999)	total: 15.1s	remaining: 1m 25s
4000:	learn: 0.5601644	test: 0.7141996	best: 0.7141996 (4000)	total: 19.9s	remaining: 1m 19s
5000:	learn: 0.5587044	test: 0.7132051	best: 0.7132051 (5000)	total: 24.9s	remaining: 1m 14s
6000:	learn: 0.5575517	test: 0.7124839	best: 0.7124839 (6000)	total: 29.6s	remaining: 1m 9s
7000:	learn: 0.5566658	test: 0.7119823	best: 0.7119823 (7000)	total: 35.3s	remaining: 1m 5s
8000:	learn: 0.5559251	test: 0.7115954	best: 0.7115942 (7994)	total: 40s	remaining: 59.9s
9000:	learn: 0.5552881	test: 0.7112617	best: 0.7112613 (8996)	total: 44.8s	remaining: 54.8s
10000:	learn: 0.5547503	test: 0.7110233	best: 0.7110228 (9997)	total: 49.5s	remai

[32m[I 2021-08-23 09:57:25,677][0m Trial 7 finished with value: 0.7099481476777582 and parameters: {'max_depth': 2, 'learning_rate': 0.03935065398409243, 'max_bin': 702, 'min_data_in_leaf': 156, 'reg_lambda': 32.17564497305224, 'subsample': 0.9804494017397438}. Best is trial 7 with value: 0.7099481476777582.[0m


0:	learn: 0.5937735	test: 0.7397681	best: 0.7397681 (0)	total: 4.92ms	remaining: 1m 38s
1000:	learn: 0.5763470	test: 0.7260247	best: 0.7260247 (1000)	total: 3.54s	remaining: 1m 7s
2000:	learn: 0.5730297	test: 0.7234076	best: 0.7234076 (2000)	total: 7.72s	remaining: 1m 9s
3000:	learn: 0.5713620	test: 0.7221176	best: 0.7221176 (3000)	total: 11.7s	remaining: 1m 6s
4000:	learn: 0.5701554	test: 0.7212089	best: 0.7212089 (4000)	total: 15.5s	remaining: 1m 1s
5000:	learn: 0.5692017	test: 0.7204925	best: 0.7204925 (5000)	total: 19.6s	remaining: 58.7s
6000:	learn: 0.5684250	test: 0.7199199	best: 0.7199199 (6000)	total: 23.4s	remaining: 54.6s
7000:	learn: 0.5677720	test: 0.7194410	best: 0.7194410 (6999)	total: 28.3s	remaining: 52.5s
8000:	learn: 0.5672127	test: 0.7190389	best: 0.7190389 (8000)	total: 32.1s	remaining: 48.1s
9000:	learn: 0.5667194	test: 0.7186806	best: 0.7186806 (9000)	total: 35.8s	remaining: 43.7s
10000:	learn: 0.5662816	test: 0.7183625	best: 0.7183625 (10000)	total: 39.7s	remaini

[32m[I 2021-08-23 10:00:12,566][0m Trial 8 finished with value: 0.7162713718681779 and parameters: {'max_depth': 1, 'learning_rate': 0.03992574058796399, 'max_bin': 705, 'min_data_in_leaf': 162, 'reg_lambda': 32.66070422828422, 'subsample': 0.9891455367042591}. Best is trial 7 with value: 0.7099481476777582.[0m


0:	learn: 0.5937728	test: 0.7397674	best: 0.7397674 (0)	total: 4.64ms	remaining: 1m 32s
1000:	learn: 0.5763327	test: 0.7260179	best: 0.7260179 (1000)	total: 3.91s	remaining: 1m 14s
2000:	learn: 0.5730194	test: 0.7233897	best: 0.7233897 (2000)	total: 7.58s	remaining: 1m 8s
3000:	learn: 0.5713544	test: 0.7221153	best: 0.7221153 (3000)	total: 11.1s	remaining: 1m 3s
4000:	learn: 0.5701491	test: 0.7212045	best: 0.7212045 (4000)	total: 15.2s	remaining: 1m
5000:	learn: 0.5691959	test: 0.7204863	best: 0.7204863 (5000)	total: 18.8s	remaining: 56.3s
6000:	learn: 0.5684183	test: 0.7199071	best: 0.7199071 (6000)	total: 23s	remaining: 53.6s
7000:	learn: 0.5677691	test: 0.7194365	best: 0.7194365 (7000)	total: 26.9s	remaining: 50s
8000:	learn: 0.5672094	test: 0.7190234	best: 0.7190234 (8000)	total: 30.5s	remaining: 45.8s
9000:	learn: 0.5667163	test: 0.7186657	best: 0.7186657 (9000)	total: 34.1s	remaining: 41.6s
10000:	learn: 0.5662770	test: 0.7183570	best: 0.7183570 (10000)	total: 38.6s	remaining: 38

[32m[I 2021-08-23 10:02:56,546][0m Trial 9 finished with value: 0.7162737351337362 and parameters: {'max_depth': 1, 'learning_rate': 0.04004572650189559, 'max_bin': 712, 'min_data_in_leaf': 156, 'reg_lambda': 31.610662173415342, 'subsample': 0.9804486514184947}. Best is trial 7 with value: 0.7099481476777582.[0m


0:	learn: 0.5935768	test: 0.7396091	best: 0.7396091 (0)	total: 6.49ms	remaining: 2m 9s
1000:	learn: 0.5662714	test: 0.7185585	best: 0.7185585 (1000)	total: 5.88s	remaining: 1m 51s
2000:	learn: 0.5608367	test: 0.7147400	best: 0.7147400 (2000)	total: 11.3s	remaining: 1m 41s
3000:	learn: 0.5580180	test: 0.7129443	best: 0.7129443 (3000)	total: 17.2s	remaining: 1m 37s
4000:	learn: 0.5562435	test: 0.7119602	best: 0.7119602 (4000)	total: 23.3s	remaining: 1m 33s
5000:	learn: 0.5548803	test: 0.7113214	best: 0.7113214 (5000)	total: 29.2s	remaining: 1m 27s
6000:	learn: 0.5538118	test: 0.7109250	best: 0.7109250 (5998)	total: 34.7s	remaining: 1m 20s
7000:	learn: 0.5528969	test: 0.7106416	best: 0.7106413 (6999)	total: 40.7s	remaining: 1m 15s
8000:	learn: 0.5521095	test: 0.7104731	best: 0.7104726 (7999)	total: 46.1s	remaining: 1m 9s
9000:	learn: 0.5513609	test: 0.7103102	best: 0.7103089 (8984)	total: 51.8s	remaining: 1m 3s
10000:	learn: 0.5506854	test: 0.7101789	best: 0.7101789 (10000)	total: 58s	rem

[32m[I 2021-08-23 10:05:13,930][0m Trial 10 finished with value: 0.7101136128928449 and parameters: {'max_depth': 3, 'learning_rate': 0.03888796305674772, 'max_bin': 707, 'min_data_in_leaf': 158, 'reg_lambda': 30.66257209558469, 'subsample': 0.9719179388157723}. Best is trial 7 with value: 0.7099481476777582.[0m


0:	learn: 0.5936891	test: 0.7397006	best: 0.7397006 (0)	total: 5.72ms	remaining: 1m 54s
1000:	learn: 0.5704295	test: 0.7215627	best: 0.7215627 (1000)	total: 4.71s	remaining: 1m 29s
2000:	learn: 0.5654232	test: 0.7179038	best: 0.7179038 (2000)	total: 9.32s	remaining: 1m 23s
3000:	learn: 0.5622954	test: 0.7156786	best: 0.7156784 (2998)	total: 14.2s	remaining: 1m 20s
4000:	learn: 0.5601736	test: 0.7141823	best: 0.7141821 (3999)	total: 19.6s	remaining: 1m 18s
5000:	learn: 0.5585778	test: 0.7131118	best: 0.7131118 (5000)	total: 24.7s	remaining: 1m 13s
6000:	learn: 0.5574704	test: 0.7124219	best: 0.7124218 (5999)	total: 29.2s	remaining: 1m 8s
7000:	learn: 0.5566008	test: 0.7119465	best: 0.7119465 (7000)	total: 34.5s	remaining: 1m 4s
8000:	learn: 0.5558618	test: 0.7115556	best: 0.7115549 (7999)	total: 39.3s	remaining: 58.9s
9000:	learn: 0.5552186	test: 0.7112275	best: 0.7112275 (9000)	total: 44.3s	remaining: 54.2s
10000:	learn: 0.5546501	test: 0.7109546	best: 0.7109542 (9999)	total: 49.1s	rem

[32m[I 2021-08-23 10:08:41,697][0m Trial 11 finished with value: 0.7098848674925469 and parameters: {'max_depth': 2, 'learning_rate': 0.03816667154616651, 'max_bin': 709, 'min_data_in_leaf': 159, 'reg_lambda': 30.922021895077272, 'subsample': 0.9998995160334373}. Best is trial 11 with value: 0.7098848674925469.[0m


0:	learn: 0.5936784	test: 0.7396914	best: 0.7396914 (0)	total: 8.43ms	remaining: 2m 48s
1000:	learn: 0.5701752	test: 0.7213795	best: 0.7213795 (1000)	total: 4.64s	remaining: 1m 28s
2000:	learn: 0.5652754	test: 0.7177796	best: 0.7177796 (2000)	total: 9.15s	remaining: 1m 22s
3000:	learn: 0.5621950	test: 0.7155752	best: 0.7155752 (3000)	total: 14.7s	remaining: 1m 23s
4000:	learn: 0.5601182	test: 0.7141426	best: 0.7141426 (4000)	total: 19.6s	remaining: 1m 18s
5000:	learn: 0.5585918	test: 0.7131118	best: 0.7131107 (4999)	total: 24.7s	remaining: 1m 14s
6000:	learn: 0.5574855	test: 0.7124181	best: 0.7124181 (6000)	total: 29.4s	remaining: 1m 8s
7000:	learn: 0.5565839	test: 0.7119166	best: 0.7119166 (7000)	total: 34.3s	remaining: 1m 3s
8000:	learn: 0.5558393	test: 0.7115281	best: 0.7115281 (8000)	total: 39s	remaining: 58.5s
9000:	learn: 0.5552228	test: 0.7112340	best: 0.7112330 (8995)	total: 44.8s	remaining: 54.7s
10000:	learn: 0.5546790	test: 0.7109627	best: 0.7109627 (10000)	total: 49.5s	rema

[32m[I 2021-08-23 10:12:01,336][0m Trial 12 finished with value: 0.709970923144929 and parameters: {'max_depth': 2, 'learning_rate': 0.039520074578811314, 'max_bin': 709, 'min_data_in_leaf': 159, 'reg_lambda': 30.59235240966179, 'subsample': 0.9830634998955964}. Best is trial 11 with value: 0.7098848674925469.[0m


0:	learn: 0.5935846	test: 0.7396151	best: 0.7396151 (0)	total: 6.58ms	remaining: 2m 11s
1000:	learn: 0.5663132	test: 0.7185930	best: 0.7185925 (999)	total: 5.52s	remaining: 1m 44s
2000:	learn: 0.5608994	test: 0.7148084	best: 0.7148080 (1999)	total: 12s	remaining: 1m 48s
3000:	learn: 0.5580790	test: 0.7130281	best: 0.7130281 (3000)	total: 17.5s	remaining: 1m 39s
4000:	learn: 0.5562558	test: 0.7119679	best: 0.7119679 (4000)	total: 23.6s	remaining: 1m 34s
5000:	learn: 0.5548805	test: 0.7112925	best: 0.7112925 (5000)	total: 29.2s	remaining: 1m 27s
6000:	learn: 0.5537993	test: 0.7108882	best: 0.7108882 (6000)	total: 35.1s	remaining: 1m 21s
7000:	learn: 0.5528914	test: 0.7106410	best: 0.7106375 (6959)	total: 40.7s	remaining: 1m 15s
8000:	learn: 0.5520866	test: 0.7104540	best: 0.7104538 (7999)	total: 47.4s	remaining: 1m 11s
9000:	learn: 0.5513444	test: 0.7103223	best: 0.7103223 (9000)	total: 53.3s	remaining: 1m 5s
10000:	learn: 0.5506628	test: 0.7102146	best: 0.7102141 (9997)	total: 59s	remai

[32m[I 2021-08-23 10:14:47,991][0m Trial 13 finished with value: 0.7099422956839776 and parameters: {'max_depth': 3, 'learning_rate': 0.038102402229793116, 'max_bin': 707, 'min_data_in_leaf': 158, 'reg_lambda': 30.97064342790306, 'subsample': 0.9897232210275898}. Best is trial 11 with value: 0.7098848674925469.[0m


0:	learn: 0.5935847	test: 0.7396147	best: 0.7396147 (0)	total: 6.74ms	remaining: 2m 14s
1000:	learn: 0.5663358	test: 0.7186078	best: 0.7186078 (1000)	total: 5.59s	remaining: 1m 46s
2000:	learn: 0.5607921	test: 0.7147368	best: 0.7147368 (2000)	total: 11.5s	remaining: 1m 43s
3000:	learn: 0.5579561	test: 0.7128835	best: 0.7128831 (2999)	total: 17.7s	remaining: 1m 40s
4000:	learn: 0.5561238	test: 0.7118395	best: 0.7118395 (4000)	total: 23.8s	remaining: 1m 35s
5000:	learn: 0.5547499	test: 0.7111854	best: 0.7111854 (5000)	total: 29.3s	remaining: 1m 28s
6000:	learn: 0.5536427	test: 0.7107694	best: 0.7107694 (6000)	total: 35s	remaining: 1m 21s
7000:	learn: 0.5527066	test: 0.7104999	best: 0.7104960 (6981)	total: 40.9s	remaining: 1m 15s
8000:	learn: 0.5518972	test: 0.7103363	best: 0.7103349 (7994)	total: 47.2s	remaining: 1m 10s
9000:	learn: 0.5511496	test: 0.7102040	best: 0.7102038 (8999)	total: 53.1s	remaining: 1m 4s
10000:	learn: 0.5504493	test: 0.7101053	best: 0.7101044 (9997)	total: 58.7s	re

[32m[I 2021-08-23 10:17:25,645][0m Trial 14 finished with value: 0.7099177681639608 and parameters: {'max_depth': 3, 'learning_rate': 0.03813314986999626, 'max_bin': 707, 'min_data_in_leaf': 159, 'reg_lambda': 30.979310565754343, 'subsample': 0.9999299919229125}. Best is trial 11 with value: 0.7098848674925469.[0m


Number of finished trials: 15
Best trial: {'max_depth': 2, 'learning_rate': 0.03816667154616651, 'max_bin': 709, 'min_data_in_leaf': 159, 'reg_lambda': 30.922021895077272, 'subsample': 0.9998995160334373}


In [15]:
study.best_params

{'max_depth': 2,
 'learning_rate': 0.03816667154616651,
 'max_bin': 709,
 'min_data_in_leaf': 159,
 'reg_lambda': 30.922021895077272,
 'subsample': 0.9998995160334373}

# Log

====== Ordinal encoding =========

0.7104169071824754 no noise ver3

0.7100365351403024 no noise ver6 (narrow space)

0.7099791926764689 no noise ver7 (narrow space)

0.7099021667949135 no noise ver8 (narrow space)

0.7098886176837581 no noise ver10 (narrow space)

0.7146235117715432 random noise ver4

0.7139039381188639 row-wise noise ver5

 ====== One-hot encoding =========
 
0.7104321496790325 no noise ver2
 

 