In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import random
import os
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error

import lightgbm as lgb
#import xgboost as xgb
#import catboost as ctb

import warnings
warnings.simplefilter('ignore')

# Parameters

In [2]:
target = 'loss'

DEBUG = False

if DEBUG:
    N_ESTIMATORS = 1
    N_SPLITS = 2
    SEED = 17
    CVSEED = 17
    EARLY_STOPPING_ROUNDS = 1
    VERBOSE = 100
    #N_ITERS = 2
else:
    N_SPLITS = 10
    N_ESTIMATORS = 10000
    EARLY_STOPPING_ROUNDS = 200
    VERBOSE = 1000
    SEED = 17
    CVSEED = 17
    #N_ITERS = 10

In [3]:
def set_seed(seed=17):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
set_seed(SEED)

# Load data

In [4]:
INPUT = Path("../input/tabular-playground-series-aug-2021")

train = pd.read_csv(INPUT / "train.csv")
test = pd.read_csv(INPUT / "test.csv")
submission = pd.read_csv(INPUT / "sample_submission.csv")

In [5]:
train['lgb_pred'] = np.load("../input/tps-aug-2021-lgb/lgb_oof.npy")
test['lgb_pred'] = np.load("../input/tps-aug-2021-lgb/lgb_pred.npy")

train['xgb_pred'] = np.load("../input/tps-aug-2021-xgb/xgb_oof.npy")
test['xgb_pred'] = np.load("../input/tps-aug-2021-xgb/xgb_pred.npy")

train['xgb2_pred'] = np.load("../input/tps-aug-2021-xgb-pseudo2-pred/3xgb_oof.npy")+np.load("../input/tps-aug-2021-xgb-pseudo2-pred/4xgb_oof.npy")
test['xgb2_pred'] = np.load("../input/tps-aug-2021-xgb-pseudo2-pred/3xgb_pred.npy")+np.load("../input/tps-aug-2021-xgb-pseudo2-pred/4xgb_pred.npy")

train['ctb2_pred'] = np.load("../input/tps-aug-2021-catb-pseudo2/ctb_oof.npy")
test['ctb2_pred'] = np.load("../input/tps-aug-2021-catb-pseudo2/ctb_pred.npy")


# Preprocessing

In [6]:
scale_features = [col for col in test.columns if 'f' in col]

ss = StandardScaler()
train[scale_features] = ss.fit_transform(train[scale_features])
test[scale_features] = ss.transform(test[scale_features])

In [7]:
# Swap noise

# Random
def apply_noise_rn(df, p=.75):
    should_not_swap = np.random.binomial(1, p, df.shape)
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Row-wise
def apply_noise_row(df, p=.75):
    should_not_swap = np.zeros(df.shape)
    for i in range(df.shape[0]):
        for j in np.random.choice(df.shape[1],int(p*df.shape[1]),replace=False):
            should_not_swap[i,j]=1 
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Pseudo Label

In [8]:
pseudo = pd.read_csv("../input/blending-tool-tps-aug-2021/blend.csv/0.part")[target]
test_pseudo = pd.concat([test, pseudo], axis=1)
all_pseudo = pd.concat([train, test_pseudo]).reset_index(drop=True)

In [9]:
useful_features = scale_features + ['lgb_pred', 'xgb_pred','xgb2_pred','ctb2_pred']

# Optuna

In [10]:
# Optuna for parameter search
!pip install -q optuna

import optuna
import pickle



In [11]:
# for the fixed learning rate, use the opt n iterations and tune the tree hyperparameters
def objective(trial, X=all_pseudo[useful_features], y=all_pseudo[target]):
  """
  """
  param_space = {
                    'objective': 'regression',
              'metric':'rmse',
               #'device':'gpu',  # Use GPU acceleration
               # 'gpu_platform_id': 0,
                #    'gpu_device_id': 0,
               'n_estimators':N_ESTIMATORS,
              'learning_rate':trial.suggest_uniform('learning_rate', 1e-3, 1e-1),
               'subsample': trial.suggest_uniform('subsample', 0.2, 1.0),
              'colsample_bytree':trial.suggest_uniform('colsample_bytree', 0.2, 1.0),
                'subsample_freq': trial.suggest_int('subsample_freq', 1, 10),
             #'boosting_type': 'gbdt',
               'reg_lambda':trial.suggest_uniform('reg_lambda', 0.01, 50),
              'reg_alpha':trial.suggest_uniform('reg_alpha', 0.01, 50),
            'min_child_weight':trial.suggest_uniform('min_child_weight', 100, 500),
                 'min_child_samples':trial.suggest_int('min_child_samples', 5, 250),
          'importance_type': 'gain'
                }
            

  seed_list=[SEED, SEED+1]
  #kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=CVSEED)
  lgb_oof = np.zeros(train.shape[0])
  #lgb_pred = np.zeros(test.shape[0])
  X_tr, X_va, y_tr, y_va = train_test_split(X,y,test_size=.2,random_state=CVSEED)
  trn_idx = X_tr.index
  val_idx = X_va.index
  
  
  #for (trn_idx, val_idx) in enumerate(kf.split(X, y)):
  #print(f"===== fold {fold} =====")
  oof_idx = np.array([idx for idx in val_idx if idx < train.shape[0]])
  preds_idx = np.array([idx for idx in val_idx if idx >= train.shape[0]])

  X_train, y_train = all_pseudo[useful_features].iloc[trn_idx], all_pseudo[target].iloc[trn_idx]
  #X_train = apply_noise_rn(X_train)

  X_valid, y_valid = all_pseudo[useful_features].iloc[oof_idx], all_pseudo[target].iloc[oof_idx]
  #X_test = all_pseudo[scale_features].iloc[preds_idx]

  #start = time.time()
  for inseed in seed_list:
    param_space['random_state'] = inseed

    model = lgb.LGBMRegressor(**param_space)
    model.fit(
        X_train, 
        y_train,
        eval_set=[(X_valid, y_valid)],
        eval_metric='rmse',
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        verbose=VERBOSE,
    )


    lgb_oof[oof_idx] += model.predict(X_valid) / len(seed_list)
    #lgb_pred[preds_idx-train.shape[0]] += model.predict(X_test) / len(seed_list)

  #elapsed = time.time() - start
  rmse = mean_squared_error(y_valid, lgb_oof[oof_idx], squared=False)
  #print(f"fold {fold} - lgb rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")            
  
  #print(f"oof lgb_rmse = {mean_squared_error(train[target], lgb_oof, squared=False)}")

  
  return rmse

In [12]:
study = optuna.create_study(direction='minimize')
study.optimize(objective,n_trials= 30)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-08-28 03:26:43,010][0m A new study created in memory with name: no-name-5f6df95a-e3d4-474e-aa39-47bf4fa5e5e5[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[58]	valid_0's rmse: 7.74759
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:27:16,298][0m Trial 0 finished with value: 7.746530502556741 and parameters: {'learning_rate': 0.04744603349824434, 'subsample': 0.2835255174317608, 'colsample_bytree': 0.9352143587168733, 'subsample_freq': 5, 'reg_lambda': 13.47054249042285, 'reg_alpha': 47.719384953009964, 'min_child_weight': 232.5421357887502, 'min_child_samples': 243}. Best is trial 0 with value: 7.746530502556741.[0m


Early stopping, best iteration is:
[51]	valid_0's rmse: 7.74727
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[38]	valid_0's rmse: 7.74527
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:27:57,789][0m Trial 1 finished with value: 7.744976969410505 and parameters: {'learning_rate': 0.08308531538923612, 'subsample': 0.7873530816742165, 'colsample_bytree': 0.899101218212899, 'subsample_freq': 2, 'reg_lambda': 48.30266168546053, 'reg_alpha': 14.520496808032021, 'min_child_weight': 188.07114783827754, 'min_child_samples': 192}. Best is trial 1 with value: 7.744976969410505.[0m


Early stopping, best iteration is:
[29]	valid_0's rmse: 7.7456
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[748]	valid_0's rmse: 7.74555
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[774]	valid_0's rmse: 7.74493


[32m[I 2021-08-28 03:30:26,527][0m Trial 2 finished with value: 7.74517113506254 and parameters: {'learning_rate': 0.003918913174636364, 'subsample': 0.4762508364480037, 'colsample_bytree': 0.9342995336598392, 'subsample_freq': 3, 'reg_lambda': 44.28194169429444, 'reg_alpha': 19.854236700748988, 'min_child_weight': 307.1653417580633, 'min_child_samples': 196}. Best is trial 1 with value: 7.744976969410505.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[39]	valid_0's rmse: 7.74871
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:30:56,763][0m Trial 3 finished with value: 7.74655829680481 and parameters: {'learning_rate': 0.07341881804766599, 'subsample': 0.2848293054813087, 'colsample_bytree': 0.8921531844197947, 'subsample_freq': 8, 'reg_lambda': 37.247871965126876, 'reg_alpha': 45.02050762142054, 'min_child_weight': 129.60163558714265, 'min_child_samples': 122}. Best is trial 1 with value: 7.744976969410505.[0m


Early stopping, best iteration is:
[32]	valid_0's rmse: 7.74704
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[104]	valid_0's rmse: 7.74545
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:31:44,054][0m Trial 4 finished with value: 7.7450836417357 and parameters: {'learning_rate': 0.03219538985423301, 'subsample': 0.7507789271155583, 'colsample_bytree': 0.7314662441487081, 'subsample_freq': 7, 'reg_lambda': 10.442473147329013, 'reg_alpha': 48.132531774325805, 'min_child_weight': 149.09123821577765, 'min_child_samples': 168}. Best is trial 1 with value: 7.744976969410505.[0m


Early stopping, best iteration is:
[89]	valid_0's rmse: 7.7452
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[75]	valid_0's rmse: 7.74557
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:32:22,671][0m Trial 5 finished with value: 7.745903110486187 and parameters: {'learning_rate': 0.03445144830154262, 'subsample': 0.4466028737074312, 'colsample_bytree': 0.8791368642143944, 'subsample_freq': 3, 'reg_lambda': 6.51158570758308, 'reg_alpha': 11.796867575394904, 'min_child_weight': 109.06461082492504, 'min_child_samples': 210}. Best is trial 1 with value: 7.744976969410505.[0m


Early stopping, best iteration is:
[85]	valid_0's rmse: 7.74705
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[291]	valid_0's rmse: 7.7468
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[346]	valid_0's rmse: 7.74705


[32m[I 2021-08-28 03:33:01,930][0m Trial 6 finished with value: 7.746708452654075 and parameters: {'learning_rate': 0.008783996659195888, 'subsample': 0.2183137111581421, 'colsample_bytree': 0.5322732686309188, 'subsample_freq': 2, 'reg_lambda': 13.50360608985776, 'reg_alpha': 0.506715773644712, 'min_child_weight': 236.95878701187385, 'min_child_samples': 183}. Best is trial 1 with value: 7.744976969410505.[0m


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[37]	valid_0's rmse: 7.7519
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:33:17,915][0m Trial 7 finished with value: 7.749492723283063 and parameters: {'learning_rate': 0.09754775711379254, 'subsample': 0.23570261532078396, 'colsample_bytree': 0.22536152691102487, 'subsample_freq': 8, 'reg_lambda': 49.96864723375021, 'reg_alpha': 14.52829966644013, 'min_child_weight': 260.14206378383324, 'min_child_samples': 203}. Best is trial 1 with value: 7.744976969410505.[0m


Early stopping, best iteration is:
[28]	valid_0's rmse: 7.75094
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[58]	valid_0's rmse: 7.74602
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:34:07,557][0m Trial 8 finished with value: 7.745267856049535 and parameters: {'learning_rate': 0.04415218791608094, 'subsample': 0.6760217596931348, 'colsample_bytree': 0.9815665001048206, 'subsample_freq': 2, 'reg_lambda': 7.937272615594959, 'reg_alpha': 3.0356271183371932, 'min_child_weight': 427.101371589134, 'min_child_samples': 95}. Best is trial 1 with value: 7.744976969410505.[0m


Early stopping, best iteration is:
[61]	valid_0's rmse: 7.74507
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[38]	valid_0's rmse: 7.74652
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:34:32,964][0m Trial 9 finished with value: 7.7464829546827785 and parameters: {'learning_rate': 0.08059856393180483, 'subsample': 0.7175166510211635, 'colsample_bytree': 0.44885789549533606, 'subsample_freq': 3, 'reg_lambda': 36.28673868795419, 'reg_alpha': 16.314934645993358, 'min_child_weight': 358.2155583268984, 'min_child_samples': 115}. Best is trial 1 with value: 7.744976969410505.[0m


Early stopping, best iteration is:
[32]	valid_0's rmse: 7.74732
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[40]	valid_0's rmse: 7.74545
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:35:10,239][0m Trial 10 finished with value: 7.7450560930215575 and parameters: {'learning_rate': 0.09709467838259253, 'subsample': 0.9547008776208757, 'colsample_bytree': 0.7094454828071441, 'subsample_freq': 5, 'reg_lambda': 25.555692946313723, 'reg_alpha': 33.73826459848645, 'min_child_weight': 181.22950091609616, 'min_child_samples': 45}. Best is trial 1 with value: 7.744976969410505.[0m


Early stopping, best iteration is:
[29]	valid_0's rmse: 7.74556
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[34]	valid_0's rmse: 7.74562
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:35:48,236][0m Trial 11 finished with value: 7.74534407329364 and parameters: {'learning_rate': 0.0991038637477068, 'subsample': 0.9990844943252951, 'colsample_bytree': 0.7141578312119812, 'subsample_freq': 5, 'reg_lambda': 24.746618264870552, 'reg_alpha': 32.918283634855726, 'min_child_weight': 175.60818095847736, 'min_child_samples': 35}. Best is trial 1 with value: 7.744976969410505.[0m


Early stopping, best iteration is:
[32]	valid_0's rmse: 7.74578
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[36]	valid_0's rmse: 7.74547
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:36:29,139][0m Trial 12 finished with value: 7.744580555420945 and parameters: {'learning_rate': 0.07482173700430986, 'subsample': 0.9432860713025384, 'colsample_bytree': 0.7372164751083186, 'subsample_freq': 10, 'reg_lambda': 25.86449009515078, 'reg_alpha': 32.8371590794605, 'min_child_weight': 193.13591358983584, 'min_child_samples': 14}. Best is trial 12 with value: 7.744580555420945.[0m


Early stopping, best iteration is:
[64]	valid_0's rmse: 7.74465
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[37]	valid_0's rmse: 7.7458
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:37:09,298][0m Trial 13 finished with value: 7.745071240372669 and parameters: {'learning_rate': 0.06722448233544273, 'subsample': 0.8805659969588169, 'colsample_bytree': 0.791439043851348, 'subsample_freq': 10, 'reg_lambda': 24.829547987153394, 'reg_alpha': 29.293682753771122, 'min_child_weight': 199.4339337614927, 'min_child_samples': 73}. Best is trial 12 with value: 7.744580555420945.[0m


Early stopping, best iteration is:
[35]	valid_0's rmse: 7.74487
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[58]	valid_0's rmse: 7.7459
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:37:43,030][0m Trial 14 finished with value: 7.745657465902792 and parameters: {'learning_rate': 0.061542908405107496, 'subsample': 0.8457415202935383, 'colsample_bytree': 0.5766305427489353, 'subsample_freq': 10, 'reg_lambda': 0.540691415089551, 'reg_alpha': 40.12633065188566, 'min_child_weight': 299.0982094926036, 'min_child_samples': 151}. Best is trial 12 with value: 7.744580555420945.[0m


Early stopping, best iteration is:
[49]	valid_0's rmse: 7.74614
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[40]	valid_0's rmse: 7.74561
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:38:22,544][0m Trial 15 finished with value: 7.744885862656994 and parameters: {'learning_rate': 0.08289375144998179, 'subsample': 0.8432727871996196, 'colsample_bytree': 0.8181004094104688, 'subsample_freq': 1, 'reg_lambda': 32.26283777501858, 'reg_alpha': 7.409147353280449, 'min_child_weight': 112.67993167228761, 'min_child_samples': 6}. Best is trial 12 with value: 7.744580555420945.[0m


Early stopping, best iteration is:
[29]	valid_0's rmse: 7.74507
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[39]	valid_0's rmse: 7.74717
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:38:48,891][0m Trial 16 finished with value: 7.746947166559191 and parameters: {'learning_rate': 0.08352463670206897, 'subsample': 0.9396587365263801, 'colsample_bytree': 0.4152672179407155, 'subsample_freq': 7, 'reg_lambda': 34.09003465053139, 'reg_alpha': 23.526757553818403, 'min_child_weight': 125.16932689420136, 'min_child_samples': 5}. Best is trial 12 with value: 7.744580555420945.[0m


Early stopping, best iteration is:
[41]	valid_0's rmse: 7.74752
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[56]	valid_0's rmse: 7.74671
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:39:27,167][0m Trial 17 finished with value: 7.744977401212421 and parameters: {'learning_rate': 0.06392922786087736, 'subsample': 0.5865237369933985, 'colsample_bytree': 0.79648829716662, 'subsample_freq': 9, 'reg_lambda': 29.577421647568535, 'reg_alpha': 4.664257537052478, 'min_child_weight': 489.13136916792985, 'min_child_samples': 11}. Best is trial 12 with value: 7.744580555420945.[0m


Early stopping, best iteration is:
[41]	valid_0's rmse: 7.74461
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[35]	valid_0's rmse: 7.74633
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:40:01,580][0m Trial 18 finished with value: 7.745641806051556 and parameters: {'learning_rate': 0.08811807652910686, 'subsample': 0.8658620276459412, 'colsample_bytree': 0.6347122826173373, 'subsample_freq': 1, 'reg_lambda': 18.65251911982391, 'reg_alpha': 37.55566085293212, 'min_child_weight': 104.92502289856618, 'min_child_samples': 41}. Best is trial 12 with value: 7.744580555420945.[0m


Early stopping, best iteration is:
[63]	valid_0's rmse: 7.74616
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[84]	valid_0's rmse: 7.74566
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:40:44,314][0m Trial 19 finished with value: 7.74498944402073 and parameters: {'learning_rate': 0.05605195381344236, 'subsample': 0.9908887256513148, 'colsample_bytree': 0.6486558614493592, 'subsample_freq': 6, 'reg_lambda': 41.1507384120151, 'reg_alpha': 26.21299341868407, 'min_child_weight': 294.74350553770194, 'min_child_samples': 70}. Best is trial 12 with value: 7.744580555420945.[0m


Early stopping, best iteration is:
[63]	valid_0's rmse: 7.74489
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[33]	valid_0's rmse: 7.74599
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:41:18,765][0m Trial 20 finished with value: 7.7459492929529485 and parameters: {'learning_rate': 0.07475428124017809, 'subsample': 0.6182173145309335, 'colsample_bytree': 0.80926190242928, 'subsample_freq': 4, 'reg_lambda': 30.607248721638697, 'reg_alpha': 8.546798194950306, 'min_child_weight': 150.90271126129466, 'min_child_samples': 8}. Best is trial 12 with value: 7.744580555420945.[0m


Early stopping, best iteration is:
[34]	valid_0's rmse: 7.74687
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[39]	valid_0's rmse: 7.74514
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:41:58,556][0m Trial 21 finished with value: 7.74486468639228 and parameters: {'learning_rate': 0.08862068322693592, 'subsample': 0.8235307812925255, 'colsample_bytree': 0.8400121328570277, 'subsample_freq': 1, 'reg_lambda': 48.574003313272236, 'reg_alpha': 9.068810863907077, 'min_child_weight': 194.09305833769616, 'min_child_samples': 240}. Best is trial 12 with value: 7.744580555420945.[0m


Early stopping, best iteration is:
[30]	valid_0's rmse: 7.74549
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[36]	valid_0's rmse: 7.74582
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:42:37,797][0m Trial 22 finished with value: 7.745524356677362 and parameters: {'learning_rate': 0.09078613392577423, 'subsample': 0.8020258254952701, 'colsample_bytree': 0.8276387838704017, 'subsample_freq': 1, 'reg_lambda': 19.273195458405915, 'reg_alpha': 7.920515496945461, 'min_child_weight': 213.95088536343292, 'min_child_samples': 241}. Best is trial 12 with value: 7.744580555420945.[0m


Early stopping, best iteration is:
[29]	valid_0's rmse: 7.74621
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[38]	valid_0's rmse: 7.74538
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:43:31,122][0m Trial 23 finished with value: 7.744900591123343 and parameters: {'learning_rate': 0.07552466334701005, 'subsample': 0.9210226772330463, 'colsample_bytree': 0.9830559267541416, 'subsample_freq': 1, 'reg_lambda': 41.72422185585016, 'reg_alpha': 20.259229416913314, 'min_child_weight': 262.56005230943384, 'min_child_samples': 30}. Best is trial 12 with value: 7.744580555420945.[0m


Early stopping, best iteration is:
[57]	valid_0's rmse: 7.74527
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[32]	valid_0's rmse: 7.74546
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:44:06,173][0m Trial 24 finished with value: 7.745277554037198 and parameters: {'learning_rate': 0.0922848153565242, 'subsample': 0.8439975524581309, 'colsample_bytree': 0.7346729379133021, 'subsample_freq': 1, 'reg_lambda': 19.785106621461498, 'reg_alpha': 0.14248410813089762, 'min_child_weight': 155.733832248529, 'min_child_samples': 73}. Best is trial 12 with value: 7.744580555420945.[0m


Early stopping, best iteration is:
[28]	valid_0's rmse: 7.74596
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[47]	valid_0's rmse: 7.74556
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:44:38,425][0m Trial 25 finished with value: 7.7443511015243764 and parameters: {'learning_rate': 0.06708778228872987, 'subsample': 0.6682126072656112, 'colsample_bytree': 0.6797661362187238, 'subsample_freq': 4, 'reg_lambda': 29.62573177775194, 'reg_alpha': 8.474459963363486, 'min_child_weight': 109.35156229664032, 'min_child_samples': 143}. Best is trial 25 with value: 7.7443511015243764.[0m


Early stopping, best iteration is:
[39]	valid_0's rmse: 7.74406
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[41]	valid_0's rmse: 7.74744
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:45:04,789][0m Trial 26 finished with value: 7.74680059295163 and parameters: {'learning_rate': 0.06985997885311013, 'subsample': 0.5423231503785397, 'colsample_bytree': 0.5209609334913776, 'subsample_freq': 4, 'reg_lambda': 28.275446473838162, 'reg_alpha': 27.731685338924112, 'min_child_weight': 342.4781251799802, 'min_child_samples': 145}. Best is trial 25 with value: 7.7443511015243764.[0m


Early stopping, best iteration is:
[36]	valid_0's rmse: 7.74714
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[57]	valid_0's rmse: 7.74571
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:45:38,302][0m Trial 27 finished with value: 7.745023332043146 and parameters: {'learning_rate': 0.05682019708094006, 'subsample': 0.6919822831964894, 'colsample_bytree': 0.6548279288832874, 'subsample_freq': 6, 'reg_lambda': 46.48518536129634, 'reg_alpha': 19.832001534528736, 'min_child_weight': 218.42714269587466, 'min_child_samples': 236}. Best is trial 25 with value: 7.7443511015243764.[0m


Early stopping, best iteration is:
[48]	valid_0's rmse: 7.74509
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[44]	valid_0's rmse: 7.747
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:46:11,793][0m Trial 28 finished with value: 7.745961340353425 and parameters: {'learning_rate': 0.0589788477446095, 'subsample': 0.6319674745709629, 'colsample_bytree': 0.6810352173954924, 'subsample_freq': 4, 'reg_lambda': 38.4640752337413, 'reg_alpha': 41.39511900236889, 'min_child_weight': 260.94401483344757, 'min_child_samples': 223}. Best is trial 25 with value: 7.7443511015243764.[0m


Early stopping, best iteration is:
[46]	valid_0's rmse: 7.74563
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[58]	valid_0's rmse: 7.74629
Training until validation scores don't improve for 200 rounds


[32m[I 2021-08-28 03:46:47,678][0m Trial 29 finished with value: 7.745309158485609 and parameters: {'learning_rate': 0.04756202181585209, 'subsample': 0.7650909083770525, 'colsample_bytree': 0.586204978685408, 'subsample_freq': 7, 'reg_lambda': 15.914689238099674, 'reg_alpha': 32.96590102044819, 'min_child_weight': 237.05384911756303, 'min_child_samples': 107}. Best is trial 25 with value: 7.7443511015243764.[0m


Early stopping, best iteration is:
[92]	valid_0's rmse: 7.74526
Number of finished trials: 30
Best trial: {'learning_rate': 0.06708778228872987, 'subsample': 0.6682126072656112, 'colsample_bytree': 0.6797661362187238, 'subsample_freq': 4, 'reg_lambda': 29.62573177775194, 'reg_alpha': 8.474459963363486, 'min_child_weight': 109.35156229664032, 'min_child_samples': 143}


In [13]:
study.best_params

{'learning_rate': 0.06708778228872987,
 'subsample': 0.6682126072656112,
 'colsample_bytree': 0.6797661362187238,
 'subsample_freq': 4,
 'reg_lambda': 29.62573177775194,
 'reg_alpha': 8.474459963363486,
 'min_child_weight': 109.35156229664032,
 'min_child_samples': 143}

# Log

===== 2 preds ======

7.744188593192304 no noise ver1 final

7.7462672672725175 row-wise ver3

7.74692198293704 random ver4

====== 4 preds ======