In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import random
import os
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

import lightgbm as lgb
#import xgboost as xgb
#import catboost as ctb

import warnings
warnings.simplefilter('ignore')

# Parameters

In [2]:
target = 'target'

DEBUG = False

if DEBUG:
    N_ESTIMATORS = 1
    N_SPLITS = 2
    SEED = 17
    CVSEED = 17
    EARLY_STOPPING_ROUNDS = 1
    VERBOSE = 100
    #N_ITERS = 2
else:
    N_SPLITS = 10
    N_ESTIMATORS = 20000
    EARLY_STOPPING_ROUNDS = 300
    VERBOSE = 1000
    SEED = 17
    CVSEED = 17
    #N_ITERS = 10

In [3]:
def set_seed(seed=17):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
set_seed(SEED)

# Load data

In [4]:
# Load the training data
train = pd.read_csv("../input/30-days-of-ml/train.csv")
test = pd.read_csv("../input/30-days-of-ml/test.csv")

# Preview the data
train.head()

Unnamed: 0,id,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,B,B,B,C,B,B,A,E,C,...,0.400361,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634
1,2,B,B,A,A,B,D,A,F,A,...,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233
2,3,A,A,A,C,B,D,A,D,A,...,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351
3,4,B,B,A,C,B,D,A,E,C,...,0.66898,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253
4,6,A,A,A,C,B,D,A,E,A,...,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226


In [5]:
train['lgb_pred'] = np.load("../input/30d-lgb-pred/4lgb_oof.npy")+ np.load("../input/30d-lgb-pred/5lgb_oof.npy")
test['lgb_pred'] = np.load("../input/30d-lgb-pred/4lgb_pred.npy")+ np.load("../input/30d-lgb-pred/5lgb_pred.npy")

train['xgb_pred'] = np.load("../input/30d-ml-2021-xgb/xgb_oof.npy")
test['xgb_pred'] = np.load("../input/30d-ml-2021-xgb/xgb_pred.npy")

train['lgb2_pred'] = np.load("../input/30d-ml-2021-lgb-model-2/lgb_oof.npy")
test['lgb2_pred'] = np.load("../input/30d-ml-2021-lgb-model-2/lgb_pred.npy")

train['xgb2_pred'] = np.load("../input/30d-ml-2021-xgb-model-2/xgb_oof.npy")
test['xgb2_pred'] = np.load("../input/30d-ml-2021-xgb-model-2/xgb_pred.npy")

train['ctb2_pred'] = np.load("../input/30d-ml-2021-catb-model-2/ctb_oof.npy")
test['ctb2_pred'] = np.load("../input/30d-ml-2021-catb-model-2/ctb_pred.npy")

train['lgb_p2_pred'] = np.load("../input/30d-ml-2021-lgb-pseudo2/lgb_oof.npy")
test['lgb_p2_pred'] = np.load("../input/30d-ml-2021-lgb-pseudo2/lgb_pred.npy")

train['xgb_p2_pred'] = np.load("../input/30d-ml-2021-xgb-pseudo2/xgb_oof.npy")
test['xgb_p2_pred'] = np.load("../input/30d-ml-2021-xgb-pseudo2/xgb_pred.npy")

train['lgb2_p2_pred'] = np.load("../input/30d-ml-2021-lgb-model-2-pseudo2/lgb_oof.npy")
test['lgb2_p2_pred'] = np.load("../input/30d-ml-2021-lgb-model-2-pseudo2/lgb_pred.npy")

train['xgb2_p2_pred'] = np.load("../input/30d-ml-2021-xgb-model-2-pseudo-2/xgb_oof.npy")
test['xgb2_p2_pred'] = np.load("../input/30d-ml-2021-xgb-model-2-pseudo-2/xgb_pred.npy")

train['ctb2_p2_pred'] = np.load("../input/30d-ml-2021-catb-model-2-pseudo2/ctb_oof.npy")
test['ctb2_p2_pred'] = np.load("../input/30d-ml-2021-catb-model-2-pseudo2/ctb_pred.npy")

In [6]:
# Separate target from features
y = train['target']
features = train.drop(['id','target'], axis=1)

# Preview features
features.head()

Unnamed: 0,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9,...,lgb_pred,xgb_pred,lgb2_pred,xgb2_pred,ctb2_pred,lgb_p2_pred,xgb_p2_pred,lgb2_p2_pred,xgb2_p2_pred,ctb2_p2_pred
0,B,B,B,C,B,B,A,E,C,N,...,8.41219,8.406845,8.429377,8.463968,8.42291,8.447788,8.409527,8.424738,8.45358,8.462934
1,B,B,A,A,B,D,A,F,A,O,...,8.362972,8.342352,8.298047,8.321292,8.315539,8.356257,8.358488,8.330516,8.345897,8.336991
2,A,A,A,C,B,D,A,D,A,F,...,8.214132,8.204342,8.222232,8.199663,8.207955,8.225424,8.228208,8.233979,8.237179,8.222193
3,B,B,A,C,B,D,A,E,C,K,...,8.38573,8.386736,8.393528,8.400892,8.363494,8.38451,8.419605,8.409301,8.40453,8.371811
4,A,A,A,C,B,D,A,E,A,N,...,8.205836,8.197277,8.20623,8.210944,8.227323,8.223536,8.206048,8.236329,8.230536,8.28097


# Preprocessing

In [7]:

# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# ordinal-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()
ordinal_encoder = OrdinalEncoder()
X[object_cols] = ordinal_encoder.fit_transform(features[object_cols])
X_test[object_cols] = ordinal_encoder.transform(test[object_cols])

# Preview the ordinal-encoded features
X.head()


Unnamed: 0,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9,...,lgb_pred,xgb_pred,lgb2_pred,xgb2_pred,ctb2_pred,lgb_p2_pred,xgb_p2_pred,lgb2_p2_pred,xgb2_p2_pred,ctb2_p2_pred
0,1.0,1.0,1.0,2.0,1.0,1.0,0.0,4.0,2.0,13.0,...,8.41219,8.406845,8.429377,8.463968,8.42291,8.447788,8.409527,8.424738,8.45358,8.462934
1,1.0,1.0,0.0,0.0,1.0,3.0,0.0,5.0,0.0,14.0,...,8.362972,8.342352,8.298047,8.321292,8.315539,8.356257,8.358488,8.330516,8.345897,8.336991
2,0.0,0.0,0.0,2.0,1.0,3.0,0.0,3.0,0.0,5.0,...,8.214132,8.204342,8.222232,8.199663,8.207955,8.225424,8.228208,8.233979,8.237179,8.222193
3,1.0,1.0,0.0,2.0,1.0,3.0,0.0,4.0,2.0,10.0,...,8.38573,8.386736,8.393528,8.400892,8.363494,8.38451,8.419605,8.409301,8.40453,8.371811
4,0.0,0.0,0.0,2.0,1.0,3.0,0.0,4.0,0.0,13.0,...,8.205836,8.197277,8.20623,8.210944,8.227323,8.223536,8.206048,8.236329,8.230536,8.28097


In [8]:
'''
# List of categorical columns
object_cols = [col for col in features.columns if 'cat' in col]

# one-hot-encode categorical columns
X = features.copy()
X_test = test.drop(['id'], axis=1).copy()

oh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")
X_ohe = oh_encoder.fit_transform(features[object_cols])
X_test_ohe = oh_encoder.transform(test[object_cols])

X_ohe = pd.DataFrame(X_ohe, columns=[f"ohe_{i}" for i in range(X_ohe.shape[1])])
X_test_ohe = pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])

X = pd.concat([X, X_ohe], axis=1)
X_test = pd.concat([X_test, X_test_ohe], axis=1)
X = X.drop(object_cols, axis=1)
X_test = X_test.drop(object_cols, axis=1)
    
# Preview the one-hot-encoded features
X.head()
'''

'\n# List of categorical columns\nobject_cols = [col for col in features.columns if \'cat\' in col]\n\n# one-hot-encode categorical columns\nX = features.copy()\nX_test = test.drop([\'id\'], axis=1).copy()\n\noh_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")\nX_ohe = oh_encoder.fit_transform(features[object_cols])\nX_test_ohe = oh_encoder.transform(test[object_cols])\n\nX_ohe = pd.DataFrame(X_ohe, columns=[f"ohe_{i}" for i in range(X_ohe.shape[1])])\nX_test_ohe = pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])\n\nX = pd.concat([X, X_ohe], axis=1)\nX_test = pd.concat([X_test, X_test_ohe], axis=1)\nX = X.drop(object_cols, axis=1)\nX_test = X_test.drop(object_cols, axis=1)\n    \n# Preview the one-hot-encoded features\nX.head()\n'

In [9]:
scale_features = [col for col in features.columns if 'cont' in col]

ss = StandardScaler()
X[scale_features] = ss.fit_transform(features[scale_features])
X_test[scale_features] = ss.transform(test[scale_features])

In [10]:
# Swap noise

# Random
def apply_noise_rn(df, p=.75):
    should_not_swap = np.random.binomial(1, p, df.shape)
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Row-wise
def apply_noise_row(df, p=.75):
    should_not_swap = np.zeros(df.shape)
    for i in range(df.shape[0]):
        for j in np.random.choice(df.shape[1],int(p*df.shape[1]),replace=False):
            should_not_swap[i,j]=1 
    corrupted_df = df.where(should_not_swap == 1, np.random.permutation(df))
    return corrupted_df

# Pseudo Label

In [11]:
pseudo = pd.read_csv("../input/competition-day-6-stacking10/submission.csv")[target]
train_pseudo = pd.concat([X, y], axis=1)
test_pseudo = pd.concat([X_test, pseudo], axis=1)
all_pseudo = pd.concat([train_pseudo, test_pseudo]).reset_index(drop=True)

# Optuna

In [12]:
# Optuna for parameter search
!pip install -q optuna

import optuna
import pickle



In [13]:
# for the fixed learning rate, use the opt n iterations and tune the tree hyperparameters
def objective(trial, X=all_pseudo.iloc[:,:-1], y=all_pseudo[target]):
  """
  """
  param_space = {
              #'device':'gpu',  # Use GPU acceleration
               #'boosting_type': 'gbdt',
               'reg_lambda':trial.suggest_uniform('reg_lambda', 0.01, 50),
              'reg_alpha':trial.suggest_uniform('reg_alpha', 0.01, 50),
                'subsample': trial.suggest_uniform('subsample', 0.1, 1.0),
              'colsample_bytree':trial.suggest_uniform('colsample_bytree', 0.1, 1.0),
                #'subsample_freq': trial.suggest_int('subsample_freq', 1, 10),
               'learning_rate':trial.suggest_uniform('learning_rate', 1e-3, 1e-1),
                 'min_child_samples':trial.suggest_int('min_child_samples', 5, 100),
              'num_leaves':trial.suggest_int('num_leaves', 10, 200),
              'max_depth':trial.suggest_int('max_depth', 3, 30),
              #'min_split_gain': trial.suggest_float('min_split_gain', 0.0, 0.005),
              #'class_weight':trial.suggest_categorical('class_weight',['balanced',None]),
               'n_estimators':N_ESTIMATORS,
                'n_jobs' : -1,
              'metric':'rmse',
              'max_bin':trial.suggest_int('max_bin', 300, 1000),
              'cat_smooth':trial.suggest_int('cat_smooth', 5, 100),
              'cat_l2':trial.suggest_loguniform('cat_l2', 1e-3, 100)
                }
            
  seed_list=[SEED, SEED+1]
  #kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=CVSEED)
  lgb_oof = np.zeros(train.shape[0])
  #lgb_pred = np.zeros(test.shape[0])
  X_tr, X_va, y_tr, y_va = train_test_split(X,y,test_size=.2,random_state=CVSEED)
  trn_idx = X_tr.index
  val_idx = X_va.index
  
  #for (trn_idx, val_idx) in enumerate(kf.split(X, y)):
  #print(f"===== fold {fold} =====")
  oof_idx = np.array([idx for idx in val_idx if idx < train.shape[0]])
  preds_idx = np.array([idx for idx in val_idx if idx >= train.shape[0]])

  X_train, y_train = all_pseudo.iloc[trn_idx, :-1], all_pseudo[target].iloc[trn_idx]
  #X_train = apply_noise_row(X_train)

  X_valid, y_valid = all_pseudo.iloc[oof_idx, :-1], all_pseudo[target].iloc[oof_idx]
  #X_test = all_pseudo[scale_features].iloc[preds_idx]
  
  #start = time.time()
  for inseed in seed_list:
    param_space['random_state'] = inseed

    model = lgb.LGBMRegressor(**param_space)
    model.fit(
        X_train, 
        y_train,
        eval_set=[(X_valid, y_valid)],
        eval_metric='rmse',
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        verbose=VERBOSE,
        categorical_feature=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        #callbacks=[optuna.integration.LightGBMPruningCallback(trial, metric='rmse')],
    )


    lgb_oof[oof_idx] += model.predict(X_valid) / len(seed_list)
    #lgb_pred[preds_idx-train.shape[0]] += model.predict(X_test) / len(seed_list)

  #elapsed = time.time() - start
  rmse = mean_squared_error(y_valid, lgb_oof[oof_idx], squared=False)
  #print(f"fold {fold} - lgb rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")            
  
  #print(f"oof lgb_rmse = {mean_squared_error(train[target], lgb_oof, squared=False)}")

  
  return rmse

In [14]:
study = optuna.create_study(direction='minimize')
study.optimize(objective,n_trials= 15)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-08-31 12:00:25,005][0m A new study created in memory with name: no-name-368a85b3-a2ed-441f-ae3e-133aa468be5f[0m


Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[49]	valid_0's rmse: 0.708379
Training until validation scores don't improve for 300 rounds


[32m[I 2021-08-31 12:00:47,528][0m Trial 0 finished with value: 0.7083360781008808 and parameters: {'reg_lambda': 16.71311410752518, 'reg_alpha': 29.150427411827117, 'subsample': 0.6911530701868912, 'colsample_bytree': 0.36777669382651357, 'learning_rate': 0.08176996518286897, 'min_child_samples': 66, 'num_leaves': 90, 'max_depth': 28, 'max_bin': 480, 'cat_smooth': 49, 'cat_l2': 0.019913160583344105}. Best is trial 0 with value: 0.7083360781008808.[0m


Early stopping, best iteration is:
[49]	valid_0's rmse: 0.708337
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[131]	valid_0's rmse: 0.708298
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[117]	valid_0's rmse: 0.708284


[32m[I 2021-08-31 12:01:15,670][0m Trial 1 finished with value: 0.7082794204503696 and parameters: {'reg_lambda': 39.83488026173493, 'reg_alpha': 12.318754186978246, 'subsample': 0.1592288428780441, 'colsample_bytree': 0.9051789510221443, 'learning_rate': 0.04359784605660522, 'min_child_samples': 84, 'num_leaves': 37, 'max_depth': 20, 'max_bin': 388, 'cat_smooth': 64, 'cat_l2': 28.143687816344105}. Best is trial 1 with value: 0.7082794204503696.[0m


Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[61]	valid_0's rmse: 0.708269
Training until validation scores don't improve for 300 rounds


[32m[I 2021-08-31 12:01:32,047][0m Trial 2 finished with value: 0.7082514515200967 and parameters: {'reg_lambda': 13.225379119482772, 'reg_alpha': 4.289762269509225, 'subsample': 0.29788655682109016, 'colsample_bytree': 0.5819198772725399, 'learning_rate': 0.08475833089974247, 'min_child_samples': 41, 'num_leaves': 32, 'max_depth': 11, 'max_bin': 308, 'cat_smooth': 40, 'cat_l2': 1.83524847415674}. Best is trial 2 with value: 0.7082514515200967.[0m


Early stopping, best iteration is:
[47]	valid_0's rmse: 0.708274
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[174]	valid_0's rmse: 0.708419
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[161]	valid_0's rmse: 0.708401


[32m[I 2021-08-31 12:02:25,425][0m Trial 3 finished with value: 0.7084026629645883 and parameters: {'reg_lambda': 39.25619870930596, 'reg_alpha': 43.93682590920673, 'subsample': 0.5290188541663902, 'colsample_bytree': 0.8364677438527506, 'learning_rate': 0.024586060938392548, 'min_child_samples': 53, 'num_leaves': 174, 'max_depth': 8, 'max_bin': 747, 'cat_smooth': 82, 'cat_l2': 0.003509146862127828}. Best is trial 2 with value: 0.7082514515200967.[0m


Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[118]	valid_0's rmse: 0.70833
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[109]	valid_0's rmse: 0.708317


[32m[I 2021-08-31 12:03:02,073][0m Trial 4 finished with value: 0.7083064232045374 and parameters: {'reg_lambda': 5.6706875877817025, 'reg_alpha': 17.258280583518463, 'subsample': 0.24673482774578187, 'colsample_bytree': 0.989032350948719, 'learning_rate': 0.03658190984206483, 'min_child_samples': 91, 'num_leaves': 78, 'max_depth': 18, 'max_bin': 340, 'cat_smooth': 80, 'cat_l2': 0.022906321882816685}. Best is trial 2 with value: 0.7082514515200967.[0m


Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[107]	valid_0's rmse: 0.70875
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[101]	valid_0's rmse: 0.708715


[32m[I 2021-08-31 12:03:28,260][0m Trial 5 finished with value: 0.7086909492317531 and parameters: {'reg_lambda': 5.348571138742473, 'reg_alpha': 44.49990098900952, 'subsample': 0.546856489337325, 'colsample_bytree': 0.12851108397155483, 'learning_rate': 0.058758335551058805, 'min_child_samples': 29, 'num_leaves': 158, 'max_depth': 16, 'max_bin': 917, 'cat_smooth': 35, 'cat_l2': 46.4604198553973}. Best is trial 2 with value: 0.7082514515200967.[0m


Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[39]	valid_0's rmse: 0.708473
Training until validation scores don't improve for 300 rounds


[32m[I 2021-08-31 12:03:51,467][0m Trial 6 finished with value: 0.7083964727362505 and parameters: {'reg_lambda': 4.862022247463815, 'reg_alpha': 2.73395010065672, 'subsample': 0.49210907433963125, 'colsample_bytree': 0.5297976005493427, 'learning_rate': 0.0790948251670354, 'min_child_samples': 62, 'num_leaves': 131, 'max_depth': 12, 'max_bin': 335, 'cat_smooth': 76, 'cat_l2': 0.14875226328546515}. Best is trial 2 with value: 0.7082514515200967.[0m


Early stopping, best iteration is:
[46]	valid_0's rmse: 0.708444
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[164]	valid_0's rmse: 0.708334
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[218]	valid_0's rmse: 0.708345


[32m[I 2021-08-31 12:04:31,525][0m Trial 7 finished with value: 0.7083249589581768 and parameters: {'reg_lambda': 49.96070483274772, 'reg_alpha': 10.265529367571869, 'subsample': 0.17920140865791978, 'colsample_bytree': 0.5918101233385454, 'learning_rate': 0.023464180145558983, 'min_child_samples': 88, 'num_leaves': 75, 'max_depth': 23, 'max_bin': 797, 'cat_smooth': 20, 'cat_l2': 0.0587009274898848}. Best is trial 2 with value: 0.7082514515200967.[0m


Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[47]	valid_0's rmse: 0.70841
Training until validation scores don't improve for 300 rounds


[32m[I 2021-08-31 12:05:07,392][0m Trial 8 finished with value: 0.7084028939524983 and parameters: {'reg_lambda': 38.4054120718746, 'reg_alpha': 49.8035413384593, 'subsample': 0.8465290613904913, 'colsample_bytree': 0.8773827118755158, 'learning_rate': 0.09012764008988502, 'min_child_samples': 62, 'num_leaves': 118, 'max_depth': 27, 'max_bin': 349, 'cat_smooth': 29, 'cat_l2': 24.85593801952012}. Best is trial 2 with value: 0.7082514515200967.[0m


Early stopping, best iteration is:
[37]	valid_0's rmse: 0.708452
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[87]	valid_0's rmse: 0.708457
Training until validation scores don't improve for 300 rounds


[32m[I 2021-08-31 12:05:28,769][0m Trial 9 finished with value: 0.7084692648048331 and parameters: {'reg_lambda': 40.12137315270792, 'reg_alpha': 26.06972991067325, 'subsample': 0.9631287285261552, 'colsample_bytree': 0.2083758099822828, 'learning_rate': 0.06608507463686712, 'min_child_samples': 93, 'num_leaves': 91, 'max_depth': 28, 'max_bin': 378, 'cat_smooth': 52, 'cat_l2': 8.182426559550203}. Best is trial 2 with value: 0.7082514515200967.[0m


Early stopping, best iteration is:
[67]	valid_0's rmse: 0.708537
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[51]	valid_0's rmse: 0.708254
Training until validation scores don't improve for 300 rounds


[32m[I 2021-08-31 12:05:41,734][0m Trial 10 finished with value: 0.7082304234010356 and parameters: {'reg_lambda': 19.519366734282507, 'reg_alpha': 1.3275018109541037, 'subsample': 0.3713936625423869, 'colsample_bytree': 0.5898403781754266, 'learning_rate': 0.09738055609191759, 'min_child_samples': 10, 'num_leaves': 10, 'max_depth': 3, 'max_bin': 549, 'cat_smooth': 18, 'cat_l2': 1.3203129968088911}. Best is trial 10 with value: 0.7082304234010356.[0m


Early stopping, best iteration is:
[57]	valid_0's rmse: 0.70822
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[60]	valid_0's rmse: 0.708232
Training until validation scores don't improve for 300 rounds


[32m[I 2021-08-31 12:05:57,909][0m Trial 11 finished with value: 0.7082241114084727 and parameters: {'reg_lambda': 20.1388248569007, 'reg_alpha': 4.881381271350994, 'subsample': 0.3404911440135218, 'colsample_bytree': 0.6518015401509105, 'learning_rate': 0.09949593978380408, 'min_child_samples': 5, 'num_leaves': 14, 'max_depth': 3, 'max_bin': 570, 'cat_smooth': 5, 'cat_l2': 1.4334223364439564}. Best is trial 11 with value: 0.7082241114084727.[0m


Early stopping, best iteration is:
[54]	valid_0's rmse: 0.708229
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[49]	valid_0's rmse: 0.708234
Training until validation scores don't improve for 300 rounds


[32m[I 2021-08-31 12:06:11,527][0m Trial 12 finished with value: 0.708244296376391 and parameters: {'reg_lambda': 26.0137254472435, 'reg_alpha': 0.9266991650544085, 'subsample': 0.37968064139321656, 'colsample_bytree': 0.7117196848428201, 'learning_rate': 0.0999888115938618, 'min_child_samples': 8, 'num_leaves': 14, 'max_depth': 3, 'max_bin': 572, 'cat_smooth': 12, 'cat_l2': 1.4099523588213076}. Best is trial 11 with value: 0.7082241114084727.[0m


Early stopping, best iteration is:
[50]	valid_0's rmse: 0.708264
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[61]	valid_0's rmse: 0.708243
Training until validation scores don't improve for 300 rounds


[32m[I 2021-08-31 12:06:24,883][0m Trial 13 finished with value: 0.7082360641953526 and parameters: {'reg_lambda': 24.551136116305493, 'reg_alpha': 18.72020079656227, 'subsample': 0.3809504732091884, 'colsample_bytree': 0.4171978419078962, 'learning_rate': 0.09927812951734935, 'min_child_samples': 6, 'num_leaves': 10, 'max_depth': 3, 'max_bin': 603, 'cat_smooth': 6, 'cat_l2': 1.087032661126781}. Best is trial 11 with value: 0.7082241114084727.[0m


Early stopping, best iteration is:
[56]	valid_0's rmse: 0.708244
Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[55]	valid_0's rmse: 0.708339
Training until validation scores don't improve for 300 rounds


[32m[I 2021-08-31 12:06:45,200][0m Trial 14 finished with value: 0.7083047506697105 and parameters: {'reg_lambda': 23.57220438069856, 'reg_alpha': 0.10047089215722771, 'subsample': 0.6942259734879492, 'colsample_bytree': 0.6910606389910317, 'learning_rate': 0.06875864902280626, 'min_child_samples': 20, 'num_leaves': 50, 'max_depth': 6, 'max_bin': 525, 'cat_smooth': 100, 'cat_l2': 0.34743782203473467}. Best is trial 11 with value: 0.7082241114084727.[0m


Early stopping, best iteration is:
[66]	valid_0's rmse: 0.70831
Number of finished trials: 15
Best trial: {'reg_lambda': 20.1388248569007, 'reg_alpha': 4.881381271350994, 'subsample': 0.3404911440135218, 'colsample_bytree': 0.6518015401509105, 'learning_rate': 0.09949593978380408, 'min_child_samples': 5, 'num_leaves': 14, 'max_depth': 3, 'max_bin': 570, 'cat_smooth': 5, 'cat_l2': 1.4334223364439564}


In [15]:
study.best_params

{'reg_lambda': 20.1388248569007,
 'reg_alpha': 4.881381271350994,
 'subsample': 0.3404911440135218,
 'colsample_bytree': 0.6518015401509105,
 'learning_rate': 0.09949593978380408,
 'min_child_samples': 5,
 'num_leaves': 14,
 'max_depth': 3,
 'max_bin': 570,
 'cat_smooth': 5,
 'cat_l2': 1.4334223364439564}

# Log

========== 7 preds ==============
====== Ordinal encoding =========

0.7084260065007167 no noise ver1

========== 10 preds ==============
====== Ordinal encoding =========

res

====== One-hot encoding =========

