In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, entry)
    for folder, _, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_file_path in input_file_paths:
    print(input_file_path)

Hello everyone!

**This notebook presents straightforward code for tuning the hyperparameters of LightGBM (LGBM), CatBoost (CAT), and XGBoost (XGB) with Bayesian Optimization. It serves the same purpose as GridSearchCV and RandomizedSearchCV.**

GridSearchCV evaluates every combination of parameters in the grid, which can take a very long time and is not very efficient. RandomizedSearchCV samples combinations at random, so it can miss the optimal combination, especially when the search space is enormous. Bayesian Optimization is a smarter way to tune hyperparameters: it uses the results of earlier trials to decide which combination to try next. I won't discuss the theory behind it in this notebook, which is meant to stay simple and practical.

If you have any questions regarding the code, please comment below. I will update the notebook accordingly.

**Please do upvote the notebook if this notebook helps you as it will be a benchmark for me to do more work in the future. Thank you :)**

In [None]:
from mlxtend.regressor import StackingCVRegressor
from sklearn.datasets import load_boston
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn import svm
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.neural_network import MLPRegressor
from sklearn.decomposition import PCA

from bayes_opt import BayesianOptimization
import warnings
# Install a global "ignore" filter so library warnings do not clutter the
# cell outputs.
warnings.simplefilter('ignore')

# Seed passed to the models and CV routines below that accept one.
RANDOM_SEED = 123

In [None]:
# Competition data: training rows, test rows, and the submission template
# that is filled in at the end of the notebook.
train = pd.read_csv(
    filepath_or_buffer="/kaggle/input/tabular-playground-series-jan-2021/train.csv"
)
test = pd.read_csv(
    filepath_or_buffer="/kaggle/input/tabular-playground-series-jan-2021/test.csv"
)
sample = pd.read_csv(
    filepath_or_buffer="/kaggle/input/tabular-playground-series-jan-2021/sample_submission.csv"
)

In [None]:
# Ratio ("magic") features: new column name -> (numerator, denominator).
_MAGIC_RATIOS = {
    'magic1': ('cont10', 'cont11'),
    'magic2': ('cont11', 'cont10'),
    'magic3': ('cont1', 'cont7'),
    'magic4': ('cont7', 'cont1'),
    'magic5': ('cont4', 'cont6'),
}

# Add the same columns, in the same order, to both frames so the training and
# test feature sets stay aligned.
for frame in (train, test):
    for new_col, (numerator, denominator) in _MAGIC_RATIOS.items():
        frame[new_col] = frame[numerator] / frame[denominator]

In [None]:
# Drop the row identifier from both frames.  errors='ignore' keeps this cell
# re-runnable: on a second execution 'id' is already gone and a plain drop
# would raise KeyError.
train = train.drop(columns='id', errors='ignore')
test = test.drop(columns='id', errors='ignore')

# Feature matrix (everything except the label) and the regression target.
X = train.drop(columns='target')
y = train['target']

In [None]:
from catboost import CatBoostRegressor
# Baseline CatBoost model.  Seeded with RANDOM_SEED like the LightGBM and
# XGBoost baselines so repeated runs are comparable; verbose=False suppresses
# the per-iteration training log.
cat = CatBoostRegressor(iterations=1000, random_seed=RANDOM_SEED, verbose=False)

In [None]:
# 3-fold CV baseline for CatBoost.
# The "neg_root_mean_squared_error" scorer returns negated RMSE values
# (scikit-learn scorers follow a higher-is-better convention), so the sign is
# flipped before printing to report the actual, positive RMSE.
score = cross_val_score(cat, X, y, cv=3, scoring="neg_root_mean_squared_error", n_jobs=-1)
print("CAT RMSE Mean Score: ", -np.mean(score))

In [None]:
# 10-fold CV baseline for CatBoost.
# The "neg_root_mean_squared_error" scorer returns negated RMSE values
# (scikit-learn scorers follow a higher-is-better convention), so the sign is
# flipped before printing to report the actual, positive RMSE.
score = cross_val_score(cat, X, y, cv=10, scoring="neg_root_mean_squared_error", n_jobs=-1)
print("CAT RMSE Mean Score: ", -np.mean(score))

In [None]:
import lightgbm
# Baseline LightGBM regressor with default hyperparameters, seeded and
# evaluated on RMSE.
lgbm = lightgbm.LGBMRegressor(
    random_state=RANDOM_SEED,
    n_jobs=-1,
    metric='rmse',
)

In [None]:
# 3-fold CV baseline for LightGBM.
# The "neg_root_mean_squared_error" scorer returns negated RMSE values
# (scikit-learn scorers follow a higher-is-better convention), so the sign is
# flipped before printing to report the actual, positive RMSE.
score = cross_val_score(lgbm, X, y, cv=3, scoring="neg_root_mean_squared_error", n_jobs=-1)
print("LGBM RMSE Mean Score: ", -np.mean(score))

In [None]:
# 10-fold CV baseline for LightGBM.
# The "neg_root_mean_squared_error" scorer returns negated RMSE values
# (scikit-learn scorers follow a higher-is-better convention), so the sign is
# flipped before printing to report the actual, positive RMSE.
score = cross_val_score(lgbm, X, y, cv=10, scoring="neg_root_mean_squared_error", n_jobs=-1)
print("LGBM RMSE Mean Score: ", -np.mean(score))

In [None]:
from xgboost import XGBRegressor
# Baseline XGBoost regressor with default hyperparameters, seeded for
# repeatable runs.
xgbr = XGBRegressor(
    random_state=RANDOM_SEED,
)

In [None]:
# 3-fold CV baseline for XGBoost.
# The "neg_root_mean_squared_error" scorer returns negated RMSE values
# (scikit-learn scorers follow a higher-is-better convention), so the sign is
# flipped before printing to report the actual, positive RMSE.
score = cross_val_score(xgbr, X, y, cv=3, scoring="neg_root_mean_squared_error", n_jobs=-1)
print("XGB RMSE Mean Score: ", -np.mean(score))

Now we will use Bayesian Optimization to tune the hyperparameters. Our goal is to minimize the RMSE, but the Bayesian Optimization library used here only supports maximization. That is why each objective function returns the negative RMSE: maximizing the negative RMSE is equivalent to minimizing the RMSE. It is just a matter of sign.

You can also adjust which parameters you want to tune and the range of each hyperparameter. You can also choose how many initial random points (`init_points`) and how many optimization iterations (`n_iter`) to run.

## LGBM Tuning

In [None]:
# LightGBM training data, built once and reused by every optimiser step.
# It has a section-specific name because hyp_lgbm looks the dataset up at call
# time: a cell that rebinds the generic name `dtrain` to another library's
# data type must not change what this objective trains on.
dtrain_lgbm = lightgbm.Dataset(data=X, label=y)

def hyp_lgbm(num_leaves, feature_fraction, bagging_fraction, max_depth, min_split_gain, min_child_weight, learning_rate):
    """Objective for Bayesian optimisation of LightGBM hyperparameters.

    Runs a 3-fold ``lightgbm.cv`` on ``dtrain_lgbm`` with up to 5000 boosting
    rounds and early stopping after 100 rounds without improvement.

    Parameters
    ----------
    num_leaves, max_depth : float
        Continuous proposals from the optimiser; rounded to the nearest int.
    feature_fraction, bagging_fraction : float
        Clamped to the interval [0, 1].
    min_split_gain, min_child_weight, learning_rate : float
        Passed to LightGBM unchanged.

    Returns
    -------
    float
        The negated minimum of the ``'rmse-mean'`` series returned by
        ``lightgbm.cv`` (negated because the optimiser maximises).
    """
    # NOTE(review): LightGBM documents that bagging_fraction only takes effect
    # when bagging_freq is non-zero; bagging_freq is not set here, so this
    # dimension of the search may be inert - confirm against the LightGBM docs.
    params = {
        'application': 'regression',
        'num_iterations': 5000,
        'early_stopping_round': 100,
        'metric': 'rmse',
        'num_leaves': int(round(num_leaves)),
        'feature_fraction': max(min(feature_fraction, 1), 0),
        'bagging_fraction': max(min(bagging_fraction, 1), 0),
        'max_depth': int(round(max_depth)),
        'min_split_gain': min_split_gain,
        'min_child_weight': min_child_weight,
        'learning_rate': learning_rate,
    }
    cv_result = lightgbm.cv(params, dtrain_lgbm, nfold=3,
                            seed=RANDOM_SEED, stratified=False,
                            verbose_eval=None, metrics=['rmse'])

    return -np.min(cv_result['rmse-mean'])

In [None]:
# LightGBM search space: (lower, upper) bounds for each argument of hyp_lgbm.
pds = dict(
    num_leaves=(5, 50),
    feature_fraction=(0.2, 1),
    bagging_fraction=(0.2, 1),
    max_depth=(2, 20),
    min_split_gain=(0.001, 0.1),
    min_child_weight=(10, 50),
    learning_rate=(0.01, 0.5),
)

In [None]:
# optimizer = BayesianOptimization(hyp_lgbm,pds,random_state=RANDOM_SEED)
# optimizer.maximize(init_points=10, n_iter=50)

In [None]:
# optimizer.max['params']

## CAT Tuning

In [None]:
import catboost as cgb

def cat_hyp(depth, bagging_temperature, l2_leaf_reg, learning_rate):
    """Objective for Bayesian optimisation of CatBoost hyperparameters.

    Builds a CatBoost ``Pool`` from the global ``X`` / ``y`` and runs a 3-fold
    ``catboost.cv`` with 100 iterations and an RMSE loss.

    ``depth`` is rounded to the nearest integer; the other arguments are
    forwarded unchanged.  Returns the negated minimum of the
    ``'test-RMSE-mean'`` column (negated because the optimiser maximises).
    """
    params = {
        "iterations": 100,
        "loss_function": "RMSE",
        "verbose": False,
        "depth": int(round(depth)),
        "bagging_temperature": bagging_temperature,
        "learning_rate": learning_rate,
        "l2_leaf_reg": l2_leaf_reg,
    }

    # No categorical features are declared for this dataset, hence the empty list.
    pool = cgb.Pool(data=X, label=y, cat_features=[])

    cv_table = cgb.cv(pool, params, fold_count=3)
    return -np.min(cv_table['test-RMSE-mean'])

In [None]:
# Search space
# CatBoost search space: (lower, upper) bounds for each argument of cat_hyp.
pds = dict(
    depth=(4, 10),
    bagging_temperature=(0.1, 10),
    l2_leaf_reg=(0.1, 10),
    learning_rate=(0.1, 0.2),
)

In [None]:
# optimizer = BayesianOptimization(cat_hyp, pds, random_state=RANDOM_SEED)
# optimizer.maximize(init_points=10, n_iter=50)

In [None]:
# optimizer.max['params']

## XGB Tuning

In [None]:
import xgboost as xgb

# XGBoost training matrix, built once and reused by every optimiser step.
# It has a section-specific name so that this DMatrix does not replace a
# dataset that another section bound to the generic name `dtrain`.
dtrain_xgb = xgb.DMatrix(X, y, feature_names=X.columns.values)

def hyp_xgb(max_depth, subsample, colsample_bytree,min_child_weight, gamma, learning_rate):
    """Objective for Bayesian optimisation of XGBoost hyperparameters.

    Runs a 3-fold ``xgb.cv`` on ``dtrain_xgb`` with up to 500 boosting rounds
    and early stopping after 10 rounds without improvement.

    Parameters
    ----------
    max_depth : float
        Rounded to the nearest int.
    subsample, colsample_bytree : float
        Clamped to the interval [0, 1].
    min_child_weight : float
        Truncated to int.
    gamma : float
        Floored at 0.
    learning_rate : float
        Passed through unchanged.

    Returns
    -------
    float
        The negated last entry of the ``'test-rmse-mean'`` column returned by
        ``xgb.cv`` (negated because the optimiser maximises).
    """
    params = {
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'nthread': -1,
        'max_depth': int(round(max_depth)),
        'subsample': max(min(subsample, 1), 0),
        'colsample_bytree': max(min(colsample_bytree, 1), 0),
        'min_child_weight': int(min_child_weight),
        'gamma': max(gamma, 0),
        'learning_rate': learning_rate,
    }
    scores = xgb.cv(params, dtrain_xgb, num_boost_round=500, verbose_eval=False,
                    early_stopping_rounds=10, nfold=3)
    return -scores['test-rmse-mean'].iloc[-1]

In [None]:
# XGBoost search space: (lower, upper) bounds for each argument of hyp_xgb.
pds = dict(
    min_child_weight=(3, 20),
    gamma=(0, 5),
    subsample=(0.7, 1),
    colsample_bytree=(0.1, 1),
    max_depth=(3, 10),
    learning_rate=(0.01, 0.5),
)

In [None]:
# optimizer = BayesianOptimization(hyp_xgb, pds, random_state=RANDOM_SEED)
# optimizer.maximize(init_points=4, n_iter=15)

In [None]:
# optimizer.max['params']

## Stacking

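We will use the models with the best parameters found above as the base learners and Linear Regression as the meta-learner. You can also tune the meta-learner's parameters. Also, make sure to convert the integer-valued parameters (e.g. `max_depth`, `num_leaves`) to integers, using the same rounding as the objective functions.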

In [None]:
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression

In [None]:
# Tuned LightGBM parameters for the stacking stage.
# Integer-valued parameters are converted with int(round(...)), the same
# conversion hyp_lgbm applies, so the final model uses the configuration the
# optimiser actually evaluated (plain int() would truncate 7.66 to 7).
# min_child_weight is passed to LightGBM as a float by hyp_lgbm, so it is kept
# as a float here as well.
param_lgbm = {
     'bagging_fraction': 0.973905385549851,
     'feature_fraction': 0.2945585590881137,
     'learning_rate': 0.03750332268701348,
     'max_depth': int(round(7.66)),
     'min_child_weight': 41.36,
     'min_split_gain': 0.04033836353603582,
     'num_leaves': int(round(46.42)),
     'application':'regression',
     'num_iterations': 5000,
     'metric': 'rmse'
}

# Tuned CatBoost parameters for the stacking stage; depth is converted to an
# integer, the rest are used as given.
param_cat = dict(
    bagging_temperature=0.31768713094131684,
    depth=int(8.03),
    l2_leaf_reg=1.3525686450404295,
    learning_rate=0.2,
    iterations=100,
    loss_function='RMSE',
    verbose=False,
)

# Tuned XGBoost parameters for the stacking stage.
# - max_depth uses int(round(...)), the conversion hyp_xgb applies, so the
#   final model matches the evaluated configuration (int() would give 9).
# - min_child_weight uses int(...), matching the truncation in hyp_xgb.
# - n_estimators replaces the misspelled 'num_boost_roun' key: these
#   parameters are unpacked into XGBRegressor, whose boosting-round count is
#   n_estimators, so the misspelled key never set the 500 rounds.
param_xgb = {
     'colsample_bytree': 0.8119098377889549,
     'gamma': 2.244423418642122,
     'learning_rate': 0.015800631696721114,
     'max_depth': int(round(9.846)),
     'min_child_weight': int(15.664),
     'subsample': 0.82345,
     'objective': 'reg:squarederror',
     'eval_metric':'rmse',
     'n_estimators': 500
}

In [None]:
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn import svm
import lightgbm

In [None]:
# Base learners for the stack.  Every estimator is seeded with RANDOM_SEED so
# the stacked predictions are repeatable (CatBoost and the MLP were previously
# the only unseeded members).  XGBRegressor takes its thread count through the
# scikit-learn style n_jobs argument, the same one LGBMRegressor is given.
estimators = [
        ('lgbm', lightgbm.LGBMRegressor(**param_lgbm, random_state=RANDOM_SEED, n_jobs=-1)),
        ('xgbr', XGBRegressor(**param_xgb, random_state=RANDOM_SEED, n_jobs=-1)),
        ('cat', CatBoostRegressor(**param_cat, random_seed=RANDOM_SEED)),
        ('mlp', MLPRegressor(random_state=RANDOM_SEED)) # without tuning
]

In [None]:
# Stack the base learners under a linear-regression meta-learner; the
# meta-learner's training inputs come from 5-fold cross-validated predictions.
stacking_options = dict(
    estimators=estimators,
    final_estimator=LinearRegression(),
    n_jobs=-1,
    cv=5,
)
reg = StackingRegressor(**stacking_options)

# fit() returns the fitted estimator, so training and prediction on the test
# features can be chained.
y_pred = reg.fit(X, y).predict(test)

In [None]:
# Store the stacked predictions in the submission template's `target` column
# and write the file without the index column.
sample = sample.assign(target=y_pred)
sample.to_csv("submission.csv", index=False)