# Example 03: General Use of XGBoostRegressorBayesianOpt
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/slickml/slick-ml/blob/master/examples/optimization/example_03_XGBoostRegressorBayesianOpt.ipynb)

### Google Colab Configuration

In [1]:
# Only needed on Google Colab: uncomment the three lines below to clone the
# repository, enter it, and install its requirements.
# !git clone https://github.com/slickml/slick-ml.git
# %cd slick-ml
# !pip install -r requirements.txt

### Local Environment Configuration

In [2]:
# Change the working directory to the project root (two levels above this notebook).
# NOTE: the path is relative, so run this cell only once per kernel session;
# re-running it moves two more levels up.
%cd ../..

/home/amirhessam/Documents/GitHub/slick-ml


### Import Python Libraries

In [3]:
%load_ext autoreload

# widen the screen
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

# change the path and loading class
import os, sys
import pandas as pd
import numpy as np
import seaborn as sns

In [5]:
# trigger a reload of already-imported modules so that the import below
# picks up any local edits made to the slickml package
%autoreload
from slickml.optimization import XGBoostRegressorBayesianOpt

_____
# XGBoostRegressorBayesianOpt Docstring

In [6]:
# print the constructor signature and the class docstring
help(XGBoostRegressorBayesianOpt)

Help on class XGBoostRegressorBayesianOpt in module slickml.optimization:

class XGBoostRegressorBayesianOpt(slickml.regression.XGBoostCVRegressor)
 |  XGBoostRegressorBayesianOpt(n_iter=None, init_points=None, acq=None, pbounds=None, num_boost_round=None, n_splits=None, metrics=None, objective=None, early_stopping_rounds=None, random_state=None, shuffle=True, sparse_matrix=False, scale_mean=False, scale_std=False, importance_type=None, verbose=True)
 |  
 |  XGBoost Hyper-Parameters Tunning using Bayesian Optimization.
 |  This is wrapper using Bayesian Optimization to tune the parameters
 |  for XGBoost Regressor using xgboost.cv() model with n-folds
 |  cross-validation iteratively. This function is pretty useful find
 |  the optimized set of parameters before training. Please note that,
 |  the optimizier objective is always to maximize the target. Therefore,
 |  in case of using a metric such as logloss or error, the negative value
 |  of the metric will be maximized.
 |  
 |  Par

In [7]:
# load the regression data set; note that it is multi-target
# (it contains two target columns, TARGET1 and TARGET2)
df = pd.read_csv("data/reg_data.csv")
# preview the first two rows
df.head(2)

Unnamed: 0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,TARGET1,TARGET2
0,1.138,1.543333,289964.0,22.491483,111.289667,7584.0,7584.0,737.156,561.15,823.713,109600.0,99800.0,594700.0,101900.0,0.07137,0.082,0.95,0.975
1,2.088,3.086667,6960180.0,22.9361,113.807817,28204.0,28204.0,908.551,561.15,854.808,133100.0,99800.0,728200.0,101900.0,0.10655,0.287,0.95,0.975


In [8]:

# features: every column except the two targets; label: TARGET1 only
X = df.drop(columns=["TARGET1", "TARGET2"])
y = df["TARGET1"].values

In [17]:
from sklearn.model_selection import train_test_split

# hold out 20% of the rows as a test set; rows are shuffled with a fixed
# seed so the same split is produced on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=1367,
)

In [18]:
# build the Bayesian optimizer: 5 initial points followed by 10 iterations,
# scored with rmse. The optimizer always maximizes its target, so rmse
# shows up negated in the results.
xbo = XGBoostRegressorBayesianOpt(
    n_iter=10,
    init_points=5,
    metrics="rmse",
)

In [19]:
# run the optimization on the training split only; the printed table lists
# one row per evaluated parameter set, with "target" being the negated rmse
xbo.fit(X_train, y_train)

|   iter    |  target   | colsam... |   gamma   | learni... | max_depth | min_ch... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m-0.01472 [0m | [0m 0.8975  [0m | [0m 0.04571 [0m | [0m 0.6628  [0m | [0m 4.238   [0m | [0m 1.436   [0m | [0m 0.3064  [0m | [0m 0.7136  [0m | [0m 0.1931  [0m |
| [0m 2       [0m | [0m-0.01472 [0m | [0m 0.7904  [0m | [0m 0.6447  [0m | [0m 0.9152  [0m | [0m 3.334   [0m | [0m 3.238   [0m | [0m 0.7772  [0m | [0m 0.269   [0m | [0m 0.9726  [0m |
| [0m 3       [0m | [0m-0.01472 [0m | [0m 0.8498  [0m | [0m 0.6044  [0m | [0m 0.6874  [0m | [0m 6.651   [0m | [0m 15.7    [0m | [0m 0.061   [0m | [0m 0.5114  [0m | [0m 0.6848  [0m |
| [0m 4       [0m | [0m-0.01472 [0m | [0m 0.7297  [0m | [0m 0.8513  [0m | [0m 0.4627  [0m | [0m 4.757   [0m | [0m 4.965   [0m | [0m 0.9

In [20]:
# optimization results (or xbo.optimization_results_):
# one row per evaluated parameter set together with its (negated) rmse
xbo.get_optimization_results()

Unnamed: 0,colsample_bytree,gamma,learning_rate,max_depth,min_child_weight,reg_alpha,reg_lambda,subsample,rmse
0,0.89753,0.045712,0.662807,4.238468,1.43566,0.306424,0.713585,0.193055,-0.014719
1,0.790404,0.644709,0.91519,3.334492,3.23828,0.777161,0.26901,0.972576,-0.014722
2,0.849819,0.60437,0.687435,6.651023,15.698338,0.061001,0.511379,0.684811,-0.014721
3,0.729727,0.851274,0.462704,4.756996,4.964748,0.932765,0.362983,0.936539,-0.014723
4,0.542456,0.545092,0.878165,6.632704,5.028311,0.184497,0.333049,0.912511,-0.014722
5,0.989919,0.113178,0.499188,4.438035,1.4076,0.382232,0.589994,0.382848,-0.01472
6,0.944747,0.175445,0.988836,3.961777,1.968403,0.143046,0.95142,0.172068,-0.014719
7,0.322294,0.071075,0.960918,3.298961,1.476775,0.118518,0.969743,0.145927,-0.014719
8,0.97855,0.825309,0.42602,3.593515,1.124848,0.00297,0.992008,0.242077,-0.01472
9,0.326916,0.038696,0.723418,3.699749,1.555163,0.176325,0.078883,0.104183,-0.01472


In [21]:
# best performance (or xbo.best_performance_):
# the best-scoring row(s) of the optimization results; more than one row
# can come back, as in the output below
xbo.get_best_performance()

Unnamed: 0,colsample_bytree,gamma,learning_rate,max_depth,min_child_weight,reg_alpha,reg_lambda,subsample,rmse
0,0.944747,0.175445,0.988836,3.961777,1.968403,0.143046,0.95142,0.172068,-0.014719
1,0.322294,0.071075,0.960918,3.298961,1.476775,0.118518,0.969743,0.145927,-0.014719


In [22]:
# tuned params (or xbo.best_params_), returned as a dict;
# note that max_depth comes back as an integer (3.96 in the results -> 3 here)
xbo.get_best_params()

{'colsample_bytree': 0.9447466905645845,
 'gamma': 0.17544515751269651,
 'learning_rate': 0.9888359157005454,
 'max_depth': 3,
 'min_child_weight': 1.9684029067570108,
 'reg_alpha': 0.14304561710132546,
 'reg_lambda': 0.9514199873030839,
 'subsample': 0.17206756072404922}

In [23]:
# optimizer object (or xbo.optimizer_):
# the underlying bayes_opt BayesianOptimization instance
xbo.get_optimizer()

<bayes_opt.bayesian_optimization.BayesianOptimization at 0x7fe06f4959d0>

In [24]:
# optimization params boundaries (or xbo.pbounds):
# dict mapping each tuned hyper-parameter to its (lower, upper) search bound
xbo.get_pbounds()

{'max_depth': (2, 7),
 'learning_rate': (0, 1),
 'min_child_weight': (1, 20),
 'colsample_bytree': (0.1, 1.0),
 'subsample': (0.1, 1),
 'gamma': (0, 1),
 'reg_alpha': (0, 1),
 'reg_lambda': (0, 1)}