# Example 04: General Use of XGBoostRegressorHyperOpt
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/slickml/slick-ml/blob/master/examples/optimization/example_04_XGBoostRegressorrHyperOpt.ipynb)

### Google Colab Configuration

In [1]:
# !git clone https://github.com/slickml/slick-ml.git
# %cd slick-ml
# !pip install -r requirements.txt

### Local Environment Configuration

In [2]:
# Change the working directory to the project root (two levels above this notebook)
%cd ../..

/home/amirhessam/Documents/GitHub/slick-ml


### Import Python Libraries

In [3]:
%load_ext autoreload

# widen the screen
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

# change the path and loading class
import os, sys
import pandas as pd
import numpy as np
import seaborn as sns

In [4]:
%autoreload
from slickml.optimization import XGBoostRegressorHyperOpt

----
# XGBoostRegressorHyperOpt Docstring

In [5]:
# load the data; note that this is a multi-output regression dataset (TARGET1 and TARGET2)
df = pd.read_csv("data/reg_data.csv")
# preview the first two rows
df.head(2)

Unnamed: 0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,TARGET1,TARGET2
0,1.138,1.543333,289964.0,22.491483,111.289667,7584.0,7584.0,737.156,561.15,823.713,109600.0,99800.0,594700.0,101900.0,0.07137,0.082,0.95,0.975
1,2.088,3.086667,6960180.0,22.9361,113.807817,28204.0,28204.0,908.551,561.15,854.808,133100.0,99800.0,728200.0,101900.0,0.10655,0.287,0.95,0.975


In [6]:
# features: every column except the two targets; label: TARGET1 only
X = df.drop(columns=["TARGET1", "TARGET2"])
y = df["TARGET1"].values

In [7]:
from sklearn.model_selection import train_test_split

# hold out 20% of the rows for testing; the fixed seed keeps the shuffled split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=1367,
)

In [8]:
# define the parameters' bounds
from hyperopt import hp

def get_xgb_params():
    """Build the hyperopt search space for the XGBoost regressor.

    The first four entries (``nthread``, ``booster``, ``tree_method``,
    ``objective``) are constants. Every other entry is a hyperopt ``hp``
    expression describing the range to sample from.

    Returns
    -------
    dict
        Mapping of XGBoost parameter name to either a constant or a
        hyperopt expression.

    Notes
    -----
    ``max_depth`` is defined with ``hp.choice``. The optimization output in
    this notebook reports it as an index into the options (values such as 1,
    which is not one of the options 2..7), not as the depth itself.
    """
    params = {
        # fixed settings (not searched)
        "nthread": 4,
        "booster": "gbtree",
        "tree_method": "hist",
        "objective": "reg:squarederror",
        # searched settings
        "max_depth": hp.choice("max_depth", range(2, 8)),
        "learning_rate": hp.quniform("learning_rate", 0.01, 0.50, 0.01),
        # "gamma" used to be listed twice; only the later (0.0 - 1.0) entry took
        # effect, so that single definition is kept here, in the same key position.
        "gamma": hp.quniform("gamma", 0.0, 1.0, 0.01),
        "min_child_weight": hp.quniform("min_child_weight", 1, 20, 1),
        "subsample": hp.quniform("subsample", 0.1, 1.0, 0.01),
        "colsample_bytree": hp.quniform("colsample_bytree", 0.1, 1.0, 0.01),
        "reg_alpha": hp.quniform("reg_alpha", 0.0, 1.0, 0.01),
        "reg_lambda": hp.quniform("reg_lambda", 0.0, 1.0, 0.01),
    }

    return params

In [9]:
# scratch check: hp.choice only builds a hyperopt expression object here.
# NOTE(review): this range (2..9) differs from the one in get_xgb_params (2..7)
# and the result is not assigned, so it does not affect the search space.
hp.choice("max_depth", range(2, 10, 1))

<hyperopt.pyll.base.Apply at 0x7f4280a3d290>

In [10]:
# set up the optimizer with the search space returned by get_xgb_params()
xho = XGBoostRegressorHyperOpt(
    num_boost_round=200,
    metrics="rmse",
    n_splits=4,
    shuffle=True,
    early_stopping_rounds=20,
    func_name="xgb_cv",
    space=get_xgb_params(),
    max_evals=100,
    verbose=False,
)

# run the search on the training split
xho.fit(X_train, y_train)

100%|██████████| 100/100 [01:07<00:00,  1.47trial/s, best loss: 0.007824]


### Best set of parameters from all runs

In [11]:
# NOTE: max_depth is shown as an index into the hp.choice options defined in
# get_xgb_params (range(2, 8)), not as the depth itself; e.g. 1 presumably maps
# to a depth of 3 - confirm with hyperopt.space_eval before reusing the value.
xho.get_optimization_results()

Unnamed: 0,colsample_bytree,gamma,learning_rate,max_depth,min_child_weight,reg_alpha,reg_lambda,subsample
0,0.69,0.41,0.08,1,10.0,0.59,0.2,0.52


### Results from each trial

In [12]:
import pprint
# pretty-print the raw record of every trial (sampled values, loss, status, timestamps);
# the output is long because it contains one entry per trial
pprint.pprint(xho.get_optimization_trials().trials)

[{'book_time': datetime.datetime(2021, 6, 6, 23, 35, 22, 484000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [0],
                    'gamma': [0],
                    'learning_rate': [0],
                    'max_depth': [0],
                    'min_child_weight': [0],
                    'reg_alpha': [0],
                    'reg_lambda': [0],
                    'subsample': [0]},
           'tid': 0,
           'vals': {'colsample_bytree': [0.9],
                    'gamma': [0.23],
                    'learning_rate': [0.29],
                    'max_depth': [2],
                    'min_child_weight': [8.0],
                    'reg_alpha': [0.59],
                    'reg_lambda': [0.87],
                    'subsample': [0.97]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 6, 6, 23, 35, 23, 37000),
  'result': {'loss': 0.014721, 'status': 'ok'},
  'spec': Non

  'owner': None,
  'refresh_time': datetime.datetime(2021, 6, 6, 23, 35, 28, 754000),
  'result': {'loss': 0.00956175, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 12,
  'version': 0},
 {'book_time': datetime.datetime(2021, 6, 6, 23, 35, 28, 762000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [13],
                    'gamma': [13],
                    'learning_rate': [13],
                    'max_depth': [13],
                    'min_child_weight': [13],
                    'reg_alpha': [13],
                    'reg_lambda': [13],
                    'subsample': [13]},
           'tid': 13,
           'vals': {'colsample_bytree': [0.87],
                    'gamma': [0.67],
                    'learning_rate': [0.23],
                    'max_depth': [4],
                    'min_child_weight': [16.0],
                    'reg_alpha': [0.42],
                    'reg_lambda': [0.81],
          

                    'subsample': [0.62]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 6, 6, 23, 35, 36, 916000),
  'result': {'loss': 0.01472125, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 25,
  'version': 0},
 {'book_time': datetime.datetime(2021, 6, 6, 23, 35, 36, 955000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [26],
                    'gamma': [26],
                    'learning_rate': [26],
                    'max_depth': [26],
                    'min_child_weight': [26],
                    'reg_alpha': [26],
                    'reg_lambda': [26],
                    'subsample': [26]},
           'tid': 26,
           'vals': {'colsample_bytree': [0.47000000000000003],
                    'gamma': [0.38],
                    'learning_rate': [0.49],
                    'max_depth': [1],
                    'min_child_weight': [6.0],
        

  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [41],
                    'gamma': [41],
                    'learning_rate': [41],
                    'max_depth': [41],
                    'min_child_weight': [41],
                    'reg_alpha': [41],
                    'reg_lambda': [41],
                    'subsample': [41]},
           'tid': 41,
           'vals': {'colsample_bytree': [0.51],
                    'gamma': [0.01],
                    'learning_rate': [0.28],
                    'max_depth': [3],
                    'min_child_weight': [11.0],
                    'reg_alpha': [0.75],
                    'reg_lambda': [0.1],
                    'subsample': [0.25]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 6, 6, 23, 35, 44, 910000),
  'result': {'loss': 0.014721, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 41,
  'version': 0},
 {'book_time': datetime.dat

                    'max_depth': [1],
                    'min_child_weight': [15.0],
                    'reg_alpha': [0.91],
                    'reg_lambda': [0.76],
                    'subsample': [0.92]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 6, 6, 23, 35, 55, 136000),
  'result': {'loss': 0.01472125, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 53,
  'version': 0},
 {'book_time': datetime.datetime(2021, 6, 6, 23, 35, 55, 221000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [54],
                    'gamma': [54],
                    'learning_rate': [54],
                    'max_depth': [54],
                    'min_child_weight': [54],
                    'reg_alpha': [54],
                    'reg_lambda': [54],
                    'subsample': [54]},
           'tid': 54,
           'vals': {'colsample_bytree': [0.76],
                    '

 {'book_time': datetime.datetime(2021, 6, 6, 23, 36, 11, 447000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [71],
                    'gamma': [71],
                    'learning_rate': [71],
                    'max_depth': [71],
                    'min_child_weight': [71],
                    'reg_alpha': [71],
                    'reg_lambda': [71],
                    'subsample': [71]},
           'tid': 71,
           'vals': {'colsample_bytree': [0.6900000000000001],
                    'gamma': [0.91],
                    'learning_rate': [0.21],
                    'max_depth': [1],
                    'min_child_weight': [9.0],
                    'reg_alpha': [0.12],
                    'reg_lambda': [0.09],
                    'subsample': [0.15]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 6, 6, 23, 36, 12, 270000),
  'result': {'loss': 0.014721, 'stat

 {'book_time': datetime.datetime(2021, 6, 6, 23, 36, 20, 7000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [82],
                    'gamma': [82],
                    'learning_rate': [82],
                    'max_depth': [82],
                    'min_child_weight': [82],
                    'reg_alpha': [82],
                    'reg_lambda': [82],
                    'subsample': [82]},
           'tid': 82,
           'vals': {'colsample_bytree': [0.8300000000000001],
                    'gamma': [0.81],
                    'learning_rate': [0.48],
                    'max_depth': [2],
                    'min_child_weight': [9.0],
                    'reg_alpha': [0.48],
                    'reg_lambda': [0.2],
                    'subsample': [0.73]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 6, 6, 23, 36, 20, 470000),
  'result': {'loss': 0.014721, 'status'

  'result': {'loss': 0.01472075, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 99,
  'version': 0}]
