# Example 02: General Use of XGBoostClassifierHyperOpt
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/slickml/slick-ml/blob/master/examples/optimization/example_02_XGBoostClassifierHyperOpt.ipynb)

### Google Colab Configuration

In [1]:
# !git clone https://github.com/slickml/slick-ml.git
# %cd slick-ml
# !pip install -r requirements.txt

### Local Environment Configuration

In [2]:
# Change path to project root
%cd ../..

/Users/atahmassebi/Desktop/AmirStuff/GitHub/slick-ml


### Import Python Libraries

In [4]:
%load_ext autoreload

# widen the screen
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import HTML, display

# Inject a CSS rule so the notebook container uses 95% of the browser width.
display(HTML("<style>.container { width:95% !important; }</style>"))

# change the path and loading class
import os, sys
import pandas as pd
import numpy as np
import seaborn as sns

In [4]:
%autoreload
from slickml.optimization import XGBoostClassifierHyperOpt

----
# XGBoostClassifierHyperOpt Docstring

In [6]:
from sklearn.datasets import load_breast_cancer

# Load the dataset bundle, then unpack its `data` and `target` attributes
# into the feature matrix X and the label vector y.
data = load_breast_cancer()
X, y = data.data, data.target

In [8]:
import xgboost as xgb

# Stratified, shuffled 4-fold cross-validation scored with AUC.
# NOTE(review): `num_boost_round` is not passed and the recorded output ends at
# round index 9, so `early_stopping_rounds=20` presumably never triggers here
# -- TODO confirm this is intended.
cvr = xgb.cv(
    params=dict(objective="binary:logistic"),
    dtrain=xgb.DMatrix(X, label=y),
    metrics="auc",
    nfold=4,
    stratified=True,
    shuffle=True,
    seed=1367,
    early_stopping_rounds=20,
)

In [10]:
# Last row of the first column, kept as a one-element Series
cvr.iloc[:, 0].tail(1)

9    0.999982
Name: train-auc-mean, dtype: float64

In [11]:
# Show the final two rows of the cross-validation history
cvr.iloc[-2:]

Unnamed: 0,train-auc-mean,train-auc-std,test-auc-mean,test-auc-std
8,0.99993,9.5e-05,0.989341,0.004216
9,0.999982,3e-05,0.988919,0.004086


In [12]:
# Value in the last row, third column (test-auc-mean in the recorded output).
# Use a single positional `.iloc[row, col]` lookup: indexing the row Series with
# an integer key (`cvr.iloc[-1][2]`) relies on pandas' deprecated
# integer-as-position fallback for label-indexed Series.
cvr.iloc[-1, 2]

0.9889189221020893

In [6]:
# Split the frame into features (every column except CLASS) and labels (CLASS).
# NOTE(review): `df` is not created in any cell visible above -- presumably a
# data-loading cell is missing or was run out of order; TODO confirm.
X = df.drop(columns=["CLASS"])
y = df["CLASS"].values

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the split is shuffled, stratified on
# the labels, and seeded so it can be reproduced.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1367,
    shuffle=True,
    stratify=y,
)

In [8]:
# define the parameters' bounds

from hyperopt import hp


def get_xgb_params():
    """Build the hyperopt search space for the XGBoost classifier.

    The returned mapping mixes two kinds of entries:

    * constants (``nthread``, ``booster``, ``tree_method``, ``objective``)
      that are the same for every evaluation, and
    * ``hyperopt.hp`` expressions (``hp.choice`` / ``hp.quniform``) that
      describe the range each tunable parameter is drawn from.

    Each parameter is defined exactly once. Repeating a key in a dict
    literal silently keeps only the last value, which hides the earlier
    definition from the reader and from the optimizer.

    Returns
    -------
    dict
        Parameter name -> constant value or ``hp`` search expression.
    """
    params = {
        # fixed settings
        "nthread": 4,
        "booster": "gbtree",
        "tree_method": "hist",
        "objective": "binary:logistic",
        # tunable settings
        "max_depth": hp.choice("max_depth", range(2, 7)),
        "learning_rate": hp.quniform("learning_rate", 0.01, 0.50, 0.01),
        # single definition of gamma, searched over 0.0-1.0 in steps of 0.01
        "gamma": hp.quniform("gamma", 0.0, 1.0, 0.01),
        "min_child_weight": hp.quniform("min_child_weight", 1, 10, 1),
        "subsample": hp.quniform("subsample", 0.1, 1, 0.01),
        "colsample_bytree": hp.quniform("colsample_bytree", 0.1, 1.0, 0.01),
        "reg_alpha": hp.quniform("reg_alpha", 0.0, 1.0, 0.01),
        "reg_lambda": hp.quniform("reg_lambda", 0.0, 1.0, 0.01),
    }

    return params

In [16]:
# define the parameters' bounds

from hyperopt import hp
from hyperopt.pyll.stochastic import sample


# Minimal parameter mapping used to try out `sample`.
# Note: `max_depth` is a plain Python list here, not an `hp` expression, so it
# is a constant rather than a search dimension (the recorded sample output
# returns the whole sequence unchanged).
params = dict(
    nthread=4,
    booster="gbtree",
    tree_method="hist",
    objective="binary:logistic",
    max_depth=[2, 3, 4, 5],
)

In [19]:
# Draw one sample from the `params` space and display it
p = sample(params)
p

{'booster': 'gbtree',
 'max_depth': (2, 3, 4, 5),
 'nthread': 4,
 'objective': 'binary:logistic',
 'tree_method': 'hist'}

In [14]:
# Add an extra key to the sampled dict in place
p["x"] = "foo"

In [15]:
# Display the sampled dict after the in-place update.
# NOTE(review): the recorded output below shows max_depth == 6, which the
# `params` mapping defined above ([2, 3, 4, 5]) cannot yield -- it looks like
# stale output from an earlier, out-of-order run; re-run the cells in order.
p

{'booster': 'gbtree',
 'max_depth': 6,
 'nthread': 4,
 'objective': 'binary:logistic',
 'tree_method': 'hist',
 'x': 'foo'}

In [9]:
# Set up the hyper-parameter search over the space returned by
# get_xgb_params(), with the "logloss" metric, 3 shuffled splits and at most
# 100 evaluations.
xho = XGBoostClassifierHyperOpt(
    space=get_xgb_params(),
    func_name="xgb_cv",
    metrics="logloss",
    n_splits=3,
    shuffle=True,
    num_boost_round=200,
    early_stopping_rounds=20,
    max_evals=100,
    verbose=False,
)

# Run the search on the training split
xho.fit(X_train, y_train)

100%|████████████████| 100/100 [00:08<00:00, 11.75trial/s, best loss: 0.16043033333333334]


### Best set of parameters from all runs

In [10]:
# Show the optimizer's result table (one row of tuned parameters in the recorded output)
xho.get_optimization_results()

Unnamed: 0,colsample_bytree,gamma,learning_rate,max_depth,min_child_weight,reg_alpha,reg_lambda,subsample
0,0.74,0.21,0.31,4,7.0,0.85,0.62,0.5


### Results from each trial

In [11]:
import pprint

# Pretty-print the raw per-trial records stored on the optimizer's trials object.
# NOTE(review): this prints every trial and produces very long output;
# consider slicing (e.g. `.trials[:3]`) before sharing the notebook.
pprint.pprint(xho.get_optimization_trials().trials)

[{'book_time': datetime.datetime(2021, 9, 5, 2, 55, 16, 512000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [0],
                    'gamma': [0],
                    'learning_rate': [0],
                    'max_depth': [0],
                    'min_child_weight': [0],
                    'reg_alpha': [0],
                    'reg_lambda': [0],
                    'subsample': [0]},
           'tid': 0,
           'vals': {'colsample_bytree': [0.78],
                    'gamma': [0.66],
                    'learning_rate': [0.22],
                    'max_depth': [2],
                    'min_child_weight': [5.0],
                    'reg_alpha': [0.43],
                    'reg_lambda': [0.29],
                    'subsample': [0.47000000000000003]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 9, 5, 2, 55, 16, 566000),
  'result': {'loss': 0.23769966666666667, 'sta

  'state': 2,
  'tid': 20,
  'version': 0},
 {'book_time': datetime.datetime(2021, 9, 5, 2, 55, 17, 946000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [21],
                    'gamma': [21],
                    'learning_rate': [21],
                    'max_depth': [21],
                    'min_child_weight': [21],
                    'reg_alpha': [21],
                    'reg_lambda': [21],
                    'subsample': [21]},
           'tid': 21,
           'vals': {'colsample_bytree': [0.63],
                    'gamma': [0.4],
                    'learning_rate': [0.42],
                    'max_depth': [4],
                    'min_child_weight': [3.0],
                    'reg_alpha': [0.97],
                    'reg_lambda': [0.76],
                    'subsample': [0.59]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 9, 5, 2, 55, 17, 982000),
  'result

  'refresh_time': datetime.datetime(2021, 9, 5, 2, 55, 18, 686000),
  'result': {'loss': 0.25560466666666665, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 28,
  'version': 0},
 {'book_time': datetime.datetime(2021, 9, 5, 2, 55, 18, 703000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [29],
                    'gamma': [29],
                    'learning_rate': [29],
                    'max_depth': [29],
                    'min_child_weight': [29],
                    'reg_alpha': [29],
                    'reg_lambda': [29],
                    'subsample': [29]},
           'tid': 29,
           'vals': {'colsample_bytree': [0.72],
                    'gamma': [0.14],
                    'learning_rate': [0.49],
                    'max_depth': [4],
                    'min_child_weight': [10.0],
                    'reg_alpha': [0.72],
                    'reg_lambda': [1.0],
                    '

                    'subsample': [0.55]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 9, 5, 2, 55, 19, 806000),
  'result': {'loss': 0.318191, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 44,
  'version': 0},
 {'book_time': datetime.datetime(2021, 9, 5, 2, 55, 19, 826000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [45],
                    'gamma': [45],
                    'learning_rate': [45],
                    'max_depth': [45],
                    'min_child_weight': [45],
                    'reg_alpha': [45],
                    'reg_lambda': [45],
                    'subsample': [45]},
           'tid': 45,
           'vals': {'colsample_bytree': [0.77],
                    'gamma': [0.28],
                    'learning_rate': [0.35000000000000003],
                    'max_depth': [4],
                    'min_child_weight': [7.0],
            

                    'min_child_weight': [7.0],
                    'reg_alpha': [0.29],
                    'reg_lambda': [0.8],
                    'subsample': [0.74]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 9, 5, 2, 55, 20, 593000),
  'result': {'loss': 0.28215033333333334, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 52,
  'version': 0},
 {'book_time': datetime.datetime(2021, 9, 5, 2, 55, 20, 615000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [53],
                    'gamma': [53],
                    'learning_rate': [53],
                    'max_depth': [53],
                    'min_child_weight': [53],
                    'reg_alpha': [53],
                    'reg_lambda': [53],
                    'subsample': [53]},
           'tid': 53,
           'vals': {'colsample_bytree': [0.6900000000000001],
                    'gamma': [0.19],
   

 {'book_time': datetime.datetime(2021, 9, 5, 2, 55, 22, 4000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [69],
                    'gamma': [69],
                    'learning_rate': [69],
                    'max_depth': [69],
                    'min_child_weight': [69],
                    'reg_alpha': [69],
                    'reg_lambda': [69],
                    'subsample': [69]},
           'tid': 69,
           'vals': {'colsample_bytree': [0.74],
                    'gamma': [0.21],
                    'learning_rate': [0.31],
                    'max_depth': [4],
                    'min_child_weight': [7.0],
                    'reg_alpha': [0.85],
                    'reg_lambda': [0.62],
                    'subsample': [0.5]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 9, 5, 2, 55, 22, 40000),
  'result': {'loss': 0.16043033333333334, 'status': 'ok'

                    'reg_lambda': [79],
                    'subsample': [79]},
           'tid': 79,
           'vals': {'colsample_bytree': [0.51],
                    'gamma': [0.22],
                    'learning_rate': [0.26],
                    'max_depth': [0],
                    'min_child_weight': [7.0],
                    'reg_alpha': [0.26],
                    'reg_lambda': [0.97],
                    'subsample': [0.5700000000000001]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 9, 5, 2, 55, 23, 185000),
  'result': {'loss': 0.33107400000000003, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 79,
  'version': 0},
 {'book_time': datetime.datetime(2021, 9, 5, 2, 55, 23, 210000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [80],
                    'gamma': [80],
                    'learning_rate': [80],
                    'max_depth': [80],
    

           'idxs': {'colsample_bytree': [98],
                    'gamma': [98],
                    'learning_rate': [98],
                    'max_depth': [98],
                    'min_child_weight': [98],
                    'reg_alpha': [98],
                    'reg_lambda': [98],
                    'subsample': [98]},
           'tid': 98,
           'vals': {'colsample_bytree': [0.35000000000000003],
                    'gamma': [0.5700000000000001],
                    'learning_rate': [0.43],
                    'max_depth': [3],
                    'min_child_weight': [9.0],
                    'reg_alpha': [0.56],
                    'reg_lambda': [0.32],
                    'subsample': [0.92]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 9, 5, 2, 55, 24, 939000),
  'result': {'loss': 0.3859086666666667, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 98,
  'version': 0},
 {'book_time': datetime.datetime(2021, 9, 5, 2, 5