# Example 02: General Use of XGBoostClassifierHyperOpt
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/slickml/slick-ml/blob/master/examples/optimization/example_02_XGBoostClassifierHyperOpt.ipynb)

### Google Colab Configuration

In [1]:
# !git clone https://github.com/slickml/slick-ml.git
# %cd slick-ml
# !pip install -r requirements.txt

### Local Environment Configuration

In [2]:
# Change the working directory to the project root (two levels up)
%cd ../..

/home/amirhessam/Documents/GitHub/slick-ml


### Import Python Libraries

In [3]:
%load_ext autoreload

# widen the screen
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

# change the path and loading class
import os, sys
import pandas as pd
import numpy as np
import seaborn as sns

In [4]:
%autoreload
from slickml.optimization import XGBoostClassifierHyperOpt

----
# XGBoostClassifierHyperOpt Docstring

In [5]:
# read the classification dataset and preview its first five rows
df = pd.read_csv(filepath_or_buffer="data/clf_data.csv")
df.head(n=5)

Unnamed: 0,F1,F2,F3,F4,F5,F6,CLASS
0,5.7,4.7,3.7,58.0,249.3,456.47,0
1,7.7,6.6,4.1,20.0,260.92,443.43,1
2,6.2,4.3,4.6,59.0,255.63,478.96,1
3,5.7,4.4,3.8,49.0,195.28,381.94,0
4,9.1,4.7,4.6,17.0,259.55,395.67,1


In [6]:
# target vector: the CLASS column as an array
y = df["CLASS"].values
# feature matrix: every remaining column
X = df.drop(columns=["CLASS"])

In [7]:
from sklearn.model_selection import train_test_split

# shuffled split with test_size=0.2, stratified on y, with a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    stratify=y,
    random_state=1367,
)

In [8]:
# define the parameters' bounds

from hyperopt import hp
def get_xgb_params():
    """Define the hyperopt search space for the XGBoost parameters.

    Returns
    -------
    dict
        Maps each XGBoost parameter name either to a fixed value
        (``nthread``, ``booster``, ``tree_method``, ``objective``) or to a
        hyperopt expression (``hp.choice`` / ``hp.quniform``) to be tuned.
    """
    params = {
        # fixed settings, not part of the search
        "nthread": 4,
        "booster": "gbtree",
        "tree_method": "hist",
        "objective": "binary:logistic",
        # NOTE: hyperopt records the *index* of the option picked by hp.choice,
        # so max_depth values in the trials are positions within this range.
        "max_depth": hp.choice("max_depth", range(5, 100, 5)),
        "learning_rate": hp.quniform("learning_rate", 0.01, 0.50, 0.01),
        # "gamma" used to be listed twice in this literal; only the last entry of
        # a duplicated dict key survives, so the effective [0.0, 1.0] range is
        # kept here and the dead [0, 0.50] entry is removed.
        "gamma": hp.quniform("gamma", 0.0, 1.0, 0.01),
        "min_child_weight": hp.quniform("min_child_weight", 1, 10, 1),
        "subsample": hp.quniform("subsample", 0.1, 1, 0.01),
        "colsample_bytree": hp.quniform("colsample_bytree", 0.1, 1.0, 0.01),
        "reg_alpha": hp.quniform("reg_alpha", 0.0, 1.0, 0.01),
        "reg_lambda": hp.quniform("reg_lambda", 0.0, 1.0, 0.01),
    }

    return params

In [9]:
# build the optimizer with the search space from get_xgb_params()
xho = XGBoostClassifierHyperOpt(
    num_boost_round=200,
    metrics="logloss",
    n_splits=3,
    shuffle=True,
    early_stopping_rounds=20,
    func_name="xgb_cv",
    space=get_xgb_params(),
    max_evals=100,
    verbose=False,
)

# run the search on the training split
xho.fit(X_train, y_train)

100%|██████████| 100/100 [00:12<00:00,  7.92trial/s, best loss: 0.127302]          


### Best set of parameters from all runs

In [10]:
# NOTE(review): the max_depth reported here looks like the hp.choice index rather
# than the tree depth itself (16 is not a member of range(5, 100, 5)) — confirm
# before reusing the value.
xho.get_optimization_results()

Unnamed: 0,colsample_bytree,gamma,learning_rate,max_depth,min_child_weight,reg_alpha,reg_lambda,subsample
0,0.3,0.34,0.32,16,1.0,0.12,0.12,0.56


### Results from each trial

In [11]:
import pprint
# pretty-print the `.trials` list of the object returned by get_optimization_trials()
pprint.pprint(xho.get_optimization_trials().trials)

[{'book_time': datetime.datetime(2021, 5, 31, 21, 24, 2, 729000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [0],
                    'gamma': [0],
                    'learning_rate': [0],
                    'max_depth': [0],
                    'min_child_weight': [0],
                    'reg_alpha': [0],
                    'reg_lambda': [0],
                    'subsample': [0]},
           'tid': 0,
           'vals': {'colsample_bytree': [0.14],
                    'gamma': [0.81],
                    'learning_rate': [0.18],
                    'max_depth': [12],
                    'min_child_weight': [7.0],
                    'reg_alpha': [0.9500000000000001],
                    'reg_lambda': [0.89],
                    'subsample': [0.49]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 5, 31, 21, 24, 2, 814000),
  'result': {'loss': 0.4717653333333333, 'st

  'version': 0},
 {'book_time': datetime.datetime(2021, 5, 31, 21, 24, 3, 901000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [14],
                    'gamma': [14],
                    'learning_rate': [14],
                    'max_depth': [14],
                    'min_child_weight': [14],
                    'reg_alpha': [14],
                    'reg_lambda': [14],
                    'subsample': [14]},
           'tid': 14,
           'vals': {'colsample_bytree': [0.76],
                    'gamma': [0.03],
                    'learning_rate': [0.43],
                    'max_depth': [3],
                    'min_child_weight': [9.0],
                    'reg_alpha': [0.97],
                    'reg_lambda': [0.74],
                    'subsample': [0.68]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 5, 31, 21, 24, 3, 950000),
  'result': {'loss': 0.3270059999

  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [24],
                    'gamma': [24],
                    'learning_rate': [24],
                    'max_depth': [24],
                    'min_child_weight': [24],
                    'reg_alpha': [24],
                    'reg_lambda': [24],
                    'subsample': [24]},
           'tid': 24,
           'vals': {'colsample_bytree': [1.0],
                    'gamma': [0.18],
                    'learning_rate': [0.28],
                    'max_depth': [5],
                    'min_child_weight': [4.0],
                    'reg_alpha': [0.02],
                    'reg_lambda': [0.25],
                    'subsample': [0.37]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 5, 31, 21, 24, 4, 759000),
  'result': {'loss': 0.3048663333333333, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 24,
  'version': 0},
 {'book_time': dat

  'owner': None,
  'refresh_time': datetime.datetime(2021, 5, 31, 21, 24, 7, 13000),
  'result': {'loss': 0.34278766666666666, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 41,
  'version': 0},
 {'book_time': datetime.datetime(2021, 5, 31, 21, 24, 7, 63000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [42],
                    'gamma': [42],
                    'learning_rate': [42],
                    'max_depth': [42],
                    'min_child_weight': [42],
                    'reg_alpha': [42],
                    'reg_lambda': [42],
                    'subsample': [42]},
           'tid': 42,
           'vals': {'colsample_bytree': [0.49],
                    'gamma': [0.55],
                    'learning_rate': [0.35000000000000003],
                    'max_depth': [17],
                    'min_child_weight': [3.0],
                    'reg_alpha': [0.5700000000000001],
                

  'result': {'loss': 0.196128, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 52,
  'version': 0},
 {'book_time': datetime.datetime(2021, 5, 31, 21, 24, 8, 541000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [53],
                    'gamma': [53],
                    'learning_rate': [53],
                    'max_depth': [53],
                    'min_child_weight': [53],
                    'reg_alpha': [53],
                    'reg_lambda': [53],
                    'subsample': [53]},
           'tid': 53,
           'vals': {'colsample_bytree': [0.29],
                    'gamma': [0.5700000000000001],
                    'learning_rate': [0.36],
                    'max_depth': [0],
                    'min_child_weight': [7.0],
                    'reg_alpha': [0.74],
                    'reg_lambda': [0.11],
                    'subsample': [0.84]},
           'workdir': None},
  'owner': Non

  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [72],
                    'gamma': [72],
                    'learning_rate': [72],
                    'max_depth': [72],
                    'min_child_weight': [72],
                    'reg_alpha': [72],
                    'reg_lambda': [72],
                    'subsample': [72]},
           'tid': 72,
           'vals': {'colsample_bytree': [0.16],
                    'gamma': [0.21],
                    'learning_rate': [0.43],
                    'max_depth': [6],
                    'min_child_weight': [2.0],
                    'reg_alpha': [0.18],
                    'reg_lambda': [0.58],
                    'subsample': [0.63]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 5, 31, 21, 24, 11, 60000),
  'result': {'loss': 0.40359533333333336, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 72,
  'version': 0

           'vals': {'colsample_bytree': [0.14],
                    'gamma': [0.39],
                    'learning_rate': [0.44],
                    'max_depth': [6],
                    'min_child_weight': [2.0],
                    'reg_alpha': [0.19],
                    'reg_lambda': [0.4],
                    'subsample': [0.25]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 5, 31, 21, 24, 13, 115000),
  'result': {'loss': 0.3755596666666667, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 85,
  'version': 0},
 {'book_time': datetime.datetime(2021, 5, 31, 21, 24, 13, 185000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [86],
                    'gamma': [86],
                    'learning_rate': [86],
                    'max_depth': [86],
                    'min_child_weight': [86],
                    'reg_alpha': [86],
                    'reg_lambda':