# Example 02: General Use of XGBoostClassifierHyperOpt
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/slickml/slick-ml/blob/master/examples/optimization/example_02_XGBoostClassifierHyperOpt.ipynb)

### Google Colab Configuration

In [1]:
# Google Colab only: uncomment the lines below to clone the repository,
# move into it, and install its requirements before running the notebook.
# !git clone https://github.com/slickml/slick-ml.git
# %cd slick-ml
# !pip install -r requirements.txt

### Local Environment Configuration

In [2]:
# Change the working directory to the project root (two levels up) so that
# relative paths used later in the notebook resolve from there.
# NOTE: the path is relative, so re-running this cell moves up two more levels.
%cd ../..

/home/amirhessam/Documents/GitHub/slick-ml


### Import Python Libraries

In [3]:
%load_ext autoreload

# widen the screen
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

# change the path and loading class
import os, sys
import pandas as pd
import numpy as np
import seaborn as sns

In [4]:
# Reload modules that changed on disk (the autoreload extension is loaded in
# an earlier cell), then import the hyper-parameter optimizer demonstrated below.
%autoreload
from slickml.optimization import XGBoostClassifierHyperOpt

----
# XGBoostClassifierHyperOpt Docstring

In [5]:
# Read the classification dataset (path is relative to the current working
# directory) and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="data/clf_data.csv")
df.head(n=5)

Unnamed: 0,F1,F2,F3,F4,F5,F6,CLASS
0,5.7,4.7,3.7,58.0,249.3,456.47,0
1,7.7,6.6,4.1,20.0,260.92,443.43,1
2,6.2,4.3,4.6,59.0,255.63,478.96,1
3,5.7,4.4,3.8,49.0,195.28,381.94,0
4,9.1,4.7,4.6,17.0,259.55,395.67,1


In [6]:
# Target vector: the CLASS column as an array.
y = df["CLASS"].values
# Feature matrix: every column except CLASS.
X = df.drop(columns=["CLASS"])

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the split is shuffled, stratified on
# the target, and seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    stratify=y,
    random_state=1367,
)

In [8]:
from hyperopt import hp
def get_xgb_params():
    """Build the hyperopt search space used to tune the XGBoost classifier.

    Fixed settings (thread count, booster, tree method, objective) are plain
    values; every other entry is a hyperopt expression describing the range
    that parameter is searched over.

    Returns
    -------
    dict
        Mapping of XGBoost parameter names to either a constant or a
        hyperopt expression built with ``hp.choice`` / ``hp.quniform``.

    Notes
    -----
    Every hyperopt label is declared exactly once. ``gamma`` is searched over
    [0.0, 1.0] in steps of 0.01.
    """
    params = {
        # fixed (non-tuned) settings
        "nthread": 4,
        "booster": "gbtree",
        "tree_method": "hist",
        "objective": "binary:logistic",
        # tuned settings
        "max_depth": hp.choice("max_depth", range(5, 100, 5)),
        "learning_rate": hp.quniform("learning_rate", 0.01, 0.50, 0.01),
        "min_child_weight": hp.quniform("min_child_weight", 1, 10, 1),
        "subsample": hp.quniform("subsample", 0.1, 1, 0.01),
        "colsample_bytree": hp.quniform("colsample_bytree", 0.1, 1.0, 0.01),
        "gamma": hp.quniform("gamma", 0.0, 1.0, 0.01),
        "reg_alpha": hp.quniform("reg_alpha", 0.0, 1.0, 0.01),
        "reg_lambda": hp.quniform("reg_lambda", 0.0, 1.0, 0.01),
    }

    return params

In [None]:
# NOTE(review): this cell has no execution count (it was never run). It builds
# an optimizer with default arguments and does not keep the result; it looks
# like leftover scratch — consider removing it.
XGBoostClassifierHyperOpt()

In [9]:
from slickml.classification import XGBoostClassifier

# Configure the search over the space returned by get_xgb_params().
xho = XGBoostClassifierHyperOpt(
    num_boost_round=200,
    metrics="logloss",
    n_splits=3,
    shuffle=True,
    early_stopping_rounds=20,
    func_name="xgb_cv",
    space=get_xgb_params(),
    max_evals=100,
    verbose=False,
)

# Run the optimization on the training split.
xho.fit(X_train, y_train)

100%|██████████| 100/100 [00:12<00:00,  7.80trial/s, best loss: 0.13210466666666668]


### Best set of parameters from all runs

In [11]:
# Display the best hyper-parameter set found by the search.
# NOTE(review): the per-trial records report max_depth values (e.g. 6, 12, 16)
# that are not members of range(5, 100, 5), which suggests hp.choice indices
# are being reported; the max_depth shown here may likewise be an index rather
# than a tree depth — confirm before reusing it.
xho.get_optimization_results()

Unnamed: 0,colsample_bytree,gamma,learning_rate,max_depth,min_child_weight,reg_alpha,reg_lambda,subsample
0,0.83,0.65,0.34,10,3.0,0.96,0.43,0.9


### Results from each trial

In [12]:
# Pretty-print the raw record of every trial (one dict per trial).
# NOTE: the output is long; clear or collapse it before sharing the notebook.
import pprint
pprint.pprint(xho.get_optimization_trials().trials)

[{'book_time': datetime.datetime(2021, 5, 31, 21, 17, 50, 555000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [0],
                    'gamma': [0],
                    'learning_rate': [0],
                    'max_depth': [0],
                    'min_child_weight': [0],
                    'reg_alpha': [0],
                    'reg_lambda': [0],
                    'subsample': [0]},
           'tid': 0,
           'vals': {'colsample_bytree': [0.5700000000000001],
                    'gamma': [0.6900000000000001],
                    'learning_rate': [0.19],
                    'max_depth': [10],
                    'min_child_weight': [7.0],
                    'reg_alpha': [1.0],
                    'reg_lambda': [0.9500000000000001],
                    'subsample': [0.67]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 5, 31, 21, 17, 50, 704000),
  'result': {'l

  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [30],
                    'gamma': [30],
                    'learning_rate': [30],
                    'max_depth': [30],
                    'min_child_weight': [30],
                    'reg_alpha': [30],
                    'reg_lambda': [30],
                    'subsample': [30]},
           'tid': 30,
           'vals': {'colsample_bytree': [0.45],
                    'gamma': [0.64],
                    'learning_rate': [0.23],
                    'max_depth': [12],
                    'min_child_weight': [5.0],
                    'reg_alpha': [0.05],
                    'reg_lambda': [0.89],
                    'subsample': [0.86]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 5, 31, 21, 17, 53, 426000),
  'result': {'loss': 0.40015399999999995, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 30,
  'version':

                    'min_child_weight': [59],
                    'reg_alpha': [59],
                    'reg_lambda': [59],
                    'subsample': [59]},
           'tid': 59,
           'vals': {'colsample_bytree': [0.8300000000000001],
                    'gamma': [0.98],
                    'learning_rate': [0.41000000000000003],
                    'max_depth': [6],
                    'min_child_weight': [6.0],
                    'reg_alpha': [0.52],
                    'reg_lambda': [0.2],
                    'subsample': [0.92]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 5, 31, 21, 17, 57, 346000),
  'result': {'loss': 0.27013533333333334, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 59,
  'version': 0},
 {'book_time': datetime.datetime(2021, 5, 31, 21, 17, 57, 407000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [60],
                  

                    'subsample': [0.6]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2021, 5, 31, 21, 18, 1, 720000),
  'result': {'loss': 0.25003566666666666, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 88,
  'version': 0},
 {'book_time': datetime.datetime(2021, 5, 31, 21, 18, 1, 796000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'colsample_bytree': [89],
                    'gamma': [89],
                    'learning_rate': [89],
                    'max_depth': [89],
                    'min_child_weight': [89],
                    'reg_alpha': [89],
                    'reg_lambda': [89],
                    'subsample': [89]},
           'tid': 89,
           'vals': {'colsample_bytree': [0.84],
                    'gamma': [0.23],
                    'learning_rate': [0.15],
                    'max_depth': [16],
                    'min_child_weight': [1.0],
              