# `optimization.XGBoostHyperOptimizer`

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edited modules are
# re-imported automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
import slickml

# Record the installed SlickML version so the outputs below can be tied to it.
print(f"Loaded SlickML Version = {slickml.__version__}")

Loaded SlickML Version = 0.2.0


In [3]:
from slickml.optimization import XGBoostHyperOptimizer

# Display the constructor signature and class docstring for reference.
help(XGBoostHyperOptimizer)

Help on class XGBoostHyperOptimizer in module slickml.optimization._hyperopt:

class XGBoostHyperOptimizer(slickml.base._estimator.BaseXGBoostEstimator)
 |  XGBoostHyperOptimizer(num_boost_round: Optional[int] = 200, sparse_matrix: Optional[bool] = False, scale_mean: Optional[bool] = False, scale_std: Optional[bool] = False, importance_type: Optional[str] = 'total_gain', params: Optional[Dict[str, Union[str, float, int]]] = None, n_iter: Optional[int] = 100, n_splits: Optional[int] = 4, metrics: Optional[str] = 'auc', objective: Optional[str] = 'binary:logistic', params_bounds: Optional[Dict[str, Any]] = None, early_stopping_rounds: Optional[int] = 20, stratified: Optional[bool] = True, shuffle: Optional[bool] = True, random_state: Optional[int] = 1367, verbose: Optional[bool] = True) -> None
 |  
 |  XGBoost Hyper-Parameters Tuner using HyperOpt Optimization.
 |  
 |  This is wrapper using HyperOpt [hyperopt]_ a Python library for serial and parallel optimization
 |  over search space

### Example 1: `XGBoostHyperOptimizer` for `classification` using `breast-cancer` data set

In [4]:
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer data set shipped with scikit-learn, then pull out
# the feature matrix and the target vector used by the classification example.
clf_data = load_breast_cancer()
X_clf = clf_data.data
y_clf = clf_data.target

In [5]:
# Limit the search to 10 trials (the constructor default is `n_iter=100`) and
# tune for AUC with the binary-logistic objective.
xho_clf = XGBoostHyperOptimizer(
    n_iter=10,
    metrics="auc",
    objective="binary:logistic",
)
xho_clf.fit(X_clf, y_clf)

100%|██████████| 10/10 [00:00<00:00, 10.37trial/s, best loss: -0.9942836784208419]


In [6]:
# best hyper-parameters found by the search (or xho_clf.get_best_params())
xho_clf.best_params_

{'colsample_bytree': 0.48,
 'gamma': 0.44,
 'learning_rate': 0.53,
 'max_depth': 2,
 'min_child_weight': 2.0,
 'reg_alpha': 0.05,
 'reg_lambda': 0.63,
 'subsample': 0.98}

In [7]:
# sequence of all trial records (or xho_clf.get_results());
# only the last trial is indexed here to keep the printed output short
xho_clf.results_[-1]

{'state': 2,
 'tid': 9,
 'spec': None,
 'result': {'loss': -0.5, 'status': 'ok'},
 'misc': {'tid': 9,
  'cmd': ('domain_attachment', 'FMinIter_Domain'),
  'workdir': None,
  'idxs': {'colsample_bytree': [9],
   'gamma': [9],
   'learning_rate': [9],
   'max_depth': [9],
   'min_child_weight': [9],
   'reg_alpha': [9],
   'reg_lambda': [9],
   'subsample': [9]},
  'vals': {'colsample_bytree': [0.81],
   'gamma': [0.88],
   'learning_rate': [0.43],
   'max_depth': [1],
   'min_child_weight': [13.0],
   'reg_alpha': [0.42],
   'reg_lambda': [0.8300000000000001],
   'subsample': [0.18]}},
 'exp_key': None,
 'owner': None,
 'version': 0,
 'book_time': datetime.datetime(2022, 11, 28, 4, 27, 20, 697000),
 'refresh_time': datetime.datetime(2022, 11, 28, 4, 27, 20, 751000)}

In [8]:
# underlying hyperopt `Trials` object (or xho_clf.get_trials())
xho_clf.trials_

<hyperopt.base.Trials at 0x137f16d30>

In [9]:
# parameter bounds: the search space, returned as a dict that maps each
# hyper-parameter name to a hyperopt expression object
xho_clf.get_params_bounds()

{'max_depth': <hyperopt.pyll.base.Apply at 0x137f24400>,
 'learning_rate': <hyperopt.pyll.base.Apply at 0x137f24640>,
 'min_child_weight': <hyperopt.pyll.base.Apply at 0x137f24790>,
 'colsample_bytree': <hyperopt.pyll.base.Apply at 0x137f248e0>,
 'subsample': <hyperopt.pyll.base.Apply at 0x137f24a30>,
 'gamma': <hyperopt.pyll.base.Apply at 0x137f24b80>,
 'reg_alpha': <hyperopt.pyll.base.Apply at 0x137f24cd0>,
 'reg_lambda': <hyperopt.pyll.base.Apply at 0x137f24e20>}

### Example 2: `XGBoostHyperOptimizer` for `regression` using `california-housing` data set

In [10]:
from sklearn.datasets import fetch_california_housing

# Fetch the california-housing data set through scikit-learn, then pull out
# the feature matrix and the target vector used by the regression example.
reg_data = fetch_california_housing()
X_reg = reg_data.data
y_reg = reg_data.target

In [11]:
# Limit the search to 10 trials (the constructor default is `n_iter=100`) and
# tune for RMSE with the squared-error regression objective.
xho_reg = XGBoostHyperOptimizer(
    n_iter=10,
    metrics="rmse",
    objective="reg:squarederror",
)
xho_reg.fit(X_reg, y_reg)

100%|██████████| 10/10 [00:07<00:00,  1.39trial/s, best loss: 0.4888611586512855]


In [12]:
# hyper-parameter optimization results (or xho_reg.results_);
# only the first trial is indexed here to keep the printed output short
xho_reg.get_results()[0]

{'state': 2,
 'tid': 0,
 'spec': None,
 'result': {'loss': 0.5048283668706345, 'status': 'ok'},
 'misc': {'tid': 0,
  'cmd': ('domain_attachment', 'FMinIter_Domain'),
  'workdir': None,
  'idxs': {'colsample_bytree': [0],
   'gamma': [0],
   'learning_rate': [0],
   'max_depth': [0],
   'min_child_weight': [0],
   'reg_alpha': [0],
   'reg_lambda': [0],
   'subsample': [0]},
  'vals': {'colsample_bytree': [0.68],
   'gamma': [0.05],
   'learning_rate': [0.84],
   'max_depth': [0],
   'min_child_weight': [12.0],
   'reg_alpha': [0.8200000000000001],
   'reg_lambda': [0.5],
   'subsample': [0.87]}},
 'exp_key': None,
 'owner': None,
 'version': 0,
 'book_time': datetime.datetime(2022, 11, 28, 4, 27, 21, 64000),
 'refresh_time': datetime.datetime(2022, 11, 28, 4, 27, 21, 963000)}

In [13]:
# best hyper-parameters found by the search (or xho_reg.best_params_)
xho_reg.get_best_params()

{'colsample_bytree': 0.89,
 'gamma': 0.48,
 'learning_rate': 0.24,
 'max_depth': 2,
 'min_child_weight': 3.0,
 'reg_alpha': 0.44,
 'reg_lambda': 0.8,
 'subsample': 0.39}

### Feel free to add your favorite `Example` via a `pull-request`.
### More details can be found in our [Contributing Document](https://github.com/slickml/slick-ml/blob/master/CONTRIBUTING.md).