# XGBoost hyperparameter optimization using the Ax framework

XGBoost (Extreme Gradient Boosting) belongs to a family of boosting algorithms and uses the gradient boosting (GBM) framework. Boosting is a sequential technique which works on the principle of an ensemble. It combines a set of weak learners and delivers improved prediction accuracy.

Ax is an open-source package (developed at Facebook and built on top of PyTorch) that helps you find a minimum of any function over a given range of parameters. One of its useful ML applications is finding the best hyperparameters for training a model so that it achieves minimal loss.

Sources:
- https://xgboost.readthedocs.io/en/latest/tutorials/model.html
- https://medium.com/@juniormiranda_23768/ensemble-methods-tuning-a-xgboost-model-with-scikit-learn-54ff669f988a
- https://hackernoon.com/want-a-complete-guide-for-xgboost-model-in-python-using-scikit-learn-sc11f31bq
- https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html
- https://www.kaggle.com/stuarthallows/using-xgboost-with-scikit-learn
- https://github.com/dmlc/xgboost/blob/master/demo/guide-python/sklearn_examples.py
- https://ax.dev/docs/core.html
- https://github.com/facebook/Ax/blob/master/tutorials/gpei_svm.ipynb
- https://xgboost.readthedocs.io/en/latest/parameter.html
- https://towardsdatascience.com/rocking-hyperparameter-tuning-with-pytorchs-ax-package-1c2dd79f2948
- https://www.kaggle.com/nanomathias/bayesian-optimization-of-xgboost-lb-0-9769


## Gradient Boosting with default parameters

In [1]:
import numpy as np
from sklearn import datasets

# Synthetic binary-classification data (Hastie et al. 10.2 generator),
# stored as float32 features.
X, y = datasets.make_hastie_10_2(n_samples=15000, random_state=1)
X = X.astype(np.float32)

# np.unique returns the sorted distinct labels plus, for every sample, the
# index of its label in that array -- this turns {-1, 1} into {0, 1}.
labels, y = np.unique(y, return_inverse=True)

# The first n_train samples are used for training, the remainder for testing.
n_train = 2000
X_train, y_train = X[:n_train], y[:n_train]
X_test, y_test = X[n_train:], y[n_train:]

In [2]:
import xgboost as xgb
from sklearn.metrics import accuracy_score

# Baseline: every hyperparameter is left at its XGBoost default; only the
# objective and the random seed are set explicitly.
# Parameter reference: https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst
xgb_model = xgb.XGBClassifier(random_state=42, objective="binary:logistic")
xgb_model.fit(X_train, y_train)

# Score the held-out samples by plain classification accuracy.
y_pred = xgb_model.predict(X_test)
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print('accuracy_score:', score)

accuracy_score: 0.8806923076923077


# Optimization of XGBoost using the Ax optimization framework

In [3]:
# see: https://github.com/facebook/Ax/blob/master/tutorials/gpei_svm.ipynb

from ax import (
    ParameterType,
    RangeParameter,
    SearchSpace,
    SimpleExperiment,
    modelbridge,
    models
)

In [4]:
# The evaluation function takes in a parameterization (set of parameter values) 
# and computes all the metrics needed in optimization.
# It should output a dictionary of metric names to tuple of mean and standard error.

def xgb_evaluation_function(
    parameterization, # dict of parameter names to values of those parameters
    weight=None, # required by the evaluation function signature
):
    """Train an XGBoost classifier with the given hyperparameters and score it.

    The model is fitted on the module-level ``X_train`` / ``y_train`` and
    scored on ``X_test`` / ``y_test``.

    Args:
        parameterization: Mapping from hyperparameter name to value. Every
            name listed in ``tuned_names`` below must be present; a missing
            one raises ``KeyError``. Extra keys are ignored.
        weight: Unused; accepted only to match the evaluation-function
            signature.

    Returns:
        ``{'accuracy': (accuracy, 0.0)}`` -- the metric name mapped to a
        (mean, standard error) tuple. The standard error is reported as 0.0.

    NOTE(review): scoring on the test split means the hyperparameters are
    tuned against the same data used to report the final accuracy; consider
    a separate validation split.
    """
    # Hyperparameters forwarded verbatim to XGBClassifier, looked up in this order.
    tuned_names = (
        "colsample_bylevel",
        "colsample_bytree",
        "gamma",
        "learning_rate",
        "max_delta_step",
        "max_depth",
        "min_child_weight",
        "n_estimators",
        "reg_alpha",
        "reg_lambda",
        "scale_pos_weight",
        "subsample",
    )
    hyperparams = {name: parameterization[name] for name in tuned_names}

    # Objective and seed are fixed so that trials differ only in the tuned values.
    classifier = xgb.XGBClassifier(
        objective="binary:logistic", random_state=42, **hyperparams
    )
    classifier.fit(X_train, y_train)

    # Fraction of held-out samples that are classified correctly.
    accuracy = accuracy_score(y_test, classifier.predict(X_test))

    print('accuracy:', accuracy)
    return {'accuracy': (accuracy, 0.0)}

In [5]:
# Define search space - set of input model parameters with allowed values.
# Each RangeParameter gives the inclusive [lower, upper] bounds for one XGBoost
# hyperparameter; log_scale=True asks Ax to search that range on a log scale.

xgb_search_space = SearchSpace(parameters=[
    RangeParameter(
        name='colsample_bylevel', parameter_type=ParameterType.FLOAT, lower=0.01, upper=1.0, log_scale=False
    ),
    RangeParameter(
        name='colsample_bytree', parameter_type=ParameterType.FLOAT, lower=0.01, upper=1.0, log_scale=False
    ),
    RangeParameter(
        name='gamma', parameter_type=ParameterType.FLOAT, lower=1e-9, upper=0.5, log_scale=True
    ),
    RangeParameter(
        name='learning_rate', parameter_type=ParameterType.FLOAT, lower=0.01, upper=1.0, log_scale=True
    ),
    RangeParameter(
        name='max_delta_step', parameter_type=ParameterType.INT, lower=0, upper=20, log_scale=False
    ),
    # The lower bound is 1, not 0: XGBoost interprets max_depth=0 as "no depth
    # limit" and the exact tree method requires a non-zero value, so 0 is not
    # a meaningful "shallowest tree" candidate.
    RangeParameter(
        name='max_depth', parameter_type=ParameterType.INT, lower=1, upper=50, log_scale=False
    ),
    RangeParameter(
        name='min_child_weight', parameter_type=ParameterType.INT, lower=0, upper=10, log_scale=False
    ),
    RangeParameter(
        name='n_estimators', parameter_type=ParameterType.INT, lower=50, upper=150, log_scale=False
    ),
    RangeParameter(
        name='reg_alpha', parameter_type=ParameterType.FLOAT, lower=1e-9, upper=1.0, log_scale=True
    ),
    RangeParameter(
        name='reg_lambda', parameter_type=ParameterType.FLOAT, lower=1e-9, upper=1000.0, log_scale=True
    ),
    RangeParameter(
        name='scale_pos_weight', parameter_type=ParameterType.FLOAT, lower=1e-6, upper=500.0, log_scale=True
    ),
    RangeParameter(
        name='subsample', parameter_type=ParameterType.FLOAT, lower=0.4, upper=0.6, log_scale=True
    ),
])

In [6]:
# Points tried during optimization are computed synchronously through the
# evaluation function, so SimpleExperiment suffices in place of Experiment.

exp = SimpleExperiment(
    name="XGB optimalization",
    objective_name="accuracy",
    minimize=False,  # accuracy is to be maximized
    search_space=xgb_search_space,
    evaluation_function=xgb_evaluation_function,
)

In [7]:
# We only instantiate the Sobol generator once, as the underlying model does not
# need to be re-fit every time new data is added to the experiment.

sobol = modelbridge.get_sobol(search_space=exp.search_space)
print("Running Sobol initialization trials...")

# Number of Sobol-generated trials that seed the experiment before GP+EI starts.
sobol_trials = 5
for _ in range(sobol_trials):
    exp.new_trial(generator_run=sobol.gen(1))

# Number of GP+EI trials to generate after the Sobol initialization.
steps = 20

# GP=Gaussian Process, EI=Expected Improvement
for i in range(steps):
    print(f"Running GP+EI optimization trial {i+1}/{steps}...")
    # Since we need to re-fit the underlying GP model, we reinstantiate the GP+EI
    # model every time new data is added to the experiment. exp.eval() supplies
    # that data.
    gpei = modelbridge.get_GPEI(experiment=exp, data=exp.eval())
    generator_run = gpei.gen(1)
    exp.new_trial(generator_run=generator_run)

# NOTE(review): exp.eval() is only called at the top of each iteration, so the
# trial created in the final iteration is not evaluated by this cell -- confirm
# whether a trailing exp.eval() is wanted.

Running Sobol initialization trials...
Running GP+EI optimization trial 1/20...
accuracy: 0.5060769230769231
accuracy: 0.8226153846153846
accuracy: 0.5060769230769231
accuracy: 0.8805384615384615
accuracy: 0.5060769230769231
Running GP+EI optimization trial 2/20...
accuracy: 0.8703846153846154
Running GP+EI optimization trial 3/20...
accuracy: 0.8431538461538461
Running GP+EI optimization trial 4/20...
accuracy: 0.8837692307692308
Running GP+EI optimization trial 5/20...
accuracy: 0.836076923076923
Running GP+EI optimization trial 6/20...
accuracy: 0.8755384615384615
Running GP+EI optimization trial 7/20...
accuracy: 0.8806923076923077
Running GP+EI optimization trial 8/20...
accuracy: 0.8367692307692308
Running GP+EI optimization trial 9/20...
accuracy: 0.8416923076923077
Running GP+EI optimization trial 10/20...
accuracy: 0.872
Running GP+EI optimization trial 11/20...
accuracy: 0.8756153846153846
Running GP+EI optimization trial 12/20...
accuracy: 0.886
Running GP+EI optimization tr

In [8]:
from ax.service.utils.best_point import get_best_from_model_predictions

# Best point according to the model's predictions for this experiment; the
# printed value holds the parameterization followed by the predicted metric
# statistics.
model_predictions = get_best_from_model_predictions(experiment=exp)
print('model_predictions:', model_predictions)

model_predictions: ({'colsample_bylevel': 0.5474285959685485, 'colsample_bytree': 0.45620725714512717, 'gamma': 2.3406682788971863e-07, 'learning_rate': 0.0865569197680857, 'max_delta_step': 15, 'max_depth': 6, 'min_child_weight': 6, 'n_estimators': 120, 'reg_alpha': 1.0, 'reg_lambda': 1.2224163336215744e-06, 'scale_pos_weight': 0.9618280388126623, 'subsample': 0.493838999516407}, ({'accuracy': 0.8925384283698093}, {'accuracy': {'accuracy': 1.4957462777355936e-09}}))
