# Tune Learning Rate

There are 6 variations of learning rate to be tested and each variation will be evaluated using 10-fold cross validation, meaning that there is a total of 6×10 or 60 XGBoost models to be trained and evaluated.
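Note that the number of trees is held fixed: `n_estimators` is never set, so every model uses the XGBClassifier default (100 trees) and only `learning_rate` varies.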

In [3]:
# Tune learning_rate
from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot
import numpy as np

# Read the Pima Indians diabetes CSV into a numeric array.
dataset = loadtxt('pima-indians-diabetes.csv', delimiter=",")
# Columns 0-7 form the feature matrix; column 8 is the target vector.
X = dataset[:, :8]
Y = dataset[:, 8]

# Candidate learning rates; every other XGBClassifier setting keeps its default.
model = XGBClassifier()
learning_rate = [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3]
param_grid = {'learning_rate': learning_rate}

# Score each candidate by negative log loss over 10 shuffled, stratified folds.
# The fixed random_state keeps the fold assignment repeatable between runs.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
grid_search = GridSearchCV(
    model,
    param_grid,
    scoring="neg_log_loss",
    n_jobs=-1,
    cv=kfold,
)
grid_result = grid_search.fit(X, Y)

# Report the best configuration first ...
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# ... then the mean and standard deviation of the fold scores per candidate.
# `means` and `stds` are kept as globals because the plotting cell reads them.
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: -0.483013 using {'learning_rate': 0.1}
-0.689650 (0.000242) with: {'learning_rate': 0.0001}
-0.661274 (0.001954) with: {'learning_rate': 0.001}
-0.530747 (0.022961) with: {'learning_rate': 0.01}
-0.483013 (0.060755) with: {'learning_rate': 0.1}
-0.515440 (0.068974) with: {'learning_rate': 0.2}
-0.557315 (0.081738) with: {'learning_rate': 0.3}


In [4]:
# plot
# Mean CV score (with +/- one standard deviation error bars) per learning rate.
# NOTE: `learning_rate`, `means` and `stds` are the globals produced by the
# learning_rate grid search; the later n_estimators search rebinds all three,
# so run this cell directly after the learning_rate search.
#
# Draw on a dedicated figure instead of pyplot's implicit "current figure":
# the Agg backend selected at import time is non-interactive, so figures are
# not shown or closed automatically and a re-run would otherwise draw on top
# of whatever the current figure already holds.
fig, ax = pyplot.subplots()
ax.errorbar(learning_rate, means, yerr=stds)
ax.set_title("XGBoost learning_rate vs Log Loss")
ax.set_xlabel('learning_rate')
# Values are the scorer's output ("neg_log_loss"), i.e. negated log loss.
ax.set_ylabel('Log Loss')
fig.savefig('learning_rate.png')

# Tuning Learning Rate and the Number of Trees in XGBoost

There are 5 variations of n_estimators and 3 variations of learning_rate. Each combination will be evaluated using 10-fold cross validation, so that is a total of 3×5×10 or 150 XGBoost models that must be trained and evaluated.

In [5]:
# Joint grid over the number of trees and the learning rate.
# Reuses `model`, `X` and `Y` from the learning_rate tuning cell.
n_estimators = [100, 200, 300, 400, 500]
learning_rate = [0.0001, 0.001, 0.01]
param_grid = dict(learning_rate=learning_rate, n_estimators=n_estimators)
# Same seeded 10-fold stratified split as the learning_rate-only search.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
grid_search = GridSearchCV(model, param_grid, scoring="neg_log_loss", n_jobs=-1, cv=kfold)
grid_result = grid_search.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
# plot results
# The candidates are listed with learning_rate varying slowest and
# n_estimators fastest (visible in the printed summary), so after the reshape
# each row of `scores` holds the curve for one learning rate.
scores = np.array(means).reshape(len(learning_rate), len(n_estimators))
# Start a fresh figure: under the non-interactive Agg backend the figure from
# the earlier errorbar plot is still pyplot's current figure, and plotting
# through pyplot would put these curves (and the saved PNG) on top of it.
fig, ax = pyplot.subplots()
for i, value in enumerate(learning_rate):
    ax.plot(n_estimators, scores[i], label='learning_rate: ' + str(value))
ax.legend()
ax.set_xlabel('n_estimators')
# Values are the scorer's output ("neg_log_loss"), i.e. negated log loss.
ax.set_ylabel('Log Loss')
fig.savefig('n_estimators_vs_learning_rate.png')

Best: -0.476375 using {'n_estimators': 500, 'learning_rate': 0.01}
-0.689650 (0.000242) with: {'n_estimators': 100, 'learning_rate': 0.0001}
-0.686232 (0.000472) with: {'n_estimators': 200, 'learning_rate': 0.0001}
-0.682878 (0.000678) with: {'n_estimators': 300, 'learning_rate': 0.0001}
-0.679574 (0.000875) with: {'n_estimators': 400, 'learning_rate': 0.0001}
-0.676337 (0.001069) with: {'n_estimators': 500, 'learning_rate': 0.0001}
-0.661274 (0.001954) with: {'n_estimators': 100, 'learning_rate': 0.001}
-0.634948 (0.003900) with: {'n_estimators': 200, 'learning_rate': 0.001}
-0.613073 (0.006087) with: {'n_estimators': 300, 'learning_rate': 0.001}
-0.594517 (0.008463) with: {'n_estimators': 400, 'learning_rate': 0.001}
-0.578893 (0.010938) with: {'n_estimators': 500, 'learning_rate': 0.001}
-0.530747 (0.022961) with: {'n_estimators': 100, 'learning_rate': 0.01}
-0.492309 (0.036248) with: {'n_estimators': 200, 'learning_rate': 0.01}
-0.481079 (0.042922) with: {'n_estimators': 300, 'lear

In [None]:
References:
----------------------------

https://machinelearningmastery.com/xgboost-python-mini-course/
