# 在 Python 中使用 XGBoost 调整梯度提升的学习率

## 在 XGBoost 中调整学习率

In [None]:
from sklearn import datasets

# Load the iris data set bundled with scikit-learn. Only the feature columns
# selected by the slice 1:3 (column indices 1 and 2) are kept as inputs;
# the class labels become the target.
iris = datasets.load_iris()
X = iris.data[:, 1:3]
y = iris.target

In [6]:
# XGBoost: tune learning_rate with a cross-validated grid search.
# (The tutorial was written for the Otto dataset; see the note below.)
from pandas import read_csv
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot
# NOTE: this cell fits on the X, y that are already in scope (the iris slice
# built in the earlier cell), not on the Otto data. To run on Otto instead:
# read 'train.csv' with read_csv, take columns 0:94 of its .values as X and
# column 94 as y, encode y with LabelEncoder().fit_transform, and pass the
# encoded labels to grid_search.fit.
# grid search over learning_rate using stratified, shuffled 10-fold CV
model = XGBClassifier()
learning_rate = [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3]
param_grid = dict(learning_rate=learning_rate)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
grid_search = GridSearchCV(model, param_grid, scoring="neg_log_loss", n_jobs=1, cv=kfold)
grid_result = grid_search.fit(X, y)
# summarize results; scoring is "neg_log_loss", so every score printed and
# plotted below is the negated log loss
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
# plot on a dedicated figure and close it after saving: pyplot's implicit
# "current figure" otherwise stays open and would collect whatever is plotted
# later in the same session
fig, ax = pyplot.subplots()
ax.errorbar(learning_rate, means, yerr=stds)
ax.set_title("XGBoost learning_rate vs Log Loss")
ax.set_xlabel('learning_rate')
ax.set_ylabel('Log Loss')
fig.savefig('learning_rate.png')
pyplot.close(fig)

Best: -0.231555 using {'learning_rate': 0.1}
-1.086171 (0.000869) with: {'learning_rate': 0.0001}
-0.983302 (0.008330) with: {'learning_rate': 0.001}
-0.448460 (0.062565) with: {'learning_rate': 0.01}
-0.231555 (0.240100) with: {'learning_rate': 0.1}
-0.258177 (0.274780) with: {'learning_rate': 0.2}
-0.285556 (0.298395) with: {'learning_rate': 0.3}




## 调整 XGBoost 中的学习率和树的数量

In [7]:
# XGBoost: tune learning_rate and n_estimators together with a
# cross-validated grid search.
# (The tutorial was written for the Otto dataset; see the note below.)
from pandas import read_csv
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot
import numpy
# NOTE: this cell fits on the X, y that are already in scope (the iris slice
# built in the earlier cell), not on the Otto data. To run on Otto instead:
# read 'train.csv' with read_csv, take columns 0:94 of its .values as X and
# column 94 as y, encode y with LabelEncoder().fit_transform, and pass the
# encoded labels to grid_search.fit.
# grid search over every (learning_rate, n_estimators) pair using
# stratified, shuffled 10-fold CV
model = XGBClassifier()
n_estimators = [10, 20, 30, 40, 50]
learning_rate = [0.0001, 0.001, 0.01, 0.1]
param_grid = dict(learning_rate=learning_rate, n_estimators=n_estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
grid_search = GridSearchCV(model, param_grid, scoring="neg_log_loss", n_jobs=1, cv=kfold)
grid_result = grid_search.fit(X, y)
# summarize results; scoring is "neg_log_loss", so every score printed and
# plotted below is the negated log loss
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
# plot results
# The results are ordered with n_estimators varying fastest inside each
# learning_rate (visible in the summary printed above), so reshaping gives
# one row per learning_rate and one column per n_estimators value.
scores = numpy.array(means).reshape(len(learning_rate), len(n_estimators))
# Draw on a dedicated figure and close it after saving: pyplot's implicit
# "current figure" may still hold artists from an earlier plot in the same
# session, and those would otherwise end up in this image too.
fig, ax = pyplot.subplots()
for i, value in enumerate(learning_rate):
    ax.plot(n_estimators, scores[i], label='learning_rate: ' + str(value))
ax.legend()
ax.set_xlabel('n_estimators')
ax.set_ylabel('Log Loss')
fig.savefig('n_estimators_vs_learning_rate.png')
pyplot.close(fig)

Best: -0.207794 using {'learning_rate': 0.1, 'n_estimators': 40}
-1.097358 (0.000087) with: {'learning_rate': 0.0001, 'n_estimators': 10}
-1.096107 (0.000175) with: {'learning_rate': 0.0001, 'n_estimators': 20}
-1.094857 (0.000262) with: {'learning_rate': 0.0001, 'n_estimators': 30}
-1.093610 (0.000349) with: {'learning_rate': 0.0001, 'n_estimators': 40}
-1.092364 (0.000436) with: {'learning_rate': 0.0001, 'n_estimators': 50}
-1.086166 (0.000870) with: {'learning_rate': 0.001, 'n_estimators': 10}
-1.073935 (0.001730) with: {'learning_rate': 0.001, 'n_estimators': 20}
-1.061915 (0.002582) with: {'learning_rate': 0.001, 'n_estimators': 30}
-1.050100 (0.003425) with: {'learning_rate': 0.001, 'n_estimators': 40}
-1.038488 (0.004260) with: {'learning_rate': 0.001, 'n_estimators': 50}
-0.982845 (0.008361) with: {'learning_rate': 0.01, 'n_estimators': 10}
-0.884552 (0.015968) with: {'learning_rate': 0.01, 'n_estimators': 20}
-0.800174 (0.022982) with: {'learning_rate': 0.01, 'n_estimators': 3