In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Print NumPy arrays with three digits of floating-point precision.
np.set_printoptions(precision=3)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

# Gradient Boosting Classifier

In [None]:
from sklearn.datasets import load_digits

# Load the digits dataset and hold out a test split, stratified on the target
# and seeded so the same rows land in each split on every run.
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    stratify=digits.target,
    random_state=0,
)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient-boosted classifier with default hyper-parameters, fitted on the
# training split. random_state is pinned (same seed as the train/test split)
# because the trees permute features at each split, so an unseeded fit is not
# guaranteed to give the same model on every run.
gbrt = GradientBoostingClassifier(random_state=0).fit(X_train, y_train)

In [None]:
# Score the fitted gbrt model on the held-out test split.
gbrt.score(X_test, y_test)

In [None]:
# Switch array printing to 6 digits and suppress scientific notation so the
# small learning rates below are shown as plain decimals.
np.set_printoptions(precision=6, suppress=True)

# Search space: three log-spaced learning rates between 1e-2 and 1e0,
# crossed with three ensemble sizes.
param_grid = dict(
    learning_rate=np.logspace(-2, 0, num=3),
    n_estimators=[10, 100, 1000],
)
param_grid

In [None]:
from sklearn.model_selection import GridSearchCV

# 5-fold cross-validated search over param_grid using gbrt as the base
# estimator; training scores are recorded in addition to the test scores.
grid = GridSearchCV(
    estimator=gbrt,
    param_grid=param_grid,
    cv=5,
    return_train_score=True,
)
grid.fit(X_train, y_train)

In [None]:
import pandas as pd

# Tabulate everything the grid search recorded in cv_results_.
results = pd.DataFrame(data=grid.cv_results_)
results

In [None]:
# Mean test score arranged as learning rate (rows) x n_estimators (columns).
res = pd.pivot_table(
    pd.DataFrame(grid.cv_results_),
    values='mean_test_score',
    index='param_learning_rate',
    columns='param_n_estimators',
)
pd.set_option("display.precision", 4)

# Round the learning-rate labels for display. The labels are cast to float
# first because cv_results_ may hold parameter values as Python objects, which
# ndarray.round() cannot handle. Rebuilding a named Index (instead of passing
# a bare array to set_index) keeps the 'param_learning_rate' axis name, so
# tables and plots of `res` stay labelled on both axes.
res.index = pd.Index(
    res.index.to_numpy(dtype=float).round(4),
    name=res.index.name,
)

In [None]:
# Display the learning-rate x n_estimators table of mean test scores.
res

In [None]:
import seaborn as sns

# Annotated heatmap of the score table; the colour scale starts at 0.6 and
# cell values are written with three significant digits.
sns.heatmap(
    data=res,
    annot=True,
    fmt=".3g",
    vmin=0.6,
)

In [None]:
# Parameter combination the grid search selected as best.
grid.best_params_

In [None]:
# Print the repr of the estimator the grid search selected as best.
print("Best estimator:\n{}".format(grid.best_estimator_))

In [None]:
# Evaluate the fitted grid search on the held-out test split and report it
# together with the searcher's class name.
accuracy = grid.score(X_test, y_test)
print(
    'Accuracy score of the {} is {:.3f}'.format(type(grid).__name__, accuracy)
)

# XGBoost

Install XGBoost with `conda install -c conda-forge xgboost`.

In [None]:
from xgboost import XGBClassifier

# Fit XGBoost with its default settings and report accuracy on both splits.
xgb = XGBClassifier().fit(X_train, y_train)
print("accuracy on training set: %f" % (xgb.score(X_train, y_train),))
print("accuracy on test set: %f" % (xgb.score(X_test, y_test),))

# LightGBM

Install LightGBM with `conda install -c conda-forge lightgbm`.

In [None]:
from lightgbm import LGBMClassifier

# Fit LightGBM with its default settings and report accuracy on both splits.
lgbm = LGBMClassifier().fit(X_train, y_train)
print("accuracy on training set: %f" % (lgbm.score(X_train, y_train),))
print("accuracy on test set: %f" % (lgbm.score(X_test, y_test),))

# CatBoost

Install CatBoost with `conda install -c conda-forge catboost`.

In [None]:
from catboost import CatBoostClassifier

# Fit CatBoost with default hyper-parameters and report accuracy on both
# splits. By default CatBoost writes a log line for every boosting iteration,
# which buries the two accuracy lines under training output; verbose=False
# silences that logging without changing the fitted model.
catb = CatBoostClassifier(verbose=False)
catb.fit(X_train, y_train)
print("accuracy on training set: %f" % catb.score(X_train, y_train))
print("accuracy on test set: %f" % catb.score(X_test, y_test))

# Gradient Boosting Regressor

In [None]:
from sklearn.datasets import load_boston
from sklearn.ensemble import GradientBoostingRegressor

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the environment pins an older release, or plan a dataset swap.
boston = load_boston()

# This rebinds X_train/X_test/y_train/y_test (previously the digits split)
# and, below, gbrt (previously the classifier) to the regression problem.
X_train, X_test, y_train, y_test = train_test_split(
    boston.data, boston.target, random_state=42)

# Default gradient-boosted regressor. random_state is pinned (same seed as
# the split) so the printed test-set score is reproducible between runs.
gbrt = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)
print("Test-set score: {:.3f}".format(gbrt.score(X_test, y_test)))

In [None]:
# Feature importances of the fitted regressor.
gbrt.feature_importances_

In [None]:
# Column names of the Boston feature matrix.
boston.feature_names

In [None]:
# Print the dataset description: as a bare expression the cell shows the
# string's repr, with every newline escaped as "\n" on one long line.
print(boston['DESCR'])

In [None]:
# One scatter panel per feature against the target (labelled MEDV). The 3x5
# grid has 15 slots; slots beyond the number of feature columns are hidden.
# The feature count is read from the data instead of being hard-coded.
n_features = boston.data.shape[1]
fig, axes = plt.subplots(3, 5, figsize=(20, 10))
for i, ax in enumerate(axes.ravel()):
    if i >= n_features:
        # No feature left for this panel.
        ax.set_visible(False)
        continue
    ax.plot(boston.data[:, i], boston.target, 'o', alpha=.5)
    ax.set_title("{}: {}".format(i, boston.feature_names[i]))
    ax.set_ylabel("MEDV")