
DOC Point GradientBoosting towards HistGradientBoosting #23340

Merged
merged 10 commits on May 24, 2022
8 changes: 6 additions & 2 deletions examples/applications/plot_model_complexity_influence.py
@@ -21,8 +21,12 @@
- :class:`~sklearn.svm.NuSVR` (for regression data) which implements
Nu support vector regression;

-- :class:`~sklearn.ensemble.GradientBoostingRegressor` (for regression
-  data) which builds an additive model in a forward stage-wise fashion.
+- :class:`~sklearn.ensemble.GradientBoostingRegressor` (for regression data)
+  which builds an additive model in a forward stage-wise fashion. Notice
+  that the :class:`~sklearn.ensemble.HistGradientBoostingRegressor` estimator
+  is much faster than :class:`~sklearn.ensemble.GradientBoostingRegressor` for
+  big datasets (n_samples >= 10_000), which is not the case for this
+  example.
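For context, a minimal sketch of the swap this added note suggests; it is not part of the diff, and the dataset size below is an arbitrary stand-in for the n_samples >= 10_000 regime:

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingRegressor

# Arbitrary size in the regime where the histogram-based variant pays off.
X, y = make_regression(n_samples=50_000, random_state=0)

exact = GradientBoostingRegressor().fit(X, y)  # exact splits, slow at this size
hist = HistGradientBoostingRegressor().fit(X, y)  # binned splits, much faster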


We make the model complexity vary through the choice of relevant model
8 changes: 7 additions & 1 deletion examples/ensemble/plot_feature_transformation.py
@@ -39,7 +39,7 @@
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

-X, y = make_classification(n_samples=80000, random_state=10)
+X, y = make_classification(n_samples=80_000, random_state=10)

X_full_train, X_test, y_full_train, y_test = train_test_split(
X, y, test_size=0.5, random_state=10
@@ -72,6 +72,12 @@
_ = gradient_boosting.fit(X_train_ensemble, y_train_ensemble)

# %%
+# Notice that the :class:`~sklearn.ensemble.HistGradientBoostingClassifier`
+# estimator is much faster than
+# :class:`~sklearn.ensemble.GradientBoostingClassifier` for big datasets
+# (n_samples >= 10_000), as is the case here. The latter is used for
+# pedagogical reasons only.
+#
# The :class:`~sklearn.ensemble.RandomTreesEmbedding` is an unsupervised method
# and thus does not need to be trained independently.
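As a minimal sketch of the faster alternative the added comment points to (not part of the diff; it reuses X_train_ensemble and y_train_ensemble from the example above and leaves hyperparameters at their defaults):

from sklearn.ensemble import HistGradientBoostingClassifier

# Histogram-based counterpart of the gradient_boosting estimator fitted
# above; defaults are assumed here for illustration.
hist_gradient_boosting = HistGradientBoostingClassifier(random_state=10)
hist_gradient_boosting.fit(X_train_ensemble, y_train_ensemble)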

5 changes: 5 additions & 0 deletions examples/ensemble/plot_gradient_boosting_quantile.py
@@ -72,6 +72,11 @@ def f(x):
all_models["q %1.2f" % alpha] = gbr.fit(X_train, y_train)

# %%
+# Notice that the :class:`~sklearn.ensemble.HistGradientBoostingRegressor`
+# estimator is much faster than
+# :class:`~sklearn.ensemble.GradientBoostingRegressor` for big datasets
+# (n_samples >= 10_000), which is not the case in the present example.
+#
# For the sake of comparison, we also fit a baseline model trained with the
# usual (mean) squared error (MSE).
gbr_ls = GradientBoostingRegressor(loss="squared_error", **common_params)
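Were the dataset large, a sketch of the histogram-based counterpart of this baseline could look as follows (not part of the diff; common_params is specific to GradientBoostingRegressor, so defaults are assumed here, and newer scikit-learn releases also expose a quantile loss on this estimator):

from sklearn.ensemble import HistGradientBoostingRegressor

# Histogram-based counterpart of the squared-error baseline above.
hist_gbr_ls = HistGradientBoostingRegressor(loss="squared_error")
hist_gbr_ls.fit(X_train, y_train)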
30 changes: 20 additions & 10 deletions sklearn/ensemble/_gb.py
@@ -991,12 +991,17 @@ def loss_(self):
class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
"""Gradient Boosting for classification.

-GB builds an additive model in a
-forward stage-wise fashion; it allows for the optimization of
-arbitrary differentiable loss functions. In each stage ``n_classes_``
-regression trees are fit on the negative gradient of the loss function,
-e.g. binary or multiclass log loss. Binary classification
-is a special case where only a single regression tree is induced.
+This estimator builds an additive model in a forward stage-wise fashion; it
+allows for the optimization of arbitrary differentiable loss functions. In
+each stage ``n_classes_`` regression trees are fit on the negative gradient
+of the loss function, e.g. binary or multiclass log loss. Binary
+classification is a special case where only a single regression tree is
+induced.
+
+The estimator
+:class:`HistGradientBoostingClassifier<sklearn.ensemble.HistGradientBoostingClassifier>`
+is a much faster variant of this algorithm for big datasets (n_samples >=
+10_000).

Read more in the :ref:`User Guide <gradient_boosting>`.
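A minimal usage sketch of the stage-wise fitting the docstring describes (illustrative only; all hyperparameters are left at their defaults):

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X, y = make_classification(random_state=0)
# Each boosting stage fits regression trees on the negative gradient of the
# log loss, as described above.
clf = GradientBoostingClassifier(random_state=0).fit(X, y)
print(clf.score(X, y))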

@@ -1559,10 +1564,15 @@ def staged_predict_proba(self, X):
class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
"""Gradient Boosting for regression.

-GB builds an additive model in a forward stage-wise fashion;
-it allows for the optimization of arbitrary differentiable loss functions.
-In each stage a regression tree is fit on the negative gradient of the
-given loss function.
+This estimator builds an additive model in a forward stage-wise fashion; it
+allows for the optimization of arbitrary differentiable loss functions. In
+each stage a regression tree is fit on the negative gradient of the given
+loss function.
+
+The estimator
+:class:`HistGradientBoostingRegressor<sklearn.ensemble.HistGradientBoostingRegressor>`
+is a much faster variant of this algorithm for big datasets (n_samples >=
+10_000).

Read more in the :ref:`User Guide <gradient_boosting>`.
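A matching sketch for the regressor, using staged_predict to make the additive, stage-wise construction visible (illustrative only; n_estimators is an arbitrary choice):

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor

X, y = make_regression(random_state=0)
reg = GradientBoostingRegressor(n_estimators=50, random_state=0).fit(X, y)
# staged_predict yields the prediction after each additional tree.
staged = list(reg.staged_predict(X))
print(len(staged))  # one prediction per boosting stage -> 50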
