From be027e0b316097ad0cea9956aa6829ae969d8a62 Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Thu, 31 Oct 2019 19:48:24 +0100 Subject: [PATCH] DOC highlights: stacking estimators (#15414) * stacking estimators * cleanup a line * user guide * apply thomas's suggestions * apply suggestions * use --upgrade instead --- .../plot_release_highlights_0_22_0.py | 42 ++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/examples/release_highlights/plot_release_highlights_0_22_0.py b/examples/release_highlights/plot_release_highlights_0_22_0.py index 75cb2b48705ff..5639089ec2098 100644 --- a/examples/release_highlights/plot_release_highlights_0_22_0.py +++ b/examples/release_highlights/plot_release_highlights_0_22_0.py @@ -12,7 +12,7 @@ To install the latest version (with pip):: - pip install -U scikit-learn --upgrade + pip install --upgrade scikit-learn or with conda:: @@ -172,3 +172,43 @@ # recomputed. estimator.set_params(isomap__n_neighbors=5) estimator.fit(X) + +############################################################################ +# Stacking Classifier and Regressor +# --------------------------------- +# :class:`~ensemble.StackingClassifier` and +# :class:`~ensemble.StackingRegressor` +# allow you to have a stack of estimators with a final classifier or +# a regressor. +# Stacked generalization consists in stacking the output of individual +# estimators and using a classifier to compute the final prediction. Stacking +# lets you use the strength of each individual estimator by using their +# output as input of a final estimator. +# Base estimators are fitted on the full ``X`` while +# the final estimator is trained using cross-validated predictions of the +# base estimators using ``cross_val_predict``. +# +# Read more in the :ref:`User Guide <stacking>`. 
+
+from sklearn.datasets import load_iris
+from sklearn.ensemble import RandomForestClassifier, StackingClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import LinearSVC
+
+# Load iris and hold out a stratified test split to score the stack on.
+X, y = load_iris(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, stratify=y, random_state=42
+)
+# Base estimators: a small random forest and a linear SVM on scaled features.
+estimators = [
+    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
+    ('svr', make_pipeline(StandardScaler(), LinearSVC(random_state=42))),
+]
+# A logistic regression is the final estimator on top of the base estimators.
+clf = StackingClassifier(estimators=estimators,
+                         final_estimator=LogisticRegression())
+clf.fit(X_train, y_train).score(X_test, y_test)