
[MRG] MAINT add base class for voting and stacking #15084

Merged 12 commits on Oct 5, 2019
5 changes: 5 additions & 0 deletions doc/whats_new/v0.22.rst
@@ -236,6 +236,11 @@ Changelog
:user:`Matt Hancock <notmatthancock>` and
:pr:`5963` by :user:`Pablo Duboue <DrDub>`.

- |Fix| Enforce the underlying estimators to be of the same type as the
  voting or stacking estimator. We introduced a new base class to raise
  a consistent error message in the Stacking and Voting estimators.
  :pr:`15084` by :user:`Guillaume Lemaitre <glemaitre>`.

:mod:`sklearn.feature_extraction`
.................................

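A minimal sketch of the behavior this changelog entry describes, assuming this PR's branch (the message text is taken from the diff below and may differ in later releases):

from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import VotingClassifier

X, y = load_iris(return_X_y=True)
# A regressor inside a voting classifier now fails with the shared message:
# ValueError: The estimator LinearRegression should be a classifier.
eclf = VotingClassifier(estimators=[('lr', LinearRegression())])
eclf.fit(X, y)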
86 changes: 3 additions & 83 deletions sklearn/ensemble/_stacking.py
@@ -15,6 +15,7 @@
from ..base import MetaEstimatorMixin

from .base import _parallel_fit_estimator
from .base import _BaseHeterogeneousEnsemble

from ..linear_model import LogisticRegression
from ..linear_model import RidgeCV
@@ -32,80 +33,26 @@
from ..utils.validation import column_or_1d


class _BaseStacking(TransformerMixin, MetaEstimatorMixin, _BaseComposition,
class _BaseStacking(TransformerMixin, _BaseHeterogeneousEnsemble,
metaclass=ABCMeta):
"""Base class for stacking method."""
_required_parameters = ['estimators']

@abstractmethod
def __init__(self, estimators, final_estimator=None, cv=None,
stack_method='auto', n_jobs=None, verbose=0):
self.estimators = estimators
super().__init__(estimators=estimators)
self.final_estimator = final_estimator
self.cv = cv
self.stack_method = stack_method
self.n_jobs = n_jobs
self.verbose = verbose

@abstractmethod
def _validate_estimators(self):
if self.estimators is None or len(self.estimators) == 0:
raise ValueError(
"Invalid 'estimators' attribute, 'estimators' should be a list"
" of (string, estimator) tuples."
)
names, estimators = zip(*self.estimators)
self._validate_names(names)
return names, estimators

def _clone_final_estimator(self, default):
if self.final_estimator is not None:
self.final_estimator_ = clone(self.final_estimator)
else:
self.final_estimator_ = clone(default)

def set_params(self, **params):
"""Set the parameters for the stacking estimator.

Valid parameter keys can be listed with `get_params()`.

Parameters
----------
params : keyword arguments
Specific parameters using e.g.
`set_params(parameter_name=new_value)`. In addition, to setting the
parameters of the stacking estimator, the individual estimator of
the stacking estimators can also be set, or can be removed by
setting them to 'drop'.

Examples
--------
In this example, the RandomForestClassifier is removed.

>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.ensemble import RandomForestClassifier
>>> from sklearn.ensemble import StackingClassifier
>>> clf1 = LogisticRegression()
>>> clf2 = RandomForestClassifier()
>>> eclf = StackingClassifier(estimators=[('lr', clf1), ('rf', clf2)])
>>> eclf.set_params(rf='drop')
StackingClassifier(estimators=[('lr', LogisticRegression()),
('rf', 'drop')])
"""
super()._set_params('estimators', **params)
return self

def get_params(self, deep=True):
"""Get the parameters of the stacking estimator.

Parameters
----------
deep : bool
Setting it to True gets the various classifiers and the parameters
of the classifiers as well.
"""
return super()._get_params('estimators', deep=deep)

def _concatenate_predictions(self, predictions):
"""Concatenate the predictions of each first layer learner.

@@ -172,13 +119,6 @@ def fit(self, X, y, sample_weight=None):
names, all_estimators = self._validate_estimators()
self._validate_final_estimator()

has_estimator = any(est != 'drop' for est in all_estimators)
if not has_estimator:
raise ValueError(
"All estimators are dropped. At least one is required "
"to be an estimator."
)

stack_method = [self.stack_method] * len(all_estimators)

# Fit the base estimators on the whole training data. Those
@@ -416,16 +356,6 @@ def __init__(self, estimators, final_estimator=None, cv=None,
verbose=verbose
)

def _validate_estimators(self):
names, estimators = super()._validate_estimators()
for est in estimators:
if est != 'drop' and not is_classifier(est):
raise ValueError(
"The estimator {} should be a classifier."
.format(est.__class__.__name__)
)
return names, estimators

def _validate_final_estimator(self):
self._clone_final_estimator(default=LogisticRegression())
if not is_classifier(self.final_estimator_):
@@ -651,16 +581,6 @@ def __init__(self, estimators, final_estimator=None, cv=None, n_jobs=None,
verbose=verbose
)

def _validate_estimators(self):
names, estimators = super()._validate_estimators()
for est in estimators:
if est != 'drop' and not is_regressor(est):
raise ValueError(
"The estimator {} should be a regressor."
.format(est.__class__.__name__)
)
return names, estimators

def _validate_final_estimator(self):
self._clone_final_estimator(default=RidgeCV())
if not is_regressor(self.final_estimator_):
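As the two `_validate_final_estimator` hunks above show, the final estimator still defaults to LogisticRegression for classification and RidgeCV for regression; a short sketch, assuming this branch:

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, StackingClassifier

X, y = load_iris(return_X_y=True)
# No final_estimator given: _clone_final_estimator clones the default.
stack = StackingClassifier(
    estimators=[('rf', RandomForestClassifier(n_estimators=10))])
stack.fit(X, y)
print(type(stack.final_estimator_).__name__)  # LogisticRegression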
81 changes: 79 additions & 2 deletions sklearn/ensemble/base.py
@@ -5,16 +5,20 @@
# Authors: Gilles Louppe
# License: BSD 3 clause

import numpy as np
from abc import ABCMeta, abstractmethod
import numbers

import numpy as np

from joblib import effective_n_jobs

from ..base import clone
from ..base import is_classifier, is_regressor
from ..base import BaseEstimator
from ..base import MetaEstimatorMixin
from ..utils import Bunch
from ..utils import check_random_state
from abc import ABCMeta, abstractmethod
from ..utils.metaestimators import _BaseComposition

MAX_RAND_SEED = np.iinfo(np.int32).max

@@ -178,3 +182,76 @@ def _partition_estimators(n_estimators, n_jobs):
starts = np.cumsum(n_estimators_per_job)

return n_jobs, n_estimators_per_job.tolist(), [0] + starts.tolist()


class _BaseHeterogeneousEnsemble(MetaEstimatorMixin, _BaseComposition,
metaclass=ABCMeta):
"""Base class for ensemble learners based on heterogeneous estimators."""
_required_parameters = ['estimators']

@property
def named_estimators(self):
return Bunch(**dict(self.estimators))

@abstractmethod
def __init__(self, estimators):
self.estimators = estimators

def _validate_estimators(self):
if self.estimators is None or len(self.estimators) == 0:
raise AttributeError(
"Invalid 'estimators' attribute, 'estimators' should be a list"
" of (string, estimator) tuples."
)
names, estimators = zip(*self.estimators)
# defined by MetaEstimatorMixin
self._validate_names(names)

has_estimator = any(est not in (None, 'drop') for est in estimators)
if not has_estimator:
raise ValueError(
"All estimators are dropped. At least one is required "
"to be an estimator."
)

is_estimator_type = (is_classifier if is_classifier(self)
else is_regressor)

for est in estimators:
if est not in (None, 'drop') and not is_estimator_type(est):
raise ValueError(
"The estimator {} should be a {}."
.format(
est.__class__.__name__, is_estimator_type.__name__[3:]
)
)

return names, estimators

def set_params(self, **params):
"""Set the parameters of an estimator from the ensemble.

Valid parameter keys can be listed with `get_params()`.

Parameters
----------
params : keyword arguments
Specific parameters using e.g.
`set_params(parameter_name=new_value)`. In addition to setting the
parameters of the ensemble estimator, the individual estimators of
the ensemble can also be set, or removed by setting them to 'drop'.
"""
super()._set_params('estimators', **params)
return self

def get_params(self, deep=True):
"""Get the parameters of an estimator from the ensemble.

Parameters
----------
deep : bool
Setting it to True gets the various estimators and their
parameters as well.
"""
return super()._get_params('estimators', deep=deep)
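The `set_params`/`get_params` pair above keeps the 'drop' workflow that the deleted stacking docstring illustrated; a sketch, assuming this branch:

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, StackingClassifier

eclf = StackingClassifier(estimators=[('lr', LogisticRegression()),
                                      ('rf', RandomForestClassifier())])
# Individual estimators can be replaced (or removed with 'drop') by name.
eclf.set_params(rf='drop')
print(eclf.get_params()['rf'])  # 'drop'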
8 changes: 4 additions & 4 deletions sklearn/ensemble/tests/test_stacking.py
@@ -223,10 +223,10 @@ def fit(self, X, y):
"y, params, type_err, msg_err",
[(y_iris,
{'estimators': None},
ValueError, "Invalid 'estimators' attribute,"),
AttributeError, "Invalid 'estimators' attribute,"),
(y_iris,
{'estimators': []},
ValueError, "Invalid 'estimators' attribute,"),
AttributeError, "Invalid 'estimators' attribute,"),
(y_iris,
{'estimators': [('lr', LinearRegression()),
('svm', LinearSVC(max_iter=5e4))]},
@@ -265,10 +265,10 @@ def test_stacking_classifier_error(y, params, type_err, msg_err):
"y, params, type_err, msg_err",
[(y_diabetes,
{'estimators': None},
ValueError, "Invalid 'estimators' attribute,"),
AttributeError, "Invalid 'estimators' attribute,"),
(y_diabetes,
{'estimators': []},
ValueError, "Invalid 'estimators' attribute,"),
AttributeError, "Invalid 'estimators' attribute,"),
(y_diabetes,
{'estimators': [('lr', LogisticRegression()), ('svm', LinearSVR())]},
ValueError, 'should be a regressor'),
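The updated expectation in these tests, as a standalone sketch (assuming this branch and pytest):

import pytest
from sklearn.datasets import load_iris
from sklearn.ensemble import StackingClassifier

X, y = load_iris(return_X_y=True)
# estimators=None is now reported as an AttributeError, not a ValueError.
with pytest.raises(AttributeError, match="Invalid 'estimators' attribute"):
    StackingClassifier(estimators=None).fit(X, y)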
6 changes: 3 additions & 3 deletions sklearn/ensemble/tests/test_voting.py
@@ -37,8 +37,8 @@

def test_estimator_init():
eclf = VotingClassifier(estimators=[])
msg = ('Invalid `estimators` attribute, `estimators` should be'
' a list of (string, estimator) tuples')
msg = ("Invalid 'estimators' attribute, 'estimators' should be"
" a list of (string, estimator) tuples.")
assert_raise_message(AttributeError, msg, eclf.fit, X, y)

clf = LogisticRegression(random_state=1)
@@ -417,7 +417,7 @@ def test_set_estimator_none(drop):
eclf2.set_params(voting='soft').fit(X, y)
assert_array_equal(eclf1.predict(X), eclf2.predict(X))
assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
msg = 'All estimators are None or "drop". At least one is required!'
msg = 'All estimators are dropped. At least one is required'
assert_raise_message(
ValueError, msg, eclf2.set_params(lr=drop, rf=drop, nb=drop).fit, X, y)
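
The all-dropped path exercised just above, sketched end to end (assuming this branch):

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier

X, y = load_iris(return_X_y=True)
eclf = VotingClassifier(estimators=[('lr', LogisticRegression())])
eclf.set_params(lr='drop')
# Raises ValueError: All estimators are dropped. At least one is required
# to be an estimator.
eclf.fit(X, y)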
