Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] FIX run test for meta-estimator having estimators keyword #14305

Merged
merged 14 commits into from Jul 29, 2019
9 changes: 9 additions & 0 deletions doc/whats_new/v0.22.rst
Expand Up @@ -115,6 +115,15 @@ Changelog
preserve the class balance of the original training set. :pr:`14194`
by :user:`Johann Faouzi <johannfaouzi>`.

- |Fix| Run by default
  :func:`utils.estimator_checks.check_estimator` on both
  :class:`ensemble.VotingClassifier` and :class:`ensemble.VotingRegressor`.
  This solves issues regarding shape consistency during `predict`, which
  failed when the underlying estimators did not output arrays with
  consistent dimensions. Note that these checks should be replaced by a
  refactoring of the common tests in the future.
  :pr:`14305` by :user:`Guillaume Lemaitre <glemaitre>`.

- |Efficiency| In :func:`ensemble.MissingIndicator.fit_transform`, the
  `_get_missing_features_info` function is now called only once per call to
  `fit_transform` for the `MissingIndicator` class. :pr:`14356` by :user:
Expand Down
21 changes: 21 additions & 0 deletions sklearn/ensemble/tests/test_voting.py
Expand Up @@ -6,13 +6,17 @@
from sklearn.utils.testing import assert_almost_equal, assert_array_equal
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_raise_message
from sklearn.utils.estimator_checks import check_estimator
from sklearn.utils.estimator_checks import check_no_attributes_set_in_init
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingClassifier, VotingRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn import datasets
from sklearn.model_selection import cross_val_score, train_test_split
Expand Down Expand Up @@ -508,3 +512,20 @@ def test_none_estimator_with_weights(X, y, voter, drop):
voter.fit(X, y, sample_weight=np.ones(y.shape))
y_pred = voter.predict(X)
assert y_pred.shape == y.shape


@pytest.mark.parametrize(
    "estimator",
    [
        pytest.param(
            VotingRegressor(
                estimators=[('lr', LinearRegression()),
                            ('tree', DecisionTreeRegressor(random_state=0))]),
            id='VotingRegressor'),
        pytest.param(
            VotingClassifier(
                estimators=[('lr', LogisticRegression(random_state=0)),
                            ('tree', DecisionTreeClassifier(random_state=0))]),
            id='VotingClassifier'),
    ]
)
def test_check_estimators_voting_estimator(estimator):
    # Run the common estimator checks directly on concrete Voting*
    # instances, since the generic test machinery cannot build
    # meta-estimators that require an `estimators` argument.
    # FIXME: remove once meta-estimators can themselves specify their
    # testing parameters (i.e. their required parameters).
    check_estimator(estimator)
    check_no_attributes_set_in_init(estimator.__class__.__name__, estimator)
11 changes: 8 additions & 3 deletions sklearn/ensemble/voting.py
Expand Up @@ -13,19 +13,22 @@
#
# License: BSD 3 clause

import numpy as np
from abc import abstractmethod

import numpy as np

from joblib import Parallel, delayed

from ..base import ClassifierMixin
from ..base import RegressorMixin
from ..base import TransformerMixin
from ..base import clone
from ..preprocessing import LabelEncoder
from ..utils import Bunch
from ..utils.validation import check_is_fitted
from ..utils.metaestimators import _BaseComposition
from ..utils import Bunch
from ..utils.multiclass import check_classification_targets
from ..utils.validation import column_or_1d


def _parallel_fit_estimator(estimator, X, y, sample_weight=None):
Expand Down Expand Up @@ -67,7 +70,7 @@ def _weights_not_none(self):

def _predict(self, X):
"""Collect results from clf.predict calls. """
return np.asarray([clf.predict(X) for clf in self.estimators_]).T
return np.asarray([est.predict(X) for est in self.estimators_]).T

@abstractmethod
def fit(self, X, y, sample_weight=None):
Expand Down Expand Up @@ -264,6 +267,7 @@ def fit(self, X, y, sample_weight=None):
-------
self : object
"""
check_classification_targets(y)
glemaitre marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:
raise NotImplementedError('Multilabel and multi-output'
' classification is not supported.')
Expand Down Expand Up @@ -454,6 +458,7 @@ def fit(self, X, y, sample_weight=None):
-------
self : object
"""
y = column_or_1d(y, warn=True)
glemaitre marked this conversation as resolved.
Show resolved Hide resolved
return super().fit(X, y, sample_weight)

def predict(self, X):
Expand Down
2 changes: 1 addition & 1 deletion sklearn/tests/test_common.py
Expand Up @@ -23,9 +23,9 @@
from sklearn.base import RegressorMixin
from sklearn.cluster.bicluster import BiclusterMixin

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model.base import LinearClassifierMixin
from sklearn.linear_model import Ridge
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.utils import IS_PYPY
from sklearn.utils.estimator_checks import (
_yield_all_checks,
Expand Down
1 change: 0 additions & 1 deletion sklearn/utils/estimator_checks.py
Expand Up @@ -30,7 +30,6 @@
from ..discriminant_analysis import LinearDiscriminantAnalysis
from ..linear_model import Ridge


from ..base import (clone, ClusterMixin, is_classifier, is_regressor,
_DEFAULT_TAGS, RegressorMixin, is_outlier_detector)

Expand Down