Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] FIX run test for meta-estimator having estimators keyword #14305

Merged
merged 14 commits into from Jul 29, 2019
9 changes: 9 additions & 0 deletions doc/whats_new/v0.22.rst
Expand Up @@ -115,6 +115,15 @@ Changelog
preserve the class balance of the original training set. :pr:`14194`
by :user:`Johann Faouzi <johannfaouzi>`.

- |Fix| Run by default
  :func:`utils.estimator_checks.check_estimator` on both
  :class:`ensemble.VotingClassifier` and :class:`ensemble.VotingRegressor`.
  This solves issues regarding shape consistency during `predict`, which
  failed when the underlying estimators did not output arrays with
  consistent dimensions. Note that these checks should be replaced by a
  refactoring of the common tests in the future.
  :pr:`14305` by :user:`Guillaume Lemaitre <glemaitre>`.

- |Efficiency| In :func:`ensemble.MissingIndicator.fit_transform`, the
  `_get_missing_features_info` function is now called only once per call to
  `fit_transform` for the `MissingIndicator` class. :pr:`14356` by :user:
Expand Down
21 changes: 21 additions & 0 deletions sklearn/ensemble/tests/test_voting.py
Expand Up @@ -6,13 +6,17 @@
from sklearn.utils.testing import assert_almost_equal, assert_array_equal
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_raise_message
from sklearn.utils.estimator_checks import check_estimator
from sklearn.utils.estimator_checks import check_no_attributes_set_in_init
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingClassifier, VotingRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn import datasets
from sklearn.model_selection import cross_val_score, train_test_split
Expand Down Expand Up @@ -508,3 +512,20 @@ def test_none_estimator_with_weights(X, y, voter, drop):
voter.fit(X, y, sample_weight=np.ones(y.shape))
y_pred = voter.predict(X)
assert y_pred.shape == y.shape


@pytest.mark.parametrize(
    "estimator",
    [
        pytest.param(
            VotingRegressor(
                estimators=[('lr', LinearRegression()),
                            ('tree', DecisionTreeRegressor(random_state=0))]),
            id='VotingRegressor'),
        pytest.param(
            VotingClassifier(
                estimators=[('lr', LogisticRegression(random_state=0)),
                            ('tree', DecisionTreeClassifier(random_state=0))]),
            id='VotingClassifier'),
    ]
)
def test_check_estimators_voting_estimator(estimator):
    # Run the common estimator checks directly on concrete Voting*
    # instances, since the generic test machinery cannot build
    # meta-estimators that require an `estimators` argument.
    # FIXME: remove once meta-estimators can themselves specify their
    # testing parameters (i.e. their required parameters).
    check_estimator(estimator)
    check_no_attributes_set_in_init(estimator.__class__.__name__, estimator)
11 changes: 8 additions & 3 deletions sklearn/ensemble/voting.py
Expand Up @@ -13,19 +13,22 @@
#
# License: BSD 3 clause

import numpy as np
from abc import abstractmethod

import numpy as np

from joblib import Parallel, delayed

from ..base import ClassifierMixin
from ..base import RegressorMixin
from ..base import TransformerMixin
from ..base import clone
from ..preprocessing import LabelEncoder
from ..utils import Bunch
from ..utils.validation import check_is_fitted
from ..utils.metaestimators import _BaseComposition
from ..utils import Bunch
from ..utils.multiclass import check_classification_targets
from ..utils.validation import column_or_1d


def _parallel_fit_estimator(estimator, X, y, sample_weight=None):
Expand Down Expand Up @@ -67,7 +70,7 @@ def _weights_not_none(self):

def _predict(self, X):
"""Collect results from clf.predict calls. """
return np.asarray([clf.predict(X) for clf in self.estimators_]).T
return np.asarray([est.predict(X) for est in self.estimators_]).T

@abstractmethod
def fit(self, X, y, sample_weight=None):
Expand Down Expand Up @@ -264,6 +267,7 @@ def fit(self, X, y, sample_weight=None):
-------
self : object
"""
check_classification_targets(y)
glemaitre marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:
raise NotImplementedError('Multilabel and multi-output'
' classification is not supported.')
Expand Down Expand Up @@ -454,6 +458,7 @@ def fit(self, X, y, sample_weight=None):
-------
self : object
"""
y = column_or_1d(y, warn=True)
glemaitre marked this conversation as resolved.
Show resolved Hide resolved
return super().fit(X, y, sample_weight)

def predict(self, X):
Expand Down
2 changes: 1 addition & 1 deletion sklearn/tests/test_common.py
Expand Up @@ -23,9 +23,9 @@
from sklearn.base import RegressorMixin
from sklearn.cluster.bicluster import BiclusterMixin

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model.base import LinearClassifierMixin
from sklearn.linear_model import Ridge
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.utils import IS_PYPY
from sklearn.utils.estimator_checks import (
_yield_all_checks,
Expand Down
1 change: 0 additions & 1 deletion sklearn/utils/estimator_checks.py
Expand Up @@ -30,7 +30,6 @@
from ..discriminant_analysis import LinearDiscriminantAnalysis
from ..linear_model import Ridge


from ..base import (clone, ClusterMixin, is_classifier, is_regressor,
_DEFAULT_TAGS, RegressorMixin, is_outlier_detector)

Expand Down