Skip to content

Commit

Permalink
FIX Improves feature names support for SelectFromModel + Est w/o names (#21991)
Browse files Browse the repository at this point in the history

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
  • Loading branch information
thomasjpfan and glemaitre committed Dec 24, 2021
1 parent 5de723d commit 6db0e2c
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 0 deletions.
7 changes: 7 additions & 0 deletions doc/whats_new/v1.0.rst
Expand Up @@ -35,6 +35,13 @@ Changelog
and :class:`decomposition.MiniBatchSparsePCA` to be convex and match the referenced
article. :pr:`19210` by :user:`Jérémie du Boisberranger <jeremiedbb>`.

:mod:`sklearn.feature_selection`
................................

- |Fix| Fixed :class:`feature_selection.SelectFromModel` by improving support
for base estimators that do not set `feature_names_in_`. :pr:`21991` by
`Thomas Fan`_.

:mod:`sklearn.manifold`
.......................

Expand Down
4 changes: 4 additions & 0 deletions sklearn/feature_selection/_from_model.py
Expand Up @@ -265,8 +265,12 @@ def fit(self, X, y=None, **fit_params):
raise NotFittedError("Since 'prefit=True', call transform directly")
self.estimator_ = clone(self.estimator)
self.estimator_.fit(X, y, **fit_params)

if hasattr(self.estimator_, "feature_names_in_"):
self.feature_names_in_ = self.estimator_.feature_names_in_
else:
self._check_feature_names(X, reset=True)

return self

@property
Expand Down
28 changes: 28 additions & 0 deletions sklearn/feature_selection/tests/test_from_model.py
Expand Up @@ -5,6 +5,7 @@
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import skip_if_32bit
from sklearn.utils._testing import MinimalClassifier

from sklearn import datasets
from sklearn.linear_model import LogisticRegression, SGDClassifier, Lasso
Expand Down Expand Up @@ -428,3 +429,30 @@ def test_importance_getter(estimator, importance_getter):
)
selector.fit(data, y)
assert selector.transform(data).shape[1] == 1


def test_estimator_does_not_support_feature_names():
    """SelectFromModel works with estimators that do not support feature_names_in_.

    Non-regression test for #21949.

    The selector is fitted on a pandas DataFrame while wrapping
    ``MinimalClassifier``. The test then checks that:

    * the selector itself exposes ``feature_names_in_`` equal to the
      DataFrame columns;
    * ``get_feature_names_out`` returns a strict subset of the input names;
    * ``transform`` on a DataFrame slice emits no warning at all.
    """
    # Imported locally so this test does not depend on a module-level
    # ``import warnings`` being present in the file.
    import warnings

    pytest.importorskip("pandas")
    X, y = datasets.load_iris(as_frame=True, return_X_y=True)
    all_feature_names = set(X.columns)

    def importance_getter(estimator):
        # Ignores the estimator entirely and hands back 0..n_features-1 as
        # the per-feature importances.
        return np.arange(X.shape[1])

    selector = SelectFromModel(
        MinimalClassifier(), importance_getter=importance_getter
    ).fit(X, y)

    # selector learns the feature names itself
    assert_array_equal(selector.feature_names_in_, X.columns)

    # Strict subset: at least one input feature must have been dropped.
    feature_names_out = set(selector.get_feature_names_out())
    assert feature_names_out < all_feature_names

    # `pytest.warns(None)` is deprecated since pytest 7 and rejected by later
    # releases. Promoting every warning to an error keeps the original
    # intent: the test fails if `transform` emits any warning.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        selector.transform(X.iloc[1:3])

0 comments on commit 6db0e2c

Please sign in to comment.