[MRG] Allow nan/inf in feature selection #11635

Merged

Changes from all commits (48 commits)

03eb769
removed check for nan/inf in SelectorMixin.transform, RFE.fit, and RFE…
Jul 19, 2018
03042a2
removed extra blank line
Jul 19, 2018
d6ffba7
added tests for changes to RFE/RFECV and SelectorMixin
Jul 19, 2018
da4c5f4
pep8 changes and pytest.mark.parametrize for nan_inf tests
Jan 8, 2019
d3601f4
added force_all_finite=False for univariate feature selection fit
Jan 8, 2019
27e1399
added tests for nan and inf being allowed in univariate select
Jan 8, 2019
2b06f2a
added test for nan and inf being allowed in SelectorMixin transform
Jan 8, 2019
fb7d7d6
updated checks to not run check_estimators_nan_inf on feature selectors
Jan 8, 2019
4d4662c
pep8 corrections
Jan 8, 2019
eadb804
created dummy_score function for use by all tests needing a fake scorer
Jan 8, 2019
da866eb
fixed travis test error with RFECV not having cv specified
Jan 8, 2019
9f2ff90
Merge branch 'master' into allow-nan-inf-in-feature-selection
Jan 8, 2019
f7c2fb8
allowed no NaN in pickle check for estimators in ALLOW_NAN
Jan 9, 2019
288cecc
added support for nan/inf in VarianceThreshold.fit, added tests for it
Jan 11, 2019
67fa9e7
added whats new documentation for feature selection allowing NaN/Inf
Jan 17, 2019
9be3870
added Notes for modified feature selectors about allowing NaN/Inf
Jan 17, 2019
c9e8fcc
fixed pep8 error
Jan 17, 2019
34e2710
shortened line length for whats new documentation
Jan 17, 2019
816b0a3
changed VarianceThreshold to only allow nan, not inf. Updated tests f…
Jan 23, 2019
c4619eb
fixed phrasing in whats_new for #11635
Jan 23, 2019
195ed13
added test for all-NaN feature in VarianceThreshold
Jan 24, 2019
87c771a
Merge branch 'master' into allow-nan-inf-in-feature-selection
adpeters Jan 30, 2019
a79155d
Merge branch 'master' into allow-nan-inf-in-feature-selection
adpeters Feb 5, 2019
620726a
Merge branch 'master' into allow-nan-inf-in-feature-selection
adpeters Feb 22, 2019
fe60c94
Merge branch 'master' into HEAD
jnothman Apr 24, 2019
cca7acc
Fix use of check_array
jnothman Apr 24, 2019
017a0db
Use check_X_y
jnothman Apr 24, 2019
0efc7f0
updated SelectFromModel allow_nan tag to inherit from its estimator
May 23, 2019
35878e8
Merge branch 'master' into allow-nan-inf-in-feature-selection
May 23, 2019
4119fcd
moved documentation for #11635 to v0.22
May 23, 2019
9f20985
fixed formatting
May 23, 2019
fc1a975
removed test_transform_accepts_nan_inf from test_from_model.py since …
May 23, 2019
f710408
updated test_transform_accepts_nan_inf to handle NaN tag
May 23, 2019
2b55934
fixed formatting
May 23, 2019
1adc65c
fixed formatting
May 23, 2019
055d4ea
set allow_nan tag to false for univariate feature selectors since the…
adpeters Oct 29, 2019
61d8277
merge with master
adpeters Oct 29, 2019
4499960
updated VarianceThreshold.fit() to handle nan when threshold is 0
adpeters Oct 30, 2019
92bc14f
removed test that no longer applies now that allow_nan tag is False f…
adpeters Oct 30, 2019
e1add7f
added pytest import since it was accidentally removed
adpeters Oct 30, 2019
4921954
removed duplicate numpy import
adpeters Oct 30, 2019
6783182
updated assert_equals to assert
adpeters Oct 30, 2019
8a9164f
reverted changes to univariate feature selectors to break those out t…
adpeters Nov 1, 2019
8167671
updated documentation to reflect removal of univariate changes
adpeters Nov 1, 2019
6176538
fixed rfe handling of tags so they are properly inherited from the es…
adpeters Nov 1, 2019
e43d8f8
added get_tags to mockclassifier for rfe tests
adpeters Nov 1, 2019
6950892
added test for nan/inf in fit of SelectFromModel
adpeters Nov 4, 2019
20f2426
added noqa tag to import to avoid flake errors
adpeters Nov 4, 2019
10 changes: 9 additions & 1 deletion doc/whats_new/v0.22.rst
@@ -69,7 +69,6 @@ Changelog
:pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
where 123456 is the *pull request* number, not the issue number.


:mod:`sklearn.base`
...................

@@ -311,6 +310,15 @@ Changelog
:mod:`sklearn.feature_selection`
................................

- |Enhancement| Updated the following :mod:`feature_selection` estimators to allow
NaN/Inf values in ``transform`` and ``fit``:
:class:`feature_selection.RFE`, :class:`feature_selection.RFECV`,
:class:`feature_selection.SelectFromModel`,
and :class:`feature_selection.VarianceThreshold`. Note that if the underlying
estimator of the feature selector does not allow NaN/Inf, it will still raise
an error, but the feature selectors themselves no longer enforce this
restriction unnecessarily. :issue:`11635` by :user:`Alec Peters <adpeters>`.

- |Fix| Fixed a bug where :class:`feature_selection.VarianceThreshold` with
`threshold=0` did not remove constant features due to numerical instability,
by using range rather than variance in this case.
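
As a concrete illustration of the enhancement above, here is a minimal sketch modeled on this PR's own tests. `NaNTagRandomForest` is a tag-overriding subclass used purely for illustration (it also appears below in `test_from_model.py`); it is not public scikit-learn API:

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel


class NaNTagRandomForest(RandomForestClassifier):
    # advertise NaN support so the selector skips its finiteness check
    def _more_tags(self):
        return {'allow_nan': True}


X, y = load_iris(return_X_y=True)
model = SelectFromModel(estimator=NaNTagRandomForest(n_estimators=10,
                                                     random_state=0))
model.fit(X, y)  # fit on finite data

X_nan = X.copy()
X_nan[0, 0], X_nan[1, 0] = np.nan, np.inf
model.transform(X_nan)  # previously raised ValueError; now passes through
```

The selector skips its own validation because its `allow_nan` tag is inherited from the wrapped estimator (see the `_more_tags` changes below).
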
4 changes: 3 additions & 1 deletion sklearn/feature_selection/_base.py
@@ -71,7 +71,9 @@ def transform(self, X):
X_r : array of shape [n_samples, n_selected_features]
The input samples with only the selected features.
"""
-        X = check_array(X, dtype=None, accept_sparse='csr')
+        tags = self._get_tags()
+        X = check_array(X, dtype=None, accept_sparse='csr',
+                        force_all_finite=not tags.get('allow_nan', True))
Member: this logic now won't work for univariate :(

Contributor Author: Ah yeah, I removed the test for the univariate case, but I wasn't sure what to do there. I guess the only way you could really use it would be to subclass and override the tags.

mask = self.get_support()
if not mask.any():
warn("No features were selected: either the data is"
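
For context, the PR toggles this validation through `check_array`'s `force_all_finite` parameter. A small sketch of its three modes, mirroring scikit-learn's documented behavior:

```python
import numpy as np
from sklearn.utils import check_array

X_nan = np.array([[1.0, np.nan], [0.0, 2.0]])
X_inf = np.array([[1.0, np.inf], [0.0, 2.0]])

check_array(X_nan, force_all_finite=False)        # NaN accepted
check_array(X_inf, force_all_finite=False)        # Inf accepted too
check_array(X_nan, force_all_finite='allow-nan')  # NaN accepted, Inf is not
# check_array(X_nan)                                # default True: raises ValueError
# check_array(X_inf, force_all_finite='allow-nan')  # raises ValueError
```

`SelectorMixin.transform` and RFE use `False` (NaN and Inf both pass), while `VarianceThreshold` below uses `'allow-nan'`, presumably because a variance computed from Inf is not meaningful (see commit 816b0a3).
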
8 changes: 8 additions & 0 deletions sklearn/feature_selection/_from_model.py
@@ -131,6 +131,10 @@ class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator):
threshold_ : float
The threshold value used for feature selection.

Notes
-----
Allows NaN/Inf in the input if the underlying estimator does as well.

Examples
--------
>>> from sklearn.feature_selection import SelectFromModel
@@ -249,3 +253,7 @@ def partial_fit(self, X, y=None, **fit_params):
self.estimator_ = clone(self.estimator)
self.estimator_.partial_fit(X, y, **fit_params)
return self

def _more_tags(self):
estimator_tags = self.estimator._get_tags()
return {'allow_nan': estimator_tags.get('allow_nan', True)}
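
A short sketch of what the `_more_tags` delegation above buys, mirroring `test_allow_nan_tag_comes_from_estimator` below (`_get_tags` is private estimator API, used here only for illustration):

```python
from sklearn.base import BaseEstimator
from sklearn.feature_selection import SelectFromModel


class NaNOK(BaseEstimator):
    def _more_tags(self):
        return {'allow_nan': True}


class NaNNotOK(BaseEstimator):
    def _more_tags(self):
        return {'allow_nan': False}


# the selector reports whatever its wrapped estimator reports
assert SelectFromModel(NaNOK())._get_tags()['allow_nan'] is True
assert SelectFromModel(NaNNotOK())._get_tags()['allow_nan'] is False
```
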
17 changes: 14 additions & 3 deletions sklearn/feature_selection/_rfe.py
@@ -103,6 +103,10 @@ class RFE(SelectorMixin, MetaEstimatorMixin, BaseEstimator):
>>> selector.ranking_
array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])

Notes
-----
Allows NaN/Inf in the input if the underlying estimator does as well.

See also
--------
RFECV : Recursive feature elimination with built-in cross-validated
@@ -150,7 +154,9 @@ def _fit(self, X, y, step_score=None):
# and is used when implementing RFECV
# self.scores_ will not be calculated when calling _fit through fit

X, y = check_X_y(X, y, "csc", ensure_min_features=2)
tags = self._get_tags()
X, y = check_X_y(X, y, "csc", ensure_min_features=2,
force_all_finite=not tags.get('allow_nan', True))
# Initialization
n_features = X.shape[1]
if self.n_features_to_select is None:
@@ -326,7 +332,9 @@ def predict_log_proba(self, X):
return self.estimator_.predict_log_proba(self.transform(X))

def _more_tags(self):
-        return {'poor_score': True}
+        estimator_tags = self.estimator._get_tags()
+        return {'poor_score': True,
+                'allow_nan': estimator_tags.get('allow_nan', True)}


class RFECV(RFE):
@@ -421,6 +429,8 @@ class RFECV(RFE):
``ceil((n_features - min_features_to_select) / step) + 1``,
where step is the number of features removed at each iteration.

Allows NaN/Inf in the input if the underlying estimator does as well.

Examples
--------
The following example shows how to retrieve the a-priori not known 5
@@ -479,7 +489,8 @@ def fit(self, X, y, groups=None):
train/test set. Only used in conjunction with a "Group" :term:`cv`
instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).
"""
X, y = check_X_y(X, y, "csr", ensure_min_features=2)
X, y = check_X_y(X, y, "csr", ensure_min_features=2,
force_all_finite=False)

# Initialization
cv = check_cv(self.cv, y, is_classifier(self.estimator))
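
A runnable sketch of what the RFE/RFECV changes enable. `NaNOKClassifier` is a toy estimator invented for this example; a real estimator would also have to tolerate non-finite values in its own `fit`:

```python
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.datasets import load_iris
from sklearn.feature_selection import RFE


class NaNOKClassifier(BaseEstimator):
    # toy estimator: ignores non-finite values and reports dummy importances
    def fit(self, X, y):
        self.coef_ = np.ones(X.shape[1])
        return self

    def _more_tags(self):
        return {'allow_nan': True}


X, y = load_iris(return_X_y=True)
X[0, 0], X[0, 1] = np.nan, np.inf

rfe = RFE(estimator=NaNOKClassifier(), n_features_to_select=2)
rfe.fit(X, y)     # RFE no longer validates finiteness for this estimator
rfe.transform(X)  # likewise passes the non-finite values through
```

Note that `RFECV.fit` above passes `force_all_finite=False` unconditionally, while `RFE._fit` consults the estimator's `allow_nan` tag.
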
18 changes: 14 additions & 4 deletions sklearn/feature_selection/_variance_threshold.py
@@ -29,6 +29,10 @@ class VarianceThreshold(SelectorMixin, BaseEstimator):
variances_ : array, shape (n_features,)
Variances of individual features.

Notes
-----
Allows NaN in the input.

Examples
--------
The following dataset has integer features, two of which are the same
@@ -61,24 +65,27 @@ def fit(self, X, y=None):
-------
self
"""
-        X = check_array(X, ('csr', 'csc'), dtype=np.float64)
+        X = check_array(X, ('csr', 'csc'), dtype=np.float64,
+                        force_all_finite='allow-nan')

if hasattr(X, "toarray"): # sparse matrix
_, self.variances_ = mean_variance_axis(X, axis=0)
if self.threshold == 0:
mins, maxes = min_max_axis(X, axis=0)
peak_to_peaks = maxes - mins
else:
-            self.variances_ = np.var(X, axis=0)
+            self.variances_ = np.nanvar(X, axis=0)
if self.threshold == 0:
peak_to_peaks = np.ptp(X, axis=0)

if self.threshold == 0:
# Use peak-to-peak to avoid numeric precision issues
# for constant features
-            self.variances_ = np.minimum(self.variances_, peak_to_peaks)
+            compare_arr = np.array([self.variances_, peak_to_peaks])
+            self.variances_ = np.nanmin(compare_arr, axis=0)

-        if np.all(self.variances_ <= self.threshold):
+        if np.all(~np.isfinite(self.variances_) |
+                  (self.variances_ <= self.threshold)):
msg = "No feature in X meets the variance threshold {0:.5f}"
if X.shape[0] == 1:
msg += " (X contains only one sample)"
@@ -90,3 +97,6 @@ def _get_support_mask(self):
check_is_fitted(self)

return self.variances_ > self.threshold

def _more_tags(self):
return {'allow_nan': True}
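
Two quick checks of the NumPy behavior the updated `fit` relies on (plain NumPy; the constant is borrowed from `test_zero_variance_floating_point_error` below):

```python
import numpy as np

col = np.array([1.0, np.nan, 3.0])
np.var(col)     # nan: a single NaN poisons the plain variance
np.nanvar(col)  # 1.0: the variance of [1.0, 3.0], NaN ignored
np.nanvar([np.nan, np.nan])  # nan (all-NaN slice, with a RuntimeWarning)
# NaN > threshold is False, so _get_support_mask rejects an all-NaN feature.

# Why the threshold == 0 branch also looks at peak-to-peak:
x = np.full(10, -0.13725701)
x.var()    # can be a tiny nonzero value due to floating-point error
np.ptp(x)  # exactly 0.0 for a constant feature
```
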
57 changes: 56 additions & 1 deletion sklearn/feature_selection/tests/test_from_model.py
@@ -10,10 +10,28 @@
from sklearn.linear_model import LogisticRegression, SGDClassifier, Lasso
from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectFromModel
-from sklearn.ensemble import RandomForestClassifier
+from sklearn.experimental import enable_hist_gradient_boosting  # noqa
+from sklearn.ensemble import (RandomForestClassifier,
+                              HistGradientBoostingClassifier)
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.base import BaseEstimator


class NaNTag(BaseEstimator):
def _more_tags(self):
return {'allow_nan': True}


class NoNaNTag(BaseEstimator):
def _more_tags(self):
return {'allow_nan': False}


class NaNTagRandomForest(RandomForestClassifier):
def _more_tags(self):
return {'allow_nan': True}


iris = datasets.load_iris()
data, y = iris.data, iris.target
rng = np.random.RandomState(0)
@@ -320,3 +338,40 @@ def test_threshold_without_refitting():
# Set a higher threshold to filter out more features.
model.threshold = "1.0 * mean"
assert X_transform.shape[1] > model.transform(data).shape[1]


def test_fit_accepts_nan_inf():
# Test that fit doesn't check for np.inf and np.nan values.
clf = HistGradientBoostingClassifier(random_state=0)

model = SelectFromModel(estimator=clf)

nan_data = data.copy()
nan_data[0] = np.NaN
nan_data[1] = np.Inf

model.fit(nan_data, y)


def test_transform_accepts_nan_inf():
Member: can you also check for fit, maybe use HistGradientBoostingClassifier?

# Test that transform doesn't check for np.inf and np.nan values.
clf = NaNTagRandomForest(n_estimators=100, random_state=0)
nan_data = data.copy()

model = SelectFromModel(estimator=clf)
model.fit(nan_data, y)

nan_data[0] = np.NaN
nan_data[1] = np.Inf

model.transform(nan_data)


def test_allow_nan_tag_comes_from_estimator():
allow_nan_est = NaNTag()
model = SelectFromModel(estimator=allow_nan_est)
assert model._get_tags()['allow_nan'] is True

no_nan_est = NoNaNTag()
model = SelectFromModel(estimator=no_nan_est)
assert model._get_tags()['allow_nan'] is False
26 changes: 26 additions & 0 deletions sklearn/feature_selection/tests/test_rfe.py
@@ -2,6 +2,7 @@
Testing Recursive feature elimination
"""

import pytest
import numpy as np
from numpy.testing import assert_array_almost_equal, assert_array_equal
from scipy import sparse
@@ -54,6 +55,9 @@ def get_params(self, deep=True):
def set_params(self, **params):
return self

def _get_tags(self):
return {}


def test_rfe_features_importance():
generator = check_random_state(0)
@@ -369,3 +373,25 @@ def test_rfe_cv_groups():
)
est_groups.fit(X, y, groups=groups)
assert est_groups.n_features_ > 0

glemaitre marked this conversation as resolved.

@pytest.mark.parametrize("cv", [
None,
5
])
def test_rfe_allow_nan_inf_in_x(cv):
iris = load_iris()
X = iris.data
y = iris.target

# add nan and inf value to X
X[0][0] = np.NaN
X[0][1] = np.Inf

clf = MockClassifier()
if cv is not None:
rfe = RFECV(estimator=clf, cv=cv)
else:
rfe = RFE(estimator=clf)
rfe.fit(X, y)
rfe.transform(X)
12 changes: 12 additions & 0 deletions sklearn/feature_selection/tests/test_variance_threshold.py
@@ -46,3 +46,15 @@ def test_zero_variance_floating_point_error():
msg = "No feature in X meets the variance threshold 0.00000"
with pytest.raises(ValueError, match=msg):
VarianceThreshold().fit(X)


def test_variance_nan():
arr = np.array(data, dtype=np.float64)
# add single NaN and feature should still be included
arr[0, 0] = np.NaN
# make all values in feature NaN and feature should be rejected
arr[:, 1] = np.NaN

for X in [arr, csr_matrix(arr), csc_matrix(arr), bsr_matrix(arr)]:
sel = VarianceThreshold().fit(X)
assert_array_equal([0, 3, 4], sel.get_support(indices=True))
Member: Should ideally also check that an all-NaN feature is treated as 0 variance...

Contributor Author: numpy.nanvar returns NaN for an all-NaN feature; is that okay behavior, or do we want to change that to 0 variance?

Member: I don't mind which, as long as the feature is rejected. It is probably simpler to set it to 0 at fit time.

Contributor Author: Okay, I left it as NaN since that doesn't require any changes to fit and would also let you differentiate between an all-NaN feature and one that truly has 0 variance, if desired. But I updated the test to ensure that the all-NaN feature is rejected.
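
A minimal end-to-end check of the resolution above (assuming a scikit-learn build with this PR merged): a feature with one NaN keeps its NaN-ignoring variance, while an all-NaN feature gets a NaN variance and is rejected:

```python
import numpy as np
from sklearn.feature_selection import VarianceThreshold

X = np.array([[2.0, np.nan, 0.0],
              [1.0, np.nan, 4.0],
              [np.nan, np.nan, 1.0]])

sel = VarianceThreshold().fit(X)
print(sel.variances_)                 # approx [0.25, nan, 2.89]
print(sel.get_support(indices=True))  # [0 2]: the all-NaN column is dropped
```
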