API Replacing grid_scores_ by cv_results_ in _rfe.py and test_rfe.py #20161

Merged
Changes from 18 commits

Commits (32 total)
3b79637
Merge pull request #1 from scikit-learn/master
arka204 Apr 11, 2020
43f64a0
Deprecating grid_scores_ in _rfe.py.
arka204 Apr 18, 2020
464dc37
Merge pull request #2 from scikit-learn/master
arka204 May 10, 2020
15a658f
Merge pull request #3 from scikit-learn/master
arka204 May 22, 2020
672bca7
Fixing linting errors.
arka204 May 23, 2020
9448514
Modifying tests and fixing mypy error.
arka204 May 30, 2020
fc36365
Merge branch 'Replacing-grid_scores_-by-cv_results-in-_rfe.py' into _…
arka204 May 30, 2020
68436b2
Merge pull request #7 from arka204/_rfe.py-update
arka204 May 30, 2020
41bc337
Fixing small errors.
arka204 May 30, 2020
1914f5e
Merge branch 'Replacing-grid_scores_-by-cv_results-in-_rfe.py' of htt…
arka204 May 30, 2020
2beb458
Applying suggestions.
arka204 Jun 8, 2020
1712464
Applaying suggestions.
arka204 Jun 8, 2020
ec05856
Update sklearn/feature_selection/tests/test_rfe.py
arka204 Jun 8, 2020
24835e2
Fix merge conflicts
wowry May 27, 2021
f781f30
Update deprecation messages of grid_scores_
wowry May 27, 2021
b4ae6f4
Fix computation of cv_results_ in _rfe.py
wowry May 27, 2021
d8ce1c4
Fix tests of cv_results_ in test_rfe.py
wowry May 27, 2021
45e6ffa
Fix documentation in _rfe.py
wowry May 28, 2021
4f23ec4
Merge branch 'main' into Replacing-grid_scores_-by-cv_results-in-_rfe.py
wowry May 28, 2021
42ecf9d
Update deprecation messages of grid_scores_
wowry May 28, 2021
0e820e0
Update deprecation of grid_scores_
wowry Jun 2, 2021
5eee2f1
Update doc/whats_new
wowry Jun 4, 2021
7c786a9
Merge commit '0e7761cdc4f244adb4803f1a97f0a9fe4b365a99' into Replacin…
wowry Jun 25, 2021
63a1bfa
MAINT Adds target_version to black config (#20293)
thomasjpfan Jun 17, 2021
20d733a
Apply black
wowry Jun 25, 2021
b70baa9
Merge branch 'main' into Replacing-grid_scores_-by-cv_results-in-_rfe.py
wowry Jun 25, 2021
b3ae926
Merge remote-tracking branch 'origin/main' into pr/wowry/20161
glemaitre Jul 22, 2021
007d427
Fix deprecation messages
wowry Jul 22, 2021
3a40617
Merge branch 'main' into Replacing-grid_scores_-by-cv_results-in-_rfe.py
glemaitre Jul 27, 2021
0be435a
Merge remote-tracking branch 'upstream/main' into Replacing-grid_scor…
wowry Jul 30, 2021
4247091
Change names of the keys into '*_test_score'
wowry Jul 30, 2021
bd94dd4
Apply suggestions from code review
glemaitre Jul 30, 2021
53 changes: 47 additions & 6 deletions sklearn/feature_selection/_rfe.py
@@ -16,6 +16,7 @@
from ..utils._tags import _safe_tags
from ..utils.validation import check_is_fitted
from ..utils.fixes import delayed
from ..utils.deprecation import deprecated
from ..base import BaseEstimator
from ..base import MetaEstimatorMixin
from ..base import clone
@@ -472,6 +473,25 @@ class RFECV(RFE):
``grid_scores_[i]`` corresponds to
the CV score of the i-th subset of features.

.. deprecated:: 0.24
The `grid_scores_` attribute is deprecated in version 0.24 in favor
of `cv_results_` and will be removed in version
1.1 (renaming of 0.26).

cv_results_ : dict of ndarrays
A dict with keys:

split(k)_score : ndarray of shape (n_features,)
The cross-validation scores across the (k)th fold.

mean_score : ndarray of shape (n_features,)
Mean of scores over the folds.

std_score : ndarray of shape (n_features,)
Standard deviation of scores over the folds.

.. versionadded:: 0.24

n_features_ : int
The number of selected features with cross-validation.

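To make the new attribute concrete, here is a minimal usage sketch (illustrative only, not part of the diff), assuming the cv_results_ layout documented above:

# Minimal sketch of the documented cv_results_ layout (illustrative only).
import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.feature_selection import RFECV

X, y = load_iris(return_X_y=True)
rfecv = RFECV(estimator=SVC(kernel="linear"), step=1).fit(X, y)

# One entry per candidate feature-subset size in every array.
print(rfecv.cv_results_["mean_score"])    # mean CV score per subset size
print(rfecv.cv_results_["std_score"])     # std of the CV scores per subset size
print(rfecv.cv_results_["split0_score"])  # scores from the first CV fold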
@@ -603,9 +623,10 @@ def fit(self, X, y, groups=None):
func(rfe, self.estimator, X, y, train, test, scorer)
for train, test in cv.split(X, y, groups))

scores = np.sum(scores, axis=0)
scores_rev = scores[::-1]
argmax_idx = len(scores) - np.argmax(scores_rev) - 1
scores = np.array(scores)
scores_sum = np.sum(scores, axis=0)
scores_sum_rev = scores_sum[::-1]
argmax_idx = len(scores_sum) - np.argmax(scores_sum_rev) - 1
n_features_to_select = max(
n_features - (argmax_idx * step),
self.min_features_to_select)
@@ -625,7 +646,27 @@ def fit(self, X, y, groups=None):
self.estimator_ = clone(self.estimator)
self.estimator_.fit(self.transform(X), y)

# Fixing a normalization error, n is equal to get_n_splits(X, y) - 1
# here, the scores are normalized by get_n_splits(X, y)
self.grid_scores_ = scores[::-1] / cv.get_n_splits(X, y, groups)
# reverse to stay consistent with before
scores_rev = scores[:, ::-1]
self.cv_results_ = {}
self.cv_results_["mean_score"] = np.mean(scores_rev, axis=0)
self.cv_results_["std_score"] = np.std(scores_rev, axis=0)
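As a quick sanity check (illustrative only, not part of the diff): the removed code summed the per-fold scores and divided by cv.get_n_splits, while the new mean_score entry takes np.mean over the fold axis; the two are equivalent.

# Illustrative equivalence check: sum-then-divide equals the mean over folds.
import numpy as np

rng = np.random.RandomState(0)
fold_scores = rng.rand(5, 10)  # shape (n_splits, n_subset_sizes)

old_style = fold_scores.sum(axis=0) / fold_scores.shape[0]  # old grid_scores_ normalization
new_style = fold_scores.mean(axis=0)                        # new mean_score entry
assert np.allclose(old_style, new_style)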
Member:
I am wondering if we should not use mean_test_score / std_test_score / split{i}_test_score instead.

  1. this would be more consistent with GridSearchCV
  2. this would be more future proof in case we want to add an option to also compute the training score in RFECV, although this is probably a case of YAGNI.

Edit: actually, ignore that remark. #16392 already uses the mean_score notation...

Member:
Being consistent with GridSearchCV looks like a net win to me. I am okay with deprecating mean_score in GraphicalLassoCV and switching to *_test_score instead.

@ogrisel Do you think deprecating would be too much of a hassle?

Member:
I am fine with both approaches.

Member:
I think what @thomasjpfan meant in his previous post was to change both entries here to mean_test_score and std_test_score.

Contributor (Author):
I'm sorry, I was mistaken about that. Would it be okay to just change the names?

Member:
Yes, that should be enough.

Contributor (Author):
I understand. Thank you so much.

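For reference, the GridSearchCV-style naming discussed above is adopted in a later commit ("Change names of the keys into '*_test_score'"); below is a hedged sketch of that layout with placeholder values, not real cross-validation scores.

# Hypothetical layout after the '*_test_score' rename discussed above;
# values are placeholders, not real cross-validation scores.
import numpy as np

n_splits, n_subset_sizes = 5, 10
fold_scores = np.random.RandomState(0).rand(n_splits, n_subset_sizes)

cv_results = {f"split{i}_test_score": fold_scores[i] for i in range(n_splits)}
cv_results["mean_test_score"] = fold_scores.mean(axis=0)
cv_results["std_test_score"] = fold_scores.std(axis=0)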
for i in range(scores.shape[0]):
self.cv_results_[f"split{i}_score"] = scores_rev[i]

return self

# TODO: Remove in v1.1 when grid_scores_ is removed
# mypy error: Decorated property not supported
@deprecated( # type: ignore
"The grid_scores_ attribute is deprecated in version 0.24 in favor "
"of cv_results_ and will be removed in version 1.1 (renaming of 0.26)."
)
@property
def grid_scores_(self):
# remove 2 for mean_score, std_score
grid_size = len(self.cv_results_) - 2
return np.asarray(
[self.cv_results_["split{}_score".format(i)]
for i in range(grid_size)]).T
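A migration sketch for downstream users (illustrative only, based on the deprecated property above): accessing grid_scores_ now emits a FutureWarning, and the same per-fold scores can be rebuilt from cv_results_.

# Migration sketch: rebuild the deprecated grid_scores_ array from cv_results_.
import warnings
import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.feature_selection import RFECV

X, y = load_iris(return_X_y=True)
rfecv = RFECV(estimator=SVC(kernel="linear")).fit(X, y)

with warnings.catch_warnings():
    warnings.simplefilter("ignore", FutureWarning)
    old = rfecv.grid_scores_  # deprecated; emits a FutureWarning when accessed

n_splits = len(rfecv.cv_results_) - 2  # every key except mean_score / std_score
new = np.asarray(
    [rfecv.cv_results_[f"split{i}_score"] for i in range(n_splits)]
).T
assert np.allclose(old, new)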
97 changes: 88 additions & 9 deletions sklearn/feature_selection/tests/test_rfe.py
@@ -5,7 +5,8 @@
from operator import attrgetter
import pytest
import numpy as np
from numpy.testing import assert_array_almost_equal, assert_array_equal
from numpy.testing import (assert_array_almost_equal,
assert_array_equal, assert_allclose)
from scipy import sparse

from sklearn.feature_selection import RFE, RFECV
@@ -165,7 +166,17 @@ def test_rfecv():
rfecv = RFECV(estimator=SVC(kernel="linear"), step=1)
rfecv.fit(X, y)
# non-regression test for missing worst feature:
assert len(rfecv.grid_scores_) == X.shape[1]

# TODO: Remove in v1.1 when grid_scores_ is removed
msg = (r"The grid_scores_ attribute is deprecated in version 0\.24 in "
r"favor of cv_results_ and will be removed in version 1\.1 "
r"\(renaming of 0\.26\).")
with pytest.warns(FutureWarning, match=msg):
assert len(rfecv.grid_scores_) == X.shape[1]

for key in rfecv.cv_results_.keys():
assert len(rfecv.cv_results_[key]) == X.shape[1]

assert len(rfecv.ranking_) == X.shape[1]
X_r = rfecv.transform(X)

@@ -193,12 +204,17 @@ def test_rfecv():
X_r = rfecv.transform(X)
assert_array_equal(X_r, iris.data)

# Test fix on grid_scores
# Test fix on cv_results_
def test_scorer(estimator, X, y):
return 1.0
rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, scoring=test_scorer)
rfecv.fit(X, y)
assert_array_equal(rfecv.grid_scores_, np.ones(len(rfecv.grid_scores_)))

# TODO: Remove in v1.1 when grid_scores_ is removed
with pytest.warns(FutureWarning, match=msg):
assert_array_equal(rfecv.grid_scores_,
np.ones(rfecv.grid_scores_.shape))

# In the event of cross validation score ties, the expected behavior of
# RFECV is to return the FEWEST features that maximize the CV score.
# Because test_scorer always returns 1.0 in this example, RFECV should
@@ -208,7 +224,14 @@ def test_scorer(estimator, X, y):
# Same as the first two tests, but with step=2
rfecv = RFECV(estimator=SVC(kernel="linear"), step=2)
rfecv.fit(X, y)
assert len(rfecv.grid_scores_) == 6

# TODO: Remove in v1.1 when grid_scores_ is removed
with pytest.warns(FutureWarning, match=msg):
assert len(rfecv.grid_scores_) == 6

for key in rfecv.cv_results_.keys():
assert len(rfecv.cv_results_[key]) == 6

assert len(rfecv.ranking_) == X.shape[1]
X_r = rfecv.transform(X)
assert_array_equal(X_r, iris.data)
@@ -227,6 +250,8 @@ def test_scorer(estimator, X, y):
assert_array_equal(X_r_sparse.toarray(), iris.data)


# TODO: Remove in v1.1 when grid_scores_ is removed
@ignore_warnings(category=FutureWarning)
def test_rfecv_mockclassifier():
generator = check_random_state(0)
iris = load_iris()
@@ -237,7 +262,13 @@ def test_rfecv_mockclassifier():
rfecv = RFECV(estimator=MockClassifier(), step=1)
rfecv.fit(X, y)
# non-regression test for missing worst feature:

# TODO: Remove in v1.1 when grid_scores_ is removed
assert len(rfecv.grid_scores_) == X.shape[1]

for key in rfecv.cv_results_.keys():
assert len(rfecv.cv_results_[key]) == X.shape[1]

assert len(rfecv.ranking_) == X.shape[1]


@@ -260,7 +291,9 @@ def test_rfecv_verbose_output():
assert len(verbose_output.readline()) > 0


def test_rfecv_grid_scores_size():
# TODO: Remove in v1.1 when grid_scores_ is removed
@ignore_warnings(category=FutureWarning)
def test_rfecv_cv_results_size():
generator = check_random_state(0)
iris = load_iris()
X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
@@ -275,7 +308,13 @@ def test_rfecv_grid_scores_size():

score_len = np.ceil(
(X.shape[1] - min_features_to_select) / step) + 1

# TODO: Remove in v1.1 when grid_scores_ is removed
assert len(rfecv.grid_scores_) == score_len

for key in rfecv.cv_results_.keys():
assert len(rfecv.cv_results_[key]) == score_len

assert len(rfecv.ranking_) == X.shape[1]
assert rfecv.n_features_ >= min_features_to_select

@@ -311,6 +350,8 @@ def test_rfe_min_step():
assert sel.support_.sum() == n_features // 2


# TODO: Remove in v1.1 when grid_scores_ is removed
@ignore_warnings(category=FutureWarning)
def test_number_of_subsets_of_features():
# In RFE, 'number_of_subsets_of_features'
# = the number of iterations in '_fit'
@@ -348,7 +389,7 @@ def formula2(n_features, n_features_to_select, step):

# In RFECV, 'fit' calls 'RFE._fit'
# 'number_of_subsets_of_features' of RFE
# = the size of 'grid_scores' of RFECV
# = the size of each score in 'cv_results_' of RFECV
# = the number of iterations of the for loop before optimization #4534

# RFECV, n_features_to_select = 1
@@ -365,12 +406,21 @@ def formula2(n_features, n_features_to_select, step):
rfecv = RFECV(estimator=SVC(kernel="linear"), step=step)
rfecv.fit(X, y)

assert (rfecv.grid_scores_.shape[0] ==
# TODO: Remove in v1.1 when grid_scores_ is removed
assert (len(rfecv.grid_scores_) ==
formula1(n_features, n_features_to_select, step))
assert (rfecv.grid_scores_.shape[0] ==
assert (len(rfecv.grid_scores_) ==
formula2(n_features, n_features_to_select, step))

for key in rfecv.cv_results_.keys():
assert (len(rfecv.cv_results_[key]) ==
formula1(n_features, n_features_to_select, step))
assert (len(rfecv.cv_results_[key]) ==
formula2(n_features, n_features_to_select, step))


# TODO: Remove in v1.1 when grid_scores_ is removed
@ignore_warnings(category=FutureWarning)
def test_rfe_cv_n_jobs():
generator = check_random_state(0)
iris = load_iris()
@@ -380,13 +430,23 @@ def test_rfe_cv_n_jobs():
rfecv = RFECV(estimator=SVC(kernel='linear'))
rfecv.fit(X, y)
rfecv_ranking = rfecv.ranking_

# TODO: Remove in v1.1 when grid_scores_ is removed
rfecv_grid_scores = rfecv.grid_scores_

rfecv_cv_results_ = rfecv.cv_results_

rfecv.set_params(n_jobs=2)
rfecv.fit(X, y)
assert_array_almost_equal(rfecv.ranking_, rfecv_ranking)

# TODO: Remove in v1.1 when grid_scores_ is removed
assert_array_almost_equal(rfecv.grid_scores_, rfecv_grid_scores)

assert rfecv_cv_results_.keys() == rfecv.cv_results_.keys()
for key in rfecv_cv_results_.keys():
assert rfecv_cv_results_[key] == pytest.approx(rfecv.cv_results_[key])


def test_rfe_cv_groups():
generator = check_random_state(0)
@@ -478,6 +538,25 @@ def test_w_pipeline_2d_coef_():
assert sfm.transform(data).shape[1] == 2


def test_rfecv_std_and_mean():
generator = check_random_state(0)
iris = load_iris()
X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
y = iris.target

rfecv = RFECV(estimator=SVC(kernel='linear'))
rfecv.fit(X, y)
n_split_keys = len(rfecv.cv_results_) - 2
split_keys = ['split{}_score'.format(i) for i in range(n_split_keys)]

cv_scores = np.asarray([rfecv.cv_results_[key] for key in split_keys])
expected_mean = np.mean(cv_scores, axis=0)
expected_std = np.std(cv_scores, axis=0)

assert_allclose(rfecv.cv_results_["mean_score"], expected_mean)
assert_allclose(rfecv.cv_results_["std_score"], expected_std)


@pytest.mark.parametrize('ClsRFE', [
RFE,
RFECV