API Replacing grid_scores_ by cv_results_ in _rfe.py and test_rfe.py #20161
Changes from 26 commits
@@ -16,6 +16,7 @@
 from ..utils._tags import _safe_tags
 from ..utils.validation import check_is_fitted
 from ..utils.fixes import delayed
+from ..utils.deprecation import deprecated
 from ..base import BaseEstimator
 from ..base import MetaEstimatorMixin
 from ..base import clone
@@ -497,6 +498,24 @@ class RFECV(RFE):
         ``grid_scores_[i]`` corresponds to
         the CV score of the i-th subset of features.
 
+        .. deprecated:: 1.0
+            The `grid_scores_` attribute is deprecated in version 1.0 in favor
+            of `cv_results_` and will be removed in version 1.2.
+
+    cv_results_ : dict of ndarrays
+        A dict with keys:
+
+        split(k)_score : ndarray of shape (n_features,)
+            The cross-validation scores across (k)th fold.
+
+        mean_score : ndarray of shape (n_features,)
+            Mean of scores over the folds.
+
+        std_score : ndarray of shape (n_features,)
+            Standard deviation of scores over the folds.
+
+        .. versionadded:: 1.0
+
     n_features_ : int
         The number of selected features with cross-validation.
@@ -650,9 +669,10 @@ def fit(self, X, y, groups=None):
             for train, test in cv.split(X, y, groups)
         )
 
-        scores = np.sum(scores, axis=0)
-        scores_rev = scores[::-1]
-        argmax_idx = len(scores) - np.argmax(scores_rev) - 1
+        scores = np.array(scores)
+        scores_sum = np.sum(scores, axis=0)
+        scores_sum_rev = scores_sum[::-1]
+        argmax_idx = len(scores_sum) - np.argmax(scores_sum_rev) - 1
         n_features_to_select = max(
             n_features - (argmax_idx * step), self.min_features_to_select
         )
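The reversed-`argmax` in the hunk above is a tie-breaking trick: `np.argmax` returns the *first* index of the maximum, so reversing the array and mapping the index back selects the *last* occurrence instead. Since the raw scores run from the full feature set toward the minimal subset, this prefers the candidate with the fewest features among ties. A minimal sketch with made-up scores:

```python
import numpy as np

# Toy summed-CV-scores array; index 0 = full feature set, later indices
# = progressively smaller subsets. The maximum 0.90 appears twice.
scores_sum = np.array([0.80, 0.90, 0.85, 0.90, 0.70])

first = np.argmax(scores_sum)                             # first max -> 1
last = len(scores_sum) - np.argmax(scores_sum[::-1]) - 1  # last max -> 3
print(first, last)  # 1 3
```

With the "last" index, `n_features_to_select = n_features - argmax_idx * step` resolves ties toward the smaller feature subset.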
@@ -675,7 +695,27 @@ def fit(self, X, y, groups=None):
         self.estimator_ = clone(self.estimator)
         self.estimator_.fit(self.transform(X), y)
 
-        # Fixing a normalization error, n is equal to get_n_splits(X, y) - 1
-        # here, the scores are normalized by get_n_splits(X, y)
-        self.grid_scores_ = scores[::-1] / cv.get_n_splits(X, y, groups)
+        # reverse to stay consistent with before
+        scores_rev = scores[:, ::-1]
+        self.cv_results_ = {}
+        self.cv_results_["mean_score"] = np.mean(scores_rev, axis=0)
+        self.cv_results_["std_score"] = np.std(scores_rev, axis=0)
+
+        for i in range(scores.shape[0]):
+            self.cv_results_[f"split{i}_score"] = scores_rev[i]
+
         return self
 
+    # TODO: Remove in v1.2 when grid_scores_ is removed
+    # mypy error: Decorated property not supported
+    @deprecated(  # type: ignore
+        "The grid_scores_ attribute is deprecated in version 1.0 in favor "
+        "of cv_results_ and will be removed in version 1.2."
+    )
+    @property
+    def grid_scores_(self):
+        # remove 2 for mean_score, std_score
+        grid_size = len(self.cv_results_) - 2
+        return np.asarray(
+            [self.cv_results_["split{}_score".format(i)] for i in range(grid_size)]
+        ).T

Inline review comments on this hunk:

- "I think that what @thomasjpfan mentioned in his previous post was to change both entries using …"
- Reply: "I'm sorry, I was mistaken about that."
- Reply: "Yes, it should be enough."
- Reply: "I understand. Thank you so much."

wowry marked this conversation as resolved.
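The dict assembly and the deprecated `grid_scores_` property above can be sketched with a toy `(n_splits, n_subsets)` score matrix in place of real cross-validation output (key names follow this revision of the PR: `split{i}_score` / `mean_score` / `std_score`):

```python
import numpy as np

# Toy per-fold scores: 2 CV splits, 3 candidate feature-subset sizes,
# ordered from the full feature set down to the smallest subset.
scores = np.array([[0.7, 0.8, 0.9],   # fold 0
                   [0.6, 0.9, 0.8]])  # fold 1

# Reverse along the subset axis so index 0 = smallest subset,
# matching the old grid_scores_ ordering.
scores_rev = scores[:, ::-1]
cv_results = {
    "mean_score": np.mean(scores_rev, axis=0),
    "std_score": np.std(scores_rev, axis=0),
}
for i in range(scores.shape[0]):
    cv_results[f"split{i}_score"] = scores_rev[i]

# grid_scores_ equivalent: stack the per-split rows and transpose to
# shape (n_subsets, n_splits), as in the deprecated property.
grid_size = len(cv_results) - 2  # subtract mean_score and std_score
grid_scores = np.asarray(
    [cv_results[f"split{i}_score"] for i in range(grid_size)]
).T
print(grid_scores.shape)  # (3, 2)
```

Note the shape change versus the old attribute: `grid_scores_` held one row per subset size with one column per fold, which is exactly what the transpose restores.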
Review thread:

- "I am wondering if we should not use `mean_test_score`/`std_test_score`/`split{i}_test_score` instead, as in `GridSearchCV`. Edit: actually ignore that remark. #16392 already uses the `mean_score` notation..."
- "Being consistent with `GridSearchCV` looks like a net win to me. I am okay with deprecating `mean_score` in `GraphicalLassoCV` and switching to using `*_test_score` instead. @ogrisel Do you think deprecating would be too much of a hassle?"
- "I am fine with both approaches."
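For comparison, the `GridSearchCV` convention the reviewers mention uses `*_test_score` key names in its `cv_results_` dict; a small sketch (dataset and estimator are arbitrary placeholders):

```python
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# GridSearchCV names its per-fold and aggregate score entries with a
# "test_score" suffix: split{k}_test_score, mean_test_score,
# std_test_score (plus rank_test_score).
X, y = make_classification(n_samples=100, n_features=5, random_state=0)
search = GridSearchCV(
    LogisticRegression(max_iter=1000), {"C": [0.1, 1.0]}, cv=3
).fit(X, y)

test_score_keys = [k for k in search.cv_results_ if k.endswith("test_score")]
print(sorted(test_score_keys))
```

This is the naming that `RFECV.cv_results_` would adopt if the `*_test_score` suggestion from the thread were followed.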