ENH do not allocate local arrays in Ridge*CV if store_cv_values is False #15652

Merged

Changes from 5 commits
13 changes: 13 additions & 0 deletions doc/whats_new/v0.23.rst
@@ -47,10 +47,23 @@ Changelog
 :mod:`sklearn.cluster`
 ......................
 
+- |Fix| example fix in model XXX. :pr:`xxxx` or :issue:`xxxx` by
+  :user:`name <user id>`
+
+:mod:`sklearn.linear_model`
+...........................
+
 - |Enhancement| :class:`cluster.AgglomerativeClustering` has a faster and
   more memory efficient implementation of single linkage clustering.
   :pr:`11514` by :user:`Leland McInnes <lmcinnes>`.
 
+- |Efficiency| :class:`linear_model.RidgeCV` and
+  :class:`linear_model.RidgeClassifierCV` now do not allocate a
+  potentially large array to store dual coefficients for all hyperparameters
+  during their `fit`, nor an array to store all LOO predictions unless
+  `store_cv_values` is `True`.
+  :pr:`15652` by :user:`Jérôme Dockès <jeromedockes>`.
+
 :mod:`sklearn.preprocessing`
 ............................
 
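As a usage-level illustration of the changelog entry above (public API only; a minimal sketch, not part of the diff):

```python
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import RidgeCV

X, y = make_regression(n_samples=100, n_features=5, random_state=0)

# Default: store_cv_values=False -> no per-alpha LOO arrays are kept around.
ridge = RidgeCV(alphas=[0.1, 1.0, 10.0]).fit(X, y)
assert not hasattr(ridge, "cv_values_")

# Opt in explicitly to keep the (n_samples, n_alphas) array of LOO values.
ridge = RidgeCV(alphas=[0.1, 1.0, 10.0], store_cv_values=True).fit(X, y)
print(ridge.cv_values_.shape)  # (100, 3)
```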
63 changes: 37 additions & 26 deletions sklearn/linear_model/_ridge.py
@@ -1054,6 +1054,16 @@ def _matmat(self, v):
         return res
 
 
+class _IdentityEstimator:
+    """Hack to call a scorer when we already have the predictions."""
+
+    def decision_function(self, y_predict):
+        return y_predict
+
+    def predict(self, y_predict):
+        return y_predict
+
+
 class _RidgeGCV(LinearModel):
     """Ridge regression with built-in Generalized Cross-Validation
 
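To make the intent of this helper concrete, here is a minimal sketch (not part of the diff) of how a scorer with the `(estimator, X, y)` signature can be applied to predictions that were already computed, with `_IdentityEstimator` as defined above; `neg_mean_squared_error` is just an example scorer:

```python
import numpy as np
from sklearn.metrics import get_scorer

scorer = get_scorer('neg_mean_squared_error')

y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.1, 1.9, 3.2])

# The scorer calls estimator.predict(X); passing the precomputed predictions
# as "X" to an identity estimator evaluates the metric directly on them.
score = scorer(_IdentityEstimator(), y_pred, y_true)
print(score)  # -0.02, i.e. the negated mean squared error
```

The `decision_function` method exists for the same reason on the classifier path, where scorers call `decision_function` instead of `predict`.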
@@ -1087,6 +1097,10 @@ class _RidgeGCV(LinearModel):
 
     looe = y - loov = c / diag(G^-1)
 
+    The best score (negative mean squared error or user-provided scoring) is
+    stored in the `best_score_` attribute, and the selected hyperparameter in
+    `alpha_`.
+
     References
     ----------
     http://cbcl.mit.edu/publications/ps/MIT-CSAIL-TR-2007-025.pdf
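The `looe` identity in the docstring can be checked numerically against brute-force leave-one-out refits; a self-contained sketch, assuming no intercept and unit sample weights (the plain setting of the formula):

```python
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(20, 5)
y = rng.randn(20)
alpha = 1.0

# Shortcut from the docstring: with G = X X^T + alpha * Id and c = G^-1 y,
# the leave-one-out errors are c / diag(G^-1).
G_inv = np.linalg.inv(X @ X.T + alpha * np.eye(X.shape[0]))
c = G_inv @ y
looe_shortcut = c / np.diag(G_inv)

# Brute force: refit the ridge problem without sample i, then predict it.
looe_brute = np.empty_like(y)
for i in range(X.shape[0]):
    mask = np.arange(X.shape[0]) != i
    Xi, yi = X[mask], y[mask]
    w = np.linalg.solve(Xi.T @ Xi + alpha * np.eye(X.shape[1]), Xi.T @ yi)
    looe_brute[i] = y[i] - X[i] @ w

assert np.allclose(looe_shortcut, looe_brute)
```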
@@ -1462,43 +1476,40 @@ def fit(self, X, y, sample_weight=None):
         else:
             sqrt_sw = np.ones(X.shape[0], dtype=X.dtype)
 
-        X_mean, *decomposition = decompose(X, y, sqrt_sw)
-
         scorer = check_scoring(self, scoring=self.scoring, allow_none=True)
         error = scorer is None
 
         n_y = 1 if len(y.shape) == 1 else y.shape[1]
-        cv_values = np.zeros((n_samples * n_y, len(self.alphas)),
-                             dtype=X.dtype)
-        C = []
+
+        X_mean, *decomposition = decompose(X, y, sqrt_sw)
+
+        if self.store_cv_values:
+            self.cv_values_ = np.empty(
+                (n_samples * n_y, len(self.alphas)), dtype=X.dtype)
+
+        best_coef, best_score, best_alpha = None, None, None
+
         for i, alpha in enumerate(self.alphas):
             G_inverse_diag, c = solve(
                 float(alpha), y, sqrt_sw, X_mean, *decomposition)
             if error:
                 squared_errors = (c / G_inverse_diag) ** 2
-                cv_values[:, i] = squared_errors.ravel()
+                alpha_score = -squared_errors.mean()
+                if self.store_cv_values:
+                    self.cv_values_[:, i] = squared_errors.ravel()
             else:
                 predictions = y - (c / G_inverse_diag)
-                cv_values[:, i] = predictions.ravel()
-            C.append(c)
-
-        if error:
-            best = cv_values.mean(axis=0).argmin()
-        else:
-            # The scorer want an object that will make the predictions but
-            # they are already computed efficiently by _RidgeGCV. This
-            # identity_estimator will just return them
-            def identity_estimator():
-                pass
-            identity_estimator.decision_function = lambda y_predict: y_predict
-            identity_estimator.predict = lambda y_predict: y_predict
-
-            # signature of scorer is (estimator, X, y)
-            out = [scorer(identity_estimator, cv_values[:, i], y.ravel())
-                   for i in range(len(self.alphas))]
-            best = np.argmax(out)
+                alpha_score = scorer(
+                    _IdentityEstimator(), predictions.ravel(), y.ravel())
+                if self.store_cv_values:
+                    self.cv_values_[:, i] = predictions.ravel()
+
+            if (best_score is None) or (alpha_score > best_score):
+                best_coef, best_score, best_alpha = c, alpha_score, alpha
 
-        self.alpha_ = self.alphas[best]
-        self.dual_coef_ = C[best]
+        self.alpha_ = best_alpha
+        self.best_score_ = best_score
+        self.dual_coef_ = best_coef
         self.coef_ = safe_sparse_dot(self.dual_coef_.T, X)
 
         X_offset += X_mean * X_scale
@@ -1509,7 +1520,7 @@ def identity_estimator():
                 cv_values_shape = n_samples, len(self.alphas)
             else:
                 cv_values_shape = n_samples, n_y, len(self.alphas)
-            self.cv_values_ = cv_values.reshape(cv_values_shape)
+            self.cv_values_ = self.cv_values_.reshape(cv_values_shape)
 
         return self
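The restructured loop keeps only a running best `(score, alpha)` pair instead of an `(n_samples * n_y, n_alphas)` accumulator, so when `store_cv_values` is False only one vector of LOO values is alive at a time. A standalone sketch of the pattern, where `loo_errors_for` is a hypothetical stand-in for the closed-form `solve` step:

```python
import numpy as np

def best_alpha_streaming(alphas, loo_errors_for):
    """Select alpha by LOO error while storing only the running best."""
    best_score, best_alpha = None, None
    for alpha in alphas:
        # One vector of LOO errors at a time; nothing is accumulated.
        score = -np.mean(loo_errors_for(alpha) ** 2)
        if best_score is None or score > best_score:
            best_score, best_alpha = score, alpha
    return best_alpha, best_score
```

For scale: with 100 candidate alphas and 10^6 samples, the old float64 accumulator alone would be about 800 MB, which is exactly what this change avoids allocating.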

21 changes: 18 additions & 3 deletions sklearn/linear_model/tests/test_ridge.py
@@ -34,6 +34,7 @@
 from sklearn.linear_model._ridge import _check_gcv_mode
 from sklearn.linear_model._ridge import _X_CenterStackOp
 from sklearn.datasets import make_regression
+from sklearn.datasets import make_classification
 
 from sklearn.model_selection import GridSearchCV
 from sklearn.model_selection import KFold, GroupKFold, cross_val_predict
@@ -661,6 +662,19 @@ def _test_ridge_cv(filter_):
     assert type(ridge_cv.intercept_) == np.float64
 
 
+@pytest.mark.parametrize(
+    "ridge, make_dataset",
+    [(RidgeCV(), make_regression),
+     (RidgeClassifierCV(), make_classification)]
+)
+def test_ridge_gcv_cv_values_not_stored(ridge, make_dataset):
+    # Check that `cv_values_` is not stored when store_cv_values is False
+    X, y = make_dataset(n_samples=6, random_state=42)
+    ridge.set_params(store_cv_values=False)
+    ridge.fit(X, y)
+    assert not hasattr(ridge, "cv_values_")
+
+
 def _test_ridge_diabetes(filter_):
     ridge = Ridge(fit_intercept=False)
     ridge.fit(filter_(X_diabetes), y_diabetes)

Review thread on `test_ridge_gcv_cv_values_not_stored`:

Contributor: Note that the previous implementation would also have passed this test, because the cv values were stored in a local variable during `fit`.

Member (author): True, and we don't really have a good test to detect the enhancement, actually.
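Following up on the review thread above, one way such an allocation check could be sketched (not part of this PR) is with the stdlib `tracemalloc`, asserting that fitting with many alphas does not allocate proportionally more memory when `store_cv_values` is False. Measurements like this are noisy, so a real test would need generous margins:

```python
import numpy as np
import tracemalloc
from sklearn.linear_model import RidgeCV

def peak_fit_memory(n_alphas):
    X = np.random.RandomState(0).randn(200, 10)
    y = X @ np.ones(10)
    tracemalloc.start()
    RidgeCV(alphas=np.logspace(-3, 3, n_alphas)).fit(X, y)
    _, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    return peak

# With the fix, peak memory should be roughly flat in the number of alphas.
print(peak_fit_memory(2), peak_fit_memory(200))
```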
@@ -818,7 +832,8 @@ def test_class_weights_cv():
     assert_array_equal(reg.predict([[-.2, 2]]), np.array([-1]))
 
 
-def test_ridgecv_store_cv_values():
+@pytest.mark.parametrize("scoring", [None, 'neg_mean_squared_error'])
+def test_ridgecv_store_cv_values(scoring):
     rng = np.random.RandomState(42)
 
     n_samples = 8
@@ -827,7 +842,7 @@
     alphas = [1e-1, 1e0, 1e1]
     n_alphas = len(alphas)
 
-    r = RidgeCV(alphas=alphas, cv=None, store_cv_values=True)
+    r = RidgeCV(alphas=alphas, cv=None, store_cv_values=True, scoring=scoring)
 
     # with len(y.shape) == 1
     y = rng.randn(n_samples)
@@ -840,7 +855,7 @@
     r.fit(x, y)
     assert r.cv_values_.shape == (n_samples, n_targets, n_alphas)
 
-    r = RidgeCV(cv=3, store_cv_values=True)
+    r = RidgeCV(cv=3, store_cv_values=True, scoring=scoring)
     assert_raises_regex(ValueError, 'cv!=None and store_cv_values',
                         r.fit, x, y)
 
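One detail the `scoring` parametrization exercises: as the `fit` diff above shows, with `scoring=None` the stored `cv_values_` columns contain squared LOO errors, whereas with a scorer they contain LOO predictions; the shape contract is identical either way. A quick illustrative check:

```python
import numpy as np
from sklearn.linear_model import RidgeCV

rng = np.random.RandomState(42)
X, y = rng.randn(8, 5), rng.randn(8)
alphas = [1e-1, 1e0, 1e1]

for scoring in [None, 'neg_mean_squared_error']:
    r = RidgeCV(alphas=alphas, store_cv_values=True, scoring=scoring).fit(X, y)
    # Same shape in both cases; the contents differ (errors vs predictions).
    assert r.cv_values_.shape == (8, len(alphas))
```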