
[MRG+1] SPCA centering and fixing scaling issue #11585

Merged
merged 10 commits into from Jul 20, 2018
Changes from 9 commits
9 changes: 9 additions & 0 deletions doc/whats_new/v0.20.rst
@@ -74,6 +74,7 @@ random sampling procedures.
- The v0.19.0 release notes failed to mention a backwards incompatibility with
:class:`model_selection.StratifiedKFold` when ``shuffle=True`` due to
:issue:`7823`.
- :class:`decomposition.SparsePCA` (bug fix)

Details are listed in the changelog below.

@@ -182,6 +183,14 @@ Decomposition, manifold learning and clustering
This applies to the dictionary and sparse code.
:issue:`6374` by :user:`John Kirkham <jakirkham>`.

- :class:`decomposition.SparsePCA` now exposes ``normalize_components``. When
set to True, both the training and the test data are centered with the mean
of the training set, during the fit phase and the transform phase
respectively. This fixes the behavior of SparsePCA. When set to False, which
is the default, the previous abnormal behaviour still holds; this value is
kept only for backward compatibility and should not be used.
:issue:`11585` by :user:`Ivan Panico <FollowKenny>`.
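A minimal sketch of the new option in action (the toy data and parameter values are illustrative, not taken from this PR):

    import numpy as np
    from sklearn.decomposition import SparsePCA

    rng = np.random.RandomState(0)
    X = rng.randn(100, 20)  # toy training data

    # With normalize_components=True, fit() centers X with its own mean
    # and transform() reuses that training mean on new data.
    spca = SparsePCA(n_components=5, normalize_components=True,
                     random_state=0)
    codes = spca.fit_transform(X)
    # Every non-zero component is rescaled to unit l2 norm.
    print(np.linalg.norm(spca.components_, axis=1))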

Metrics

- Partial AUC is available via ``max_fpr`` parameter in
3 changes: 2 additions & 1 deletion examples/decomposition/plot_faces_decomposition.py
@@ -81,7 +81,8 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):
('Sparse comp. - MiniBatchSparsePCA',
decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8,
n_iter=100, batch_size=3,
random_state=rng),
random_state=rng,
normalize_components=True),
True),

('MiniBatchDictionaryLearning',
95 changes: 89 additions & 6 deletions sklearn/decomposition/sparse_pca.py
@@ -66,6 +66,20 @@ class SparsePCA(BaseEstimator, TransformerMixin):
If None, the random number generator is the RandomState instance used
by `np.random`.

normalize_components : boolean, optional (default=False)
- If False, use a version of Sparse PCA without components
normalization and without data centering. This is likely a bug and
even though it is the default for backward compatibility,
it should not be used.
- If True, use a version of Sparse PCA with components normalization
and data centering.

.. versionadded:: 0.20

.. deprecated:: 0.22
   ``normalize_components`` was added and set to ``False`` for
   backward compatibility. It will be set to ``True`` from 0.22
   onwards.

Attributes
----------
components_ : array, [n_components, n_features]
@@ -77,6 +91,10 @@ class SparsePCA(BaseEstimator, TransformerMixin):
n_iter_ : int
Number of iterations run.

mean_ : array, shape (n_features,)
Per-feature empirical mean, estimated from the training set.
Equal to ``X.mean(axis=0)``.

See also
--------
PCA
@@ -85,7 +103,8 @@
"""
def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01,
max_iter=1000, tol=1e-8, method='lars', n_jobs=1, U_init=None,
V_init=None, verbose=False, random_state=None):
V_init=None, verbose=False, random_state=None,
normalize_components=False):
self.n_components = n_components
self.alpha = alpha
self.ridge_alpha = ridge_alpha
Expand All @@ -97,6 +116,7 @@ def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01,
self.V_init = V_init
self.verbose = verbose
self.random_state = random_state
self.normalize_components = normalize_components

def fit(self, X, y=None):
"""Fit the model from data in X.
@@ -116,6 +136,17 @@ def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01,
"""
random_state = check_random_state(self.random_state)
X = check_array(X)

if self.normalize_components:
self.mean_ = np.mean(X, axis=0)

[Review comment – Member] I would almost call X.mean(axis=0), but it is true that we don't support sparse matrices.

X = X - self.mean_
else:
warnings.warn("normalize_components=False is a "
"backward-compatible setting that implements a "
"non-standard definition of sparse PCA. This "
"compatibility mode will be removed in 0.22.",
DeprecationWarning)

if self.n_components is None:
n_components = X.shape[1]
else:
@@ -134,6 +165,13 @@
return_n_iter=True
)
self.components_ = Vt.T

if self.normalize_components:
components_norm = \
np.linalg.norm(self.components_, axis=1)[:, np.newaxis]
components_norm[components_norm == 0] = 1
self.components_ /= components_norm

self.error_ = E
return self
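
As a standalone illustration of the normalization step above, a small numpy sketch (the toy array is hypothetical):

    import numpy as np

    components = np.array([[3.0, 4.0], [0.0, 0.0]])  # second row is all zero
    norms = np.linalg.norm(components, axis=1)[:, np.newaxis]
    norms[norms == 0] = 1  # guard: leave all-zero components untouched
    components /= norms
    print(components)  # [[0.6, 0.8], [0.0, 0.0]] -- unit row norms, zero row kept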

@@ -178,11 +216,18 @@ def transform(self, X, ridge_alpha='deprecated'):
ridge_alpha = self.ridge_alpha
else:
ridge_alpha = self.ridge_alpha

if self.normalize_components:
X = X - self.mean_

U = ridge_regression(self.components_.T, X.T, ridge_alpha,
solver='cholesky')
s = np.sqrt((U ** 2).sum(axis=0))
s[s == 0] = 1
U /= s

if not self.normalize_components:
s = np.sqrt((U ** 2).sum(axis=0))
s[s == 0] = 1
U /= s

return U
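
For reference, a standalone numpy sketch of the legacy rescaling that the normalized path now skips (toy values are illustrative):

    import numpy as np

    U = np.array([[2.0, 0.0], [0.0, 0.0]])  # sparse codes with a zero column
    s = np.sqrt((U ** 2).sum(axis=0))
    s[s == 0] = 1  # avoid dividing an unused component by zero
    U /= s
    # Legacy behaviour: every non-zero column is forced to unit norm,
    # discarding the scale of the codes; the fixed path keeps U as-is.
    print(U)  # [[1.0, 0.0], [0.0, 0.0]]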


@@ -239,6 +284,20 @@ class MiniBatchSparsePCA(SparsePCA):
If None, the random number generator is the RandomState instance used
by `np.random`.

normalize_components : boolean

[Review comment – Member] I would copy-paste the previous docstring of SparsePCA here; you also need to apply the couple of nitpicks noted above.

- If False, use a version of Sparse PCA without components
normalization and without data centering. This is likely a bug
and even though it is the default for backward compatibility,
it should not be used.
- If True, use a version of Sparse PCA with components normalization
and data centering.

.. versionadded:: 0.20

.. deprecated:: 0.22
   ``normalize_components`` was added and set to ``False`` for
   backward compatibility. It will be set to ``True`` from 0.22
   onwards.

Attributes
----------
components_ : array, [n_components, n_features]
@@ -247,6 +306,10 @@ class MiniBatchSparsePCA(SparsePCA):
n_iter_ : int
Number of iterations run.

mean_ : array, shape (n_features,)
Per-feature empirical mean, estimated from the training set.
Equal to ``X.mean(axis=0)``.

See also
--------
PCA
@@ -255,11 +318,13 @@ class MiniBatchSparsePCA(SparsePCA):
"""
def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01,
n_iter=100, callback=None, batch_size=3, verbose=False,
shuffle=True, n_jobs=1, method='lars', random_state=None):
shuffle=True, n_jobs=1, method='lars', random_state=None,
normalize_components=False):
super(MiniBatchSparsePCA, self).__init__(
n_components=n_components, alpha=alpha, verbose=verbose,
ridge_alpha=ridge_alpha, n_jobs=n_jobs, method=method,
random_state=random_state)
random_state=random_state,
normalize_components=normalize_components)
self.n_iter = n_iter
self.callback = callback
self.batch_size = batch_size
@@ -283,6 +348,17 @@ def fit(self, X, y=None):
"""
random_state = check_random_state(self.random_state)
X = check_array(X)

if self.normalize_components:
self.mean_ = np.mean(X, axis=0)

[Review comment – Member] I would almost call X.mean(axis=0).

[Review comment – Contributor (author)] I'm following this, but what's the difference? Except that it is a bit more compact, is there a performance improvement?

[Review comment – Member] X.mean() will work for both dense and sparse matrices, but it does not matter since we don't support sparse input here. So this change is up to you :)

[Review comment – Contributor (author)] OK, we never know, maybe one day, so it can't hurt to change this.

X = X - self.mean_
else:
warnings.warn("normalize_components=False is a "
"backward-compatible setting that implements a "
"non-standard definition of sparse PCA. This "
"compatibility mode will be removed in 0.22.",
DeprecationWarning)

if self.n_components is None:
n_components = X.shape[1]
else:
@@ -298,4 +374,11 @@
random_state=random_state,
return_n_iter=True)
self.components_ = Vt.T

if self.normalize_components:
components_norm = \
np.linalg.norm(self.components_, axis=1)[:, np.newaxis]
components_norm[components_norm == 0] = 1
self.components_ /= components_norm

return self
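
A minimal usage sketch of the mini-batch variant with the new option (toy data; parameters are illustrative):

    import numpy as np
    from sklearn.decomposition import MiniBatchSparsePCA

    rng = np.random.RandomState(0)
    X = rng.randn(200, 30)

    mbspca = MiniBatchSparsePCA(n_components=8, batch_size=3, n_iter=100,
                                normalize_components=True, random_state=0)
    mbspca.fit(X)
    # As in SparsePCA, the training mean is stored for use in transform()
    # and every non-zero component is rescaled to unit l2 norm.
    print(np.linalg.norm(mbspca.components_, axis=1))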