
[MRG+1] Incorrect implementation of noise_variance_ in PCA._fit_truncated #9108

Merged
merged 17 commits into from Aug 6, 2017
3 changes: 3 additions & 0 deletions doc/whats_new.rst
@@ -239,6 +239,9 @@ Decomposition, manifold learning and clustering
``singular_values_``, like in :class:`decomposition.IncrementalPCA`.
:issue:`7685` by :user:`Tommy Löfstedt <tomlof>`

- Fixed the implementation of ``noise_variance_`` in :class:`decomposition.PCA`.
:issue:`9108` by `Hanmin Qin <https://github.com/qinhanmin2014>`_.

- :class:`decomposition.NMF` now faster when ``beta_loss=0``.
:issue:`9277` by :user:`hongkahjun`.

9 changes: 8 additions & 1 deletion sklearn/decomposition/pca.py
@@ -201,6 +201,9 @@ class PCA(_BasePCA):
explained_variance_ : array, shape (n_components,)
The amount of variance explained by each of the selected components.

Equal to the n_components largest eigenvalues
of the covariance matrix of X.

.. versionadded:: 0.18

explained_variance_ratio_ : array, shape (n_components,)
@@ -232,6 +235,9 @@ class PCA(_BasePCA):
http://www.miketipping.com/papers/met-mppca.pdf. It is required to
compute the estimated data covariance and score samples.

Equal to the average of (n_features - n_components)

Member:

I guess this should be min(n_features, n_samples) - n_components.

Member:

Rather than trying to describe a formula with words, maybe we could follow http://www.miketipping.com/papers/met-mppca.pdf and say something like: "this can be interpreted as the average variance ‘lost’ per discarded dimension"

smallest eigenvalues of the covariance matrix of X.
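
The interpretation suggested above can be sketched numerically. This is a hedged, numpy-only illustration (variable names are mine, not scikit-learn's): noise_variance_ corresponds to the mean of the eigenvalues of the sample covariance matrix that were discarded by keeping only n_components.

```python
import numpy as np

# Sketch: noise_variance_ as the average variance "lost" per discarded
# dimension, i.e. the mean of the discarded covariance eigenvalues.
rng = np.random.RandomState(0)
X = rng.randn(100, 10)
n_components = 3

Xc = X - X.mean(axis=0)
cov = np.cov(Xc, rowvar=False)                    # ddof=1, like PCA
eigvals = np.sort(np.linalg.eigvalsh(cov))[::-1]  # descending order

explained_variance = eigvals[:n_components]
noise_variance = eigvals[n_components:].mean()
```

Note that np.linalg.eigvalsh returns eigenvalues in ascending order, hence the explicit sort and reversal.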

References
----------
For n_components == 'mle', this class uses the method of `Thomas P. Minka:
@@ -494,9 +500,10 @@ def _fit_truncated(self, X, n_components, svd_solver):
self.explained_variance_ratio_ = \
self.explained_variance_ / total_var.sum()
self.singular_values_ = S.copy() # Store the singular values.
if self.n_components_ < n_features:
if self.n_components_ < min(n_features, n_samples):

Member:

Could you add a test to make sure that noise_variance_ is 0 when either n_components > n_features or n_components > n_samples?

Member Author:

@lesteve Thanks. Will add in a few days.

Member:

I have just pushed a change to do this.

To be honest I think we should look at the code that validates n_components against n_samples and n_features, and use a helper function where appropriate. Also in incremental_pca.py, as noted in https://github.com/scikit-learn/scikit-learn/pull/9303/files#r131103304.

self.noise_variance_ = (total_var.sum() -
self.explained_variance_.sum())
self.noise_variance_ /= min(n_features, n_samples) - n_components
else:
self.noise_variance_ = 0.
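
As a standalone sanity check of the corrected branch, the logic can be sketched as follows (the helper name is mine, not scikit-learn's): the residual variance is averaged over the min(n_features, n_samples) - n_components discarded dimensions, and falls back to 0 when nothing is discarded.

```python
import numpy as np

# Hedged sketch of the fixed noise_variance_ computation in
# _fit_truncated: divide the residual variance by the number of
# discarded dimensions, min(n_features, n_samples) - n_components.
def truncated_noise_variance(total_var, explained_variance,
                             n_samples, n_features, n_components):
    n_discarded = min(n_features, n_samples) - n_components
    if n_discarded > 0:
        return (total_var - explained_variance.sum()) / n_discarded
    return 0.  # nothing discarded: noise_variance_ must be 0

# e.g. eigenvalues [4, 2, 1, 0.5, 0.5], keeping 2 components:
# residual = 8 - 6 = 2, spread over 3 discarded dims -> 2/3
```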

20 changes: 20 additions & 0 deletions sklearn/decomposition/tests/test_pca.py
@@ -529,6 +529,26 @@ def test_pca_score3():
assert_true(ll.argmax() == 1)


def test_pca_score4():

Member:

You can keep this test but I feel there must be a simpler test. I am going to think about it a bit more and come back to you.

A simpler test more related to your fix would be to check that noise_variance_ is 0 when either n_components > n_features or n_components > n_samples.

Member Author:

@lesteve Thanks. The reason for the current test is that noise_variance_ is not correctly implemented in PCA._fit_truncated but is correctly implemented in PCA._fit_full, which is the main point of this pull request. So we can compare the scores calculated with the different methods to construct the test.

Member:

I am going to think about it a bit more and come back to you.

I think a simpler test is
assert np.all((self.explained_variance_ - self.noise_variance_) >= 0)

In terms of the eigenvalues of the covariance matrix of X, each of the n_components largest eigenvalues should be larger than the average of the smaller eigenvalues.

Member Author:

@lesteve Thanks. But I think it may not be suitable since the small eigenvalues are usually very small. Their sum may still be smaller than the big eigenvalues, making it not a good regression test.

Member:

Their sum may still be smaller than the big eigenvalues, making it not a good regression test.

"May" is the term indeed. On the digits dataset this is not what is happening though. The reason we get infinite values and an error in master when calling get_precision is exactly because np.all((self.explained_variance_ - self.noise_variance_) >= 0) is False (because we took the sum of the eigenvalues rather than the mean).

# Ensure that the scores are correctly calculated in extreme situations

Member:

Not an "extreme situation" at all, it is actually a very common situation ...

# Specially designed for issue #7568, #8541, #8544
digits = datasets.load_digits()
X_digits = digits.data

pca1 = PCA(n_components=30, svd_solver='full')
pca1.fit(X_digits)
score1 = pca1.score(X_digits)
pca2 = PCA(n_components=30, svd_solver='arpack')
pca2.fit(X_digits)
score2 = pca2.score(X_digits)
pca3 = PCA(n_components=30, svd_solver='randomized')

Member:

Use random_state and set it to anything you like, e.g. 0, to ensure determinism.

pca3.fit(X_digits)
score3 = pca3.score(X_digits)

assert_almost_equal(score1, score2, 12)
assert_almost_equal(score1, score3, 2)


def test_svd_solver_auto():
rng = np.random.RandomState(0)
X = rng.uniform(size=(1000, 50))