New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MRG+1] Made PCA expose the singular values #7685
Changes from 16 commits
c637fb0
8897ab5
d2f913f
4687649
ef16c4c
2324939
e807663
23c701a
510a9a1
c11f175
5b4c183
948f19a
fd6c962
38d7e1d
e3acdbf
51a71f0
e9fe5e7
c4a4943
44ab311
15816fe
566f49d
d3df7ed
ce17751
279fd60
9fc3420
ae86e2f
2b731f0
b5a6356
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -186,23 +186,28 @@ class PCA(_BasePCA): | |
|
||
Attributes | ||
---------- | ||
components_ : array, [n_components, n_features] | ||
components_ : array, shape (n_components, n_features) | ||
Principal axes in feature space, representing the directions of | ||
maximum variance in the data. The components are sorted by | ||
``explained_variance_``. | ||
|
||
explained_variance_ : array, [n_components] | ||
explained_variance_ : array, shape (n_components,) | ||
The amount of variance explained by each of the selected components. | ||
|
||
.. versionadded:: 0.18 | ||
|
||
explained_variance_ratio_ : array, [n_components] | ||
explained_variance_ratio_ : array, shape (n_components,) | ||
Percentage of variance explained by each of the selected components. | ||
|
||
If ``n_components`` is not set then all components are stored and the | ||
sum of explained variances is equal to 1.0. | ||
|
||
mean_ : array, [n_features] | ||
singular_values_ : array, shape (n_components,) | ||
The singular values corresponding to each of the selected components. | ||
The singular values are equal to the 2-norms of the ``n_components`` | ||
variables in the lower-dimensional space. | ||
|
||
mean_ : array, shape (n_features,) | ||
Per-feature empirical mean, estimated from the training set. | ||
|
||
Equal to `X.mean(axis=0)`. |
|
@@ -250,22 +255,28 @@ class PCA(_BasePCA): | |
>>> pca.fit(X) | ||
PCA(copy=True, iterated_power='auto', n_components=2, random_state=None, | ||
svd_solver='auto', tol=0.0, whiten=False) | ||
>>> print(pca.explained_variance_ratio_) # doctest: +ELLIPSIS | ||
>>> print(pca.explained_variance_ratio_) # doctest: +ELLIPSIS | ||
[ 0.99244... 0.00755...] | ||
>>> print(pca.singular_values_) # doctest: +ELLIPSIS | ||
[ 6.30061... 0.54980...] | ||
|
||
>>> pca = PCA(n_components=2, svd_solver='full') | ||
>>> pca.fit(X) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE | ||
PCA(copy=True, iterated_power='auto', n_components=2, random_state=None, | ||
svd_solver='full', tol=0.0, whiten=False) | ||
>>> print(pca.explained_variance_ratio_) # doctest: +ELLIPSIS | ||
>>> print(pca.explained_variance_ratio_) # doctest: +ELLIPSIS | ||
[ 0.99244... 0.00755...] | ||
>>> print(pca.singular_values_) # doctest: +ELLIPSIS | ||
[ 6.30061... 0.54980...] | ||
|
||
>>> pca = PCA(n_components=1, svd_solver='arpack') | ||
>>> pca.fit(X) | ||
PCA(copy=True, iterated_power='auto', n_components=1, random_state=None, | ||
svd_solver='arpack', tol=0.0, whiten=False) | ||
>>> print(pca.explained_variance_ratio_) # doctest: +ELLIPSIS | ||
>>> print(pca.explained_variance_ratio_) # doctest: +ELLIPSIS | ||
[ 0.99244...] | ||
>>> print(pca.singular_values_) # doctest: +ELLIPSIS | ||
[ 6.30061...] | ||
|
||
See also | ||
-------- | ||
|
@@ -385,6 +396,7 @@ def _fit_full(self, X, n_components): | |
explained_variance_ = (S ** 2) / n_samples | ||
total_var = explained_variance_.sum() | ||
explained_variance_ratio_ = explained_variance_ / total_var | ||
singular_values_ = S.copy() # Store the singular values. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't this be calculated by the user as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry. Stupid question. I've read the issue and figure this is all about making something comparable available in TruncatedSVD. |
||
|
||
# Postprocess the number of components required | ||
if n_components == 'mle': | ||
|
@@ -409,6 +421,7 @@ def _fit_full(self, X, n_components): | |
self.explained_variance_ = explained_variance_[:n_components] | ||
self.explained_variance_ratio_ = \ | ||
explained_variance_ratio_[:n_components] | ||
self.singular_values_ = singular_values_[:n_components] | ||
|
||
return U, S, V | ||
|
||
|
@@ -463,6 +476,7 @@ def _fit_truncated(self, X, n_components, svd_solver): | |
total_var = np.var(X, axis=0) | ||
self.explained_variance_ratio_ = \ | ||
self.explained_variance_ / total_var.sum() | ||
self.singular_values_ = S.copy() # Store the singular values. | ||
if self.n_components_ < n_features: | ||
self.noise_variance_ = (total_var.sum() - | ||
self.explained_variance_.sum()) | ||
|
@@ -520,9 +534,11 @@ def score(self, X, y=None): | |
return np.mean(self.score_samples(X)) | ||
|
||
|
||
@deprecated("RandomizedPCA was deprecated in 0.18 and will be removed in 0.20. " | ||
@deprecated("RandomizedPCA was deprecated in 0.18 and will be removed in " | ||
"0.20. " | ||
"Use PCA(svd_solver='randomized') instead. The new implementation " | ||
"DOES NOT store whiten ``components_``. Apply transform to get them.") | ||
"DOES NOT store whiten ``components_``. Apply transform to get " | ||
"them.") | ||
class RandomizedPCA(BaseEstimator, TransformerMixin): | ||
"""Principal component analysis (PCA) using randomized SVD | ||
|
||
|
@@ -549,8 +565,8 @@ class RandomizedPCA(BaseEstimator, TransformerMixin): | |
.. versionchanged:: 0.18 | ||
|
||
whiten : bool, optional | ||
When True (False by default) the `components_` vectors are multiplied by | ||
the square root of (n_samples) and divided by the singular values to | ||
When True (False by default) the `components_` vectors are multiplied | ||
by the square root of (n_samples) and divided by the singular values to | ||
ensure uncorrelated outputs with unit component-wise variances. | ||
|
||
Whitening will remove some information from the transformed signal | ||
|
@@ -564,15 +580,20 @@ class RandomizedPCA(BaseEstimator, TransformerMixin): | |
|
||
Attributes | ||
---------- | ||
components_ : array, [n_components, n_features] | ||
components_ : array, shape (n_components, n_features) | ||
Components with maximum variance. | ||
|
||
explained_variance_ratio_ : array, [n_components] | ||
explained_variance_ratio_ : array, shape (n_components,) | ||
Percentage of variance explained by each of the selected components. | ||
k is not set then all components are stored and the sum of explained | ||
variances is equal to 1.0 | ||
If k is not set then all components are stored and the sum of explained | ||
variances is equal to 1.0. | ||
|
||
singular_values_ : array, shape (n_components,) | ||
The singular values corresponding to each of the selected components. | ||
The singular values are equal to the 2-norms of the ``n_components`` | ||
variables in the lower-dimensional space. | ||
|
||
mean_ : array, [n_features] | ||
mean_ : array, shape (n_features,) | ||
Per-feature empirical mean, estimated from the training set. | ||
|
||
Examples | ||
|
@@ -584,8 +605,10 @@ class RandomizedPCA(BaseEstimator, TransformerMixin): | |
>>> pca.fit(X) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE | ||
RandomizedPCA(copy=True, iterated_power=2, n_components=2, | ||
random_state=None, whiten=False) | ||
>>> print(pca.explained_variance_ratio_) # doctest: +ELLIPSIS | ||
>>> print(pca.explained_variance_ratio_) # doctest: +ELLIPSIS | ||
[ 0.99244... 0.00755...] | ||
>>> print(pca.singular_values_) # doctest: +ELLIPSIS | ||
[ 6.30061... 0.54980...] | ||
|
||
See also | ||
-------- | ||
|
@@ -663,6 +686,7 @@ def _fit(self, X): | |
self.explained_variance_ = exp_var = (S ** 2) / n_samples | ||
full_var = np.var(X, axis=0).sum() | ||
self.explained_variance_ratio_ = exp_var / full_var | ||
self.singular_values_ = S # Store the singular values. | ||
|
||
if self.whiten: | ||
self.components_ = V / S[:, np.newaxis] * sqrt(n_samples) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any reason to add this here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not really, but the other attributes were set there, so I thought it would be good for consistency to have the singular values be set there as well.