New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor PCA class: able to use different decomposition techniques (exact, randomized, QUIC SVD). #716
Refactor PCA class: able to use different decomposition techniques (exact, randomized, QUIC SVD). #716
Changes from 3 commits
4727298
05b36fc
00e867f
c8d5766
5149efd
61e63b9
5f01b84
8907d5a
b8da5c6
0d6d3af
45e8cd6
01e699c
7e8abed
7bbd897
29fcf0a
cbbd671
ec0a6d5
6aedf56
570a3d8
907461f
861fe30
9101a6f
ee7ff36
10d435f
081428e
43744c5
260a48e
1d675d4
080d198
1c0192f
1127e61
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
/** | ||
* @file pca.cpp | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it should be pca_impl.hpp There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice catch, thanks! |
||
* @author Ajinkya Kale | ||
* @author Ryan Curtin | ||
* @author Marcus Edel | ||
* | ||
* Implementation of PCA class to perform Principal Components Analysis on the | ||
* specified data set. | ||
|
@@ -18,8 +20,8 @@ namespace mlpack { | |
namespace pca { | ||
|
||
template<typename DecompositionPolicy> | ||
PCA<DecompositionPolicy>::PCA(const bool scaleData, | ||
const DecompositionPolicy& decomposition) : | ||
PCAType<DecompositionPolicy>::PCAType(const bool scaleData, | ||
const DecompositionPolicy& decomposition) : | ||
scaleData(scaleData), | ||
decomposition(decomposition) | ||
{ } | ||
|
@@ -33,10 +35,10 @@ PCA<DecompositionPolicy>::PCA(const bool scaleData, | |
* @param coeff - PCA Loadings/Coeffs/EigenVectors | ||
*/ | ||
template<typename DecompositionPolicy> | ||
void PCA<DecompositionPolicy>::Apply(const arma::mat& data, | ||
arma::mat& transformedData, | ||
arma::vec& eigVal, | ||
arma::mat& coeff) | ||
void PCAType<DecompositionPolicy>::Apply(const arma::mat& data, | ||
arma::mat& transformedData, | ||
arma::vec& eigVal, | ||
arma::mat& coeff) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this parameter be called "eigvec" or "coeff"? The names differ between pca.hpp and pca_impl.hpp. |
||
{ | ||
Timer::Start("pca"); | ||
|
||
|
@@ -61,9 +63,9 @@ void PCA<DecompositionPolicy>::Apply(const arma::mat& data, | |
* @param eigVal - contains eigen values in a column vector | ||
*/ | ||
template<typename DecompositionPolicy> | ||
void PCA<DecompositionPolicy>::Apply(const arma::mat& data, | ||
arma::mat& transformedData, | ||
arma::vec& eigVal) | ||
void PCAType<DecompositionPolicy>::Apply(const arma::mat& data, | ||
arma::mat& transformedData, | ||
arma::vec& eigVal) | ||
{ | ||
arma::mat coeffs; | ||
Apply(data, transformedData, eigVal, coeffs); | ||
|
@@ -81,7 +83,8 @@ void PCA<DecompositionPolicy>::Apply(const arma::mat& data, | |
* @return Amount of the variance of the data retained (between 0 and 1). | ||
*/ | ||
template<typename DecompositionPolicy> | ||
double PCA<DecompositionPolicy>::Apply(arma::mat& data, const size_t newDimension) | ||
double PCAType<DecompositionPolicy>::Apply(arma::mat& data, | ||
const size_t newDimension) | ||
{ | ||
// Parameter validation. | ||
if (newDimension == 0) | ||
|
@@ -95,6 +98,8 @@ double PCA<DecompositionPolicy>::Apply(arma::mat& data, const size_t newDimensio | |
arma::mat coeffs; | ||
arma::vec eigVal; | ||
|
||
Timer::Start("pca"); | ||
|
||
// Center the data into a temporary matrix. | ||
arma::mat centeredData; | ||
math::Center(data, centeredData); | ||
|
@@ -112,6 +117,8 @@ double PCA<DecompositionPolicy>::Apply(arma::mat& data, const size_t newDimensio | |
// the right dimension before calculating the amount of variance retained. | ||
double eigDim = std::min(newDimension - 1, (size_t) eigVal.n_elem - 1); | ||
|
||
Timer::Stop("pca"); | ||
|
||
// Calculate the total amount of variance retained. | ||
return (sum(eigVal.subvec(0, eigDim)) / sum(eigVal)); | ||
} | ||
|
@@ -127,7 +134,8 @@ double PCA<DecompositionPolicy>::Apply(arma::mat& data, const size_t newDimensio | |
* always be greater than or equal to the varRetained parameter. | ||
*/ | ||
template<typename DecompositionPolicy> | ||
double PCA<DecompositionPolicy>::Apply(arma::mat& data, const double varRetained) | ||
double PCAType<DecompositionPolicy>::Apply(arma::mat& data, | ||
const double varRetained) | ||
{ | ||
// Parameter validation. | ||
if (varRetained < 0) | ||
|
@@ -159,7 +167,6 @@ double PCA<DecompositionPolicy>::Apply(arma::mat& data, const double varRetained | |
return varSum; | ||
} | ||
|
||
|
||
} // namespace pca | ||
} // namespace mlpack | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,7 +46,7 @@ void RunPCA(arma::mat& dataset, | |
const size_t scale, | ||
const double varToRetain) | ||
{ | ||
PCA<DecompositionPolicy> p(scale); | ||
PCAType<DecompositionPolicy> p(scale); | ||
|
||
Log::Info << "Performing PCA on dataset..." << endl; | ||
double varRetained; | ||
|
@@ -112,6 +112,12 @@ int main(int argc, char** argv) | |
{ | ||
RunPCA<QUICSVDPolicy>(dataset, newDimension, scale, varToRetain); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think if a user picks an invalid decomposition policy, no error is issued? I am not sure if I am reading this right, but if so I think we should add an else to catch the error. :) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, right, changed in 1127e61. |
||
else | ||
{ | ||
// Invalid decomposition method. | ||
Log::Fatal << "Invalid decomposition method ('" << decompositionMethod | ||
<< "'); valid choices are 'exact', 'randomized', 'quic'." << endl; | ||
} | ||
|
||
// Now save the results. | ||
string outputFile = CLI::GetParam<string>("output_file"); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The parameter names are different: one is called eigvec and the other is called coeff.
Besides, for the Apply functions of PCA, the comments written for the same functions differ from each other.
pca.hpp
pca_impl.hpp
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice catch, thanks.