From ad0991cbb5d6adaadf3deb194928725aeb1a153b Mon Sep 17 00:00:00 2001 From: Wuwei Lin Date: Fri, 18 May 2018 21:59:15 +0800 Subject: [PATCH] Cleanup and refactor PCA and FisherLDA Implement apply in PCA and FisherLDA Inherit directly from dense preproc --- src/shogun/metric/LMNNImpl.cpp | 3 +- src/shogun/preprocessor/FisherLDA.cpp | 23 +++----- src/shogun/preprocessor/FisherLDA.h | 22 ++++---- src/shogun/preprocessor/PCA.cpp | 81 +++++++++++++++------------ src/shogun/preprocessor/PCA.h | 26 ++++++--- 5 files changed, 86 insertions(+), 69 deletions(-) diff --git a/src/shogun/metric/LMNNImpl.cpp b/src/shogun/metric/LMNNImpl.cpp index 07f3be5b4d1..9573117f1f3 100644 --- a/src/shogun/metric/LMNNImpl.cpp +++ b/src/shogun/metric/LMNNImpl.cpp @@ -366,7 +366,8 @@ SGMatrix CLMNNImpl::compute_pca_transform(CDenseFeatures* CPruneVarSubMean* mean_substractor = new CPruneVarSubMean(false); // false to avoid variance normalization mean_substractor->fit(cloned_features); - mean_substractor->apply_to_feature_matrix(cloned_features); + cloned_features = mean_substractor->apply(cloned_features) + ->as>(); // Obtain the linear transform applying PCA CPCA* pca = new CPCA(); diff --git a/src/shogun/preprocessor/FisherLDA.cpp b/src/shogun/preprocessor/FisherLDA.cpp index 24fe7bf8a1c..9537127093a 100644 --- a/src/shogun/preprocessor/FisherLDA.cpp +++ b/src/shogun/preprocessor/FisherLDA.cpp @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -52,7 +51,7 @@ using namespace shogun; CFisherLDA::CFisherLDA( int32_t num_dimensions, EFLDAMethod method, float64_t thresh, float64_t gamma, bool bdc_svd) - : CDimensionReductionPreprocessor() + : CDensePreprocessor() { initialize_parameters(); m_num_dim = num_dimensions; @@ -198,13 +197,10 @@ void CFisherLDA::cleanup() m_eigenvalues_vector=SGVector(); } -SGMatrix CFisherLDA::apply_to_feature_matrix(CFeatures*features) +SGMatrix CFisherLDA::apply_to_matrix(SGMatrix matrix) { - auto simple_features = features->as>(); - auto m = simple_features->get_feature_matrix(); - - int32_t num_vectors=m.num_cols; - int32_t num_features=m.num_rows; + auto num_vectors = matrix.num_cols; + auto num_features = matrix.num_rows; SG_INFO("Transforming feature matrix\n") Map transform_matrix( @@ -213,7 +209,7 @@ SGMatrix CFisherLDA::apply_to_feature_matrix(CFeatures*features) SG_INFO("get Feature matrix: %ix%i\n", num_vectors, num_features) - Map feature_matrix (m.matrix, num_features, num_vectors); + Map feature_matrix(matrix.matrix, num_features, num_vectors); feature_matrix.block(0, 0, m_num_dim, num_vectors) = transform_matrix.transpose() * feature_matrix; @@ -222,12 +218,11 @@ SGMatrix CFisherLDA::apply_to_feature_matrix(CFeatures*features) for (int32_t col=0; colset_feature_matrix(m); - return m; + matrix.num_rows = m_num_dim; + matrix.num_cols = num_vectors; + return matrix; } SGVector CFisherLDA::apply_to_feature_vector(SGVector vector) diff --git a/src/shogun/preprocessor/FisherLDA.h b/src/shogun/preprocessor/FisherLDA.h index a377d1f1479..9870b75a5c8 100644 --- a/src/shogun/preprocessor/FisherLDA.h +++ b/src/shogun/preprocessor/FisherLDA.h @@ -38,8 +38,7 @@ #include #include -#include -#include +#include #include namespace shogun @@ -88,8 +87,8 @@ namespace shogun * ::AUTO_FLDA : Automagically, the appropriate method is selected based on * whether D>N (chooses ::CANVAR_FLDA) or D + { public: /** standard constructor * @param num_dimensions number of dimensions to retain @@ -123,13 +122,6 @@ class CFisherLDA: public CDimensionReductionPreprocessor /** cleanup */ virtual void cleanup(); - /** apply preprocessor to feature matrix - * @param features on which the learned tranformation has to be applied. - * Sometimes it is also referred as projecting the given features. - * @return processed feature matrix with reduced dimensions. - */ - virtual SGMatrix apply_to_feature_matrix(CFeatures* features); - /** apply preprocessor to feature vector * @param vector features on which the learned transformation has to be applied. * @return processed feature vector with reduced dimensions. @@ -159,6 +151,14 @@ class CFisherLDA: public CDimensionReductionPreprocessor void initialize_parameters(); protected: + /** apply preprocessor to feature matrix + * @param matrix on which the learned tranformation has to be applied. + * Sometimes it is also referred as projecting the given features + * matrix. + * @return processed feature matrix with reduced dimensions. + */ + virtual SGMatrix apply_to_matrix(SGMatrix matrix); + /** * Train the preprocessor with the canonical variates method. * @param features training data. diff --git a/src/shogun/preprocessor/PCA.cpp b/src/shogun/preprocessor/PCA.cpp index 2a137b20015..638fb0a66b9 100644 --- a/src/shogun/preprocessor/PCA.cpp +++ b/src/shogun/preprocessor/PCA.cpp @@ -16,8 +16,10 @@ using namespace shogun; using namespace Eigen; -CPCA::CPCA(bool do_whitening, EPCAMode mode, float64_t thresh, EPCAMethod method, EPCAMemoryMode mem_mode) -: CDimensionReductionPreprocessor() +CPCA::CPCA( + bool do_whitening, EPCAMode mode, float64_t thresh, EPCAMethod method, + EPCAMemoryMode mem_mode) + : CDensePreprocessor() { init(); m_whitening = do_whitening; @@ -28,7 +30,7 @@ CPCA::CPCA(bool do_whitening, EPCAMode mode, float64_t thresh, EPCAMethod method } CPCA::CPCA(EPCAMethod method, bool do_whitening, EPCAMemoryMode mem_mode) -: CDimensionReductionPreprocessor() + : CDensePreprocessor() { init(); m_whitening = do_whitening; @@ -68,6 +70,9 @@ void CPCA::init() "Method used for PCA calculation", MS_NOT_AVAILABLE); SG_ADD(&m_eigenvalue_zero_tolerance, "eigenvalue_zero_tolerance", "zero tolerance" " for determining zero eigenvalues during whitening to avoid numerical issues", MS_NOT_AVAILABLE); + SG_ADD( + &m_target_dim, "target_dim", "target dimensionality of preprocessor", + MS_AVAILABLE); } CPCA::~CPCA() @@ -281,13 +286,12 @@ void CPCA::cleanup() m_initialized = false; } -SGMatrix CPCA::apply_to_feature_matrix(CFeatures* features) +SGMatrix CPCA::apply_to_matrix(SGMatrix matrix) { ASSERT(m_initialized) - ASSERT(features != NULL) - SGMatrix m = features->as>()->get_feature_matrix(); - int32_t num_vectors = m.num_cols; - int32_t num_features = m.num_rows; + + auto num_vectors = matrix.num_cols; + auto num_features = matrix.num_rows; SG_INFO("Transforming feature matrix\n") Map transform_matrix(m_transformation_matrix.matrix, @@ -295,43 +299,39 @@ SGMatrix CPCA::apply_to_feature_matrix(CFeatures* features) if (m_mem_mode == MEM_IN_PLACE) { - if (m.matrix) - { - SG_INFO("Preprocessing feature matrix\n") - Map feature_matrix(m.matrix, num_features, num_vectors); - VectorXd data_mean = feature_matrix.rowwise().sum()/(float64_t) num_vectors; - feature_matrix = feature_matrix.colwise()-data_mean; + SG_INFO("Preprocessing feature matrix\n") + Map feature_matrix(matrix.matrix, num_features, num_vectors); + VectorXd data_mean = + feature_matrix.rowwise().sum() / (float64_t)num_vectors; + feature_matrix = feature_matrix.colwise() - data_mean; - feature_matrix.block(0,0,num_dim,num_vectors) = - transform_matrix.transpose()*feature_matrix; + feature_matrix.block(0, 0, num_dim, num_vectors) = + transform_matrix.transpose() * feature_matrix; - SG_INFO("Form matrix of target dimension\n") - for (int32_t col=0; col*) features)->set_feature_matrix(m); - return m; + return matrix; } else { SGMatrix ret(num_dim, num_vectors); Map ret_matrix(ret.matrix, num_dim, num_vectors); - if (m.matrix) - { - SG_INFO("Preprocessing feature matrix\n") - Map feature_matrix(m.matrix, num_features, num_vectors); - VectorXd data_mean = feature_matrix.rowwise().sum()/(float64_t) num_vectors; - feature_matrix = feature_matrix.colwise()-data_mean; - ret_matrix = transform_matrix.transpose()*feature_matrix; - } - ((CDenseFeatures*) features)->set_feature_matrix(ret); + SG_INFO("Preprocessing feature matrix\n") + Map feature_matrix(matrix.matrix, num_features, num_vectors); + VectorXd data_mean = + feature_matrix.rowwise().sum() / (float64_t)num_vectors; + feature_matrix = feature_matrix.colwise() - data_mean; + + ret_matrix = transform_matrix.transpose() * feature_matrix; + return ret; } } @@ -387,3 +387,14 @@ float64_t CPCA::get_eigenvalue_zero_tolerance() const { return m_eigenvalue_zero_tolerance; } + +void CPCA::set_target_dim(int32_t dim) +{ + ASSERT(dim > 0) + m_target_dim = dim; +} + +int32_t CPCA::get_target_dim() const +{ + return m_target_dim; +} diff --git a/src/shogun/preprocessor/PCA.h b/src/shogun/preprocessor/PCA.h index 38e195d5d71..ea6b8874cde 100644 --- a/src/shogun/preprocessor/PCA.h +++ b/src/shogun/preprocessor/PCA.h @@ -11,9 +11,9 @@ #include -#include #include #include +#include namespace shogun { @@ -104,7 +104,7 @@ enum EPCAMemoryMode * * Note that vectors/matrices don't have to have zero mean as it is substracted within the class. */ -class CPCA: public CDimensionReductionPreprocessor +class CPCA : public CDensePreprocessor { public: @@ -135,12 +135,6 @@ class CPCA: public CDimensionReductionPreprocessor /** cleanup */ virtual void cleanup(); - /** apply preprocessor to feature matrix - * @param features features - * @return processed feature matrix - */ - virtual SGMatrix apply_to_feature_matrix(CFeatures* features); - /** apply preprocessor to feature vector * @param vector feature vector * @return processed feature vector @@ -184,10 +178,23 @@ class CPCA: public CDimensionReductionPreprocessor */ float64_t get_eigenvalue_zero_tolerance() const; + /** setter for target dimension + * @param dim target dimension + */ + void set_target_dim(int32_t dim); + + /** getter for target dimension + * @return target dimension + */ + int32_t get_target_dim() const; + protected: void init(); + virtual SGMatrix + apply_to_matrix(SGMatrix) override; + protected: /** transformation matrix */ @@ -218,6 +225,9 @@ class CPCA: public CDimensionReductionPreprocessor */ float64_t m_eigenvalue_zero_tolerance; + /** target dimension */ + int32_t m_target_dim; + private: /** Computes the transformation matrix using an eigenvalue decomposition. */ void init_with_evd(const SGMatrix& feature_matrix, int32_t max_dim_allowed);