diff --git a/src/shogun/metric/LMNNImpl.cpp b/src/shogun/metric/LMNNImpl.cpp index 6270dd20b2e..07f3be5b4d1 100644 --- a/src/shogun/metric/LMNNImpl.cpp +++ b/src/shogun/metric/LMNNImpl.cpp @@ -371,7 +371,7 @@ SGMatrix CLMNNImpl::compute_pca_transform(CDenseFeatures* // Obtain the linear transform applying PCA CPCA* pca = new CPCA(); pca->set_target_dim(cloned_features->get_num_features()); - pca->init(cloned_features); + pca->fit(cloned_features); SGMatrix pca_transform = pca->get_transformation_matrix(); SG_UNREF(pca); diff --git a/src/shogun/preprocessor/DimensionReductionPreprocessor.cpp b/src/shogun/preprocessor/DimensionReductionPreprocessor.cpp index dd70212d5c6..515b7cea06a 100644 --- a/src/shogun/preprocessor/DimensionReductionPreprocessor.cpp +++ b/src/shogun/preprocessor/DimensionReductionPreprocessor.cpp @@ -55,11 +55,6 @@ SGMatrix CDimensionReductionPreprocessor::apply_to_feature_matrix(CFe } } -bool CDimensionReductionPreprocessor::init(CFeatures* data) -{ - return true; -} - void CDimensionReductionPreprocessor::cleanup() { diff --git a/src/shogun/preprocessor/DimensionReductionPreprocessor.h b/src/shogun/preprocessor/DimensionReductionPreprocessor.h index 0886d4a9969..a6e453d3400 100644 --- a/src/shogun/preprocessor/DimensionReductionPreprocessor.h +++ b/src/shogun/preprocessor/DimensionReductionPreprocessor.h @@ -43,12 +43,6 @@ class CDimensionReductionPreprocessor: public CDensePreprocessor /** destructor */ virtual ~CDimensionReductionPreprocessor(); - /** init - * set true by default, should be defined if dimension reduction - * preprocessor is using some initialization - */ - virtual bool init(CFeatures* data); - /** cleanup * set empty by default, should be defined if dimension reduction * preprocessor should free some resources diff --git a/src/shogun/preprocessor/KernelPCA.cpp b/src/shogun/preprocessor/KernelPCA.cpp index ac7adadffd8..445c4e1148e 100644 --- a/src/shogun/preprocessor/KernelPCA.cpp +++ b/src/shogun/preprocessor/KernelPCA.cpp @@ -63,63 +63,62 @@ CKernelPCA::~CKernelPCA() SG_UNREF(m_init_features); } -bool CKernelPCA::init(CFeatures* features) +void CKernelPCA::fit(CFeatures* features) { - if (!m_initialized && m_kernel) - { - SG_REF(features); - m_init_features = features; - - m_kernel->init(features,features); - SGMatrix kernel_matrix = m_kernel->get_kernel_matrix(); - m_kernel->cleanup(); - int32_t n = kernel_matrix.num_cols; - int32_t m = kernel_matrix.num_rows; - ASSERT(n==m) - if (m_target_dim > n) - { - SG_SWARNING( - "Target dimension (%d) is not a valid value, it must be" - "less or equal than the number of vectors." - "Setting it to maximum allowed size (%d).", - m_target_dim, n); - m_target_dim = n; - } + REQUIRE(m_kernel, "Kernel not set\n"); - SGVector bias_tmp = linalg::rowwise_sum(kernel_matrix); - linalg::scale(bias_tmp, bias_tmp, -1.0 / n); - float64_t s = linalg::sum(bias_tmp) / n; - linalg::add_scalar(bias_tmp, -s); + if (m_initialized) + cleanup(); - linalg::center_matrix(kernel_matrix); + SG_REF(features); + m_init_features = features; + + m_kernel->init(features, features); + SGMatrix kernel_matrix = m_kernel->get_kernel_matrix(); + m_kernel->cleanup(); + int32_t n = kernel_matrix.num_cols; + int32_t m = kernel_matrix.num_rows; + ASSERT(n == m) + if (m_target_dim > n) + { + SG_SWARNING( + "Target dimension (%d) is not a valid value, it must be" + "less or equal than the number of vectors." + "Setting it to maximum allowed size (%d).", + m_target_dim, n); + m_target_dim = n; + } - SGVector eigenvalues(m_target_dim); - SGMatrix eigenvectors(kernel_matrix.num_rows, m_target_dim); - linalg::eigen_solver_symmetric( - kernel_matrix, eigenvalues, eigenvectors, m_target_dim); + SGVector bias_tmp = linalg::rowwise_sum(kernel_matrix); + linalg::scale(bias_tmp, bias_tmp, -1.0 / n); + float64_t s = linalg::sum(bias_tmp) / n; + linalg::add_scalar(bias_tmp, -s); - m_transformation_matrix = - SGMatrix(kernel_matrix.num_rows, m_target_dim); - // eigenvalues are in increasing order - for (int32_t i = 0; i < m_target_dim; i++) - { - //normalize and trap divide by zero and negative eigenvalues - auto idx = m_target_dim - i - 1; - auto vec = eigenvectors.get_column(idx); - linalg::scale( - vec, vec, 1.0 / std::sqrt(std::max(std::numeric_limits::epsilon(), eigenvalues[idx]))); - m_transformation_matrix.set_column(i, vec); - } + linalg::center_matrix(kernel_matrix); - m_bias_vector = SGVector(m_target_dim); - linalg::matrix_prod( - m_transformation_matrix, bias_tmp, m_bias_vector, true); + SGVector eigenvalues(m_target_dim); + SGMatrix eigenvectors(kernel_matrix.num_rows, m_target_dim); + linalg::eigen_solver_symmetric( + kernel_matrix, eigenvalues, eigenvectors, m_target_dim); - m_initialized=true; - SG_INFO("Done\n") - return true; + m_transformation_matrix = + SGMatrix(kernel_matrix.num_rows, m_target_dim); + // eigenvalues are in increasing order + for (int32_t i = 0; i < m_target_dim; i++) + { + // normalize and trap divide by zero and negative eigenvalues + auto idx = m_target_dim - i - 1; + auto vec = eigenvectors.get_column(idx); + linalg::scale( + vec, vec, 1.0 / std::sqrt(std::max(std::numeric_limits::epsilon(), eigenvalues[idx]))); + m_transformation_matrix.set_column(i, vec); } - return false; + + m_bias_vector = SGVector(m_target_dim); + linalg::matrix_prod(m_transformation_matrix, bias_tmp, m_bias_vector, true); + + m_initialized = true; + SG_INFO("Done\n") } SGMatrix CKernelPCA::apply_to_feature_matrix(CFeatures* features) diff --git a/src/shogun/preprocessor/KernelPCA.h b/src/shogun/preprocessor/KernelPCA.h index be7f763172f..a3514268bb6 100644 --- a/src/shogun/preprocessor/KernelPCA.h +++ b/src/shogun/preprocessor/KernelPCA.h @@ -42,8 +42,8 @@ class CKernelPCA: public CDimensionReductionPreprocessor virtual ~CKernelPCA(); - /// initialize preprocessor from features - virtual bool init(CFeatures* features); + virtual void fit(CFeatures* features); + /// cleanup virtual void cleanup(); diff --git a/src/shogun/preprocessor/PCA.cpp b/src/shogun/preprocessor/PCA.cpp index 2d35aa87333..2a137b20015 100644 --- a/src/shogun/preprocessor/PCA.cpp +++ b/src/shogun/preprocessor/PCA.cpp @@ -74,52 +74,46 @@ CPCA::~CPCA() { } -bool CPCA::init(CFeatures* features) +void CPCA::fit(CFeatures* features) { - if (!m_initialized) - { - REQUIRE(features->get_feature_class()==C_DENSE, "PCA only works with dense features") - REQUIRE(features->get_feature_type()==F_DREAL, "PCA only works with real features") - - SGMatrix feature_matrix = ((CDenseFeatures*)features) - ->get_feature_matrix(); - int32_t num_vectors = feature_matrix.num_cols; - int32_t num_features = feature_matrix.num_rows; - SG_INFO( - "num_examples: %d num_features: %d\n", num_vectors, num_features) + if (m_initialized) + cleanup(); - // max target dim allowed - int32_t max_dim_allowed = CMath::min(num_vectors, num_features); - num_dim=0; + auto feature_matrix = + features->as>()->get_feature_matrix(); + int32_t num_vectors = feature_matrix.num_cols; + int32_t num_features = feature_matrix.num_rows; + SG_INFO("num_examples: %d num_features: %d\n", num_vectors, num_features) - REQUIRE(m_target_dim<=max_dim_allowed, - "target dimension should be less or equal to than minimum of N and D") + // max target dim allowed + int32_t max_dim_allowed = CMath::min(num_vectors, num_features); + num_dim = 0; - // center data - Map fmatrix(feature_matrix.matrix, num_features, num_vectors); + REQUIRE( + m_target_dim <= max_dim_allowed, + "target dimension should be less or equal to than minimum of N and D") - m_mean_vector = SGVector(num_features); - Map data_mean(m_mean_vector.vector, num_features); - data_mean = fmatrix.rowwise().sum()/(float64_t) num_vectors; - fmatrix = fmatrix.colwise()-data_mean; + // center data + Map fmatrix(feature_matrix.matrix, num_features, num_vectors); - m_eigenvalues_vector = SGVector(max_dim_allowed); + m_mean_vector = SGVector(num_features); + Map data_mean(m_mean_vector.vector, num_features); + data_mean = fmatrix.rowwise().sum() / (float64_t)num_vectors; + fmatrix = fmatrix.colwise() - data_mean; - if (m_method == AUTO) - m_method = (num_vectors>num_features) ? EVD : SVD; + m_eigenvalues_vector = SGVector(max_dim_allowed); - if (m_method == EVD) - init_with_evd(feature_matrix, max_dim_allowed); - else - init_with_svd(feature_matrix, max_dim_allowed); + if (m_method == AUTO) + m_method = (num_vectors > num_features) ? EVD : SVD; - // restore feature matrix - fmatrix = fmatrix.colwise()+data_mean; - m_initialized = true; - return true; - } + if (m_method == EVD) + init_with_evd(feature_matrix, max_dim_allowed); + else + init_with_svd(feature_matrix, max_dim_allowed); - return false; + // restore feature matrix + fmatrix = fmatrix.colwise() + data_mean; + m_initialized = true; } void CPCA::init_with_evd(const SGMatrix& feature_matrix, int32_t max_dim_allowed) diff --git a/src/shogun/preprocessor/PCA.h b/src/shogun/preprocessor/PCA.h index c652cea742c..38e195d5d71 100644 --- a/src/shogun/preprocessor/PCA.h +++ b/src/shogun/preprocessor/PCA.h @@ -130,10 +130,7 @@ class CPCA: public CDimensionReductionPreprocessor /** destructor */ virtual ~CPCA(); - /** initialize preprocessor from features - * @param features - */ - virtual bool init(CFeatures* features); + virtual void fit(CFeatures* features); /** cleanup */ virtual void cleanup(); diff --git a/tests/unit/preprocessor/KernelPCA_unittest.cc b/tests/unit/preprocessor/KernelPCA_unittest.cc index 004c0f8e6fd..aa85f89dbeb 100644 --- a/tests/unit/preprocessor/KernelPCA_unittest.cc +++ b/tests/unit/preprocessor/KernelPCA_unittest.cc @@ -51,7 +51,7 @@ TEST(KernelPCA, apply_to_feature_matrix) CKernelPCA* kpca = new CKernelPCA(kernel); SG_REF(kpca) kpca->set_target_dim(target_dim); - kpca->init(train_feats); + kpca->fit(train_feats); SGMatrix embedding = kpca->apply_to_feature_matrix(test_feats); @@ -82,7 +82,7 @@ TEST(KernelPCA, apply_to_feature_vector) CKernelPCA* kpca = new CKernelPCA(kernel); SG_REF(kpca) kpca->set_target_dim(target_dim); - kpca->init(train_feats); + kpca->fit(train_feats); SGVector embedding = kpca->apply_to_feature_vector(test_vector); diff --git a/tests/unit/preprocessor/PCA_unittest.cc b/tests/unit/preprocessor/PCA_unittest.cc index 7018264898a..77c480b3232 100644 --- a/tests/unit/preprocessor/PCA_unittest.cc +++ b/tests/unit/preprocessor/PCA_unittest.cc @@ -54,7 +54,7 @@ TEST(PCA, PCA_N_greater_D_EVD) CDenseFeatures* features=new CDenseFeatures(data); CPCA* pca=new CPCA(EVD); pca->set_target_dim(3); - pca->init(features); + pca->fit(features); SGMatrix transmat=pca->get_transformation_matrix(); SGMatrix finalmat=pca->apply_to_feature_matrix(features); @@ -118,7 +118,7 @@ TEST(PCA, PCA_N_equals_D_EVD) CDenseFeatures* features=new CDenseFeatures(data); CPCA* pca=new CPCA(EVD); pca->set_target_dim(3); - pca->init(features); + pca->fit(features); SGMatrix transmat=pca->get_transformation_matrix(); SGMatrix finalmat=pca->apply_to_feature_matrix(features); @@ -182,7 +182,7 @@ TEST(PCA, PCA_N_less_D_EVD) CDenseFeatures* features=new CDenseFeatures(data); CPCA* pca=new CPCA(EVD); pca->set_target_dim(2); - pca->init(features); + pca->fit(features); SGMatrix transmat=pca->get_transformation_matrix(); SGMatrix finalmat=pca->apply_to_feature_matrix(features); @@ -243,7 +243,7 @@ TEST(PCA, PCA_N_greater_D_SVD) CDenseFeatures* features=new CDenseFeatures(data); CPCA* pca=new CPCA(SVD); pca->set_target_dim(3); - pca->init(features); + pca->fit(features); SGMatrix transmat=pca->get_transformation_matrix(); SGMatrix finalmat=pca->apply_to_feature_matrix(features); @@ -303,7 +303,7 @@ TEST(PCA, PCA_N_equals_D_SVD) CDenseFeatures* features=new CDenseFeatures(data); CPCA* pca=new CPCA(SVD); pca->set_target_dim(3); - pca->init(features); + pca->fit(features); SGMatrix transmat=pca->get_transformation_matrix(); SGMatrix finalmat=pca->apply_to_feature_matrix(features); @@ -362,7 +362,7 @@ TEST(PCA, PCA_N_less_D_SVD) CDenseFeatures* features=new CDenseFeatures(data); CPCA* pca=new CPCA(SVD); pca->set_target_dim(2); - pca->init(features); + pca->fit(features); SGMatrix transmat=pca->get_transformation_matrix(); SGMatrix finalmat=pca->apply_to_feature_matrix(features); @@ -414,7 +414,7 @@ TEST(PCA, PCA_MEM_IN_PLACE) CDenseFeatures* features=new CDenseFeatures(data); CPCA* pca=new CPCA(AUTO,false,MEM_IN_PLACE); pca->set_target_dim(3); - pca->init(features); + pca->fit(features); SGMatrix transmat=pca->get_transformation_matrix(); SGMatrix finalmat=pca->apply_to_feature_matrix(features); @@ -480,7 +480,7 @@ TEST(PCA, PCA_apply_to_feature_vector_methodTest) CDenseFeatures* features=new CDenseFeatures(data); CPCA* pca=new CPCA(SVD); pca->set_target_dim(2); - pca->init(features); + pca->fit(features); SGVector finalVector=pca->apply_to_feature_vector(inputVector); @@ -515,7 +515,7 @@ TEST(PCA, PCA_WHITEN_SVD) CDenseFeatures* features=new CDenseFeatures(data); CPCA* pca=new CPCA(SVD,true); pca->set_target_dim(3); - pca->init(features); + pca->fit(features); SGMatrix transmat=pca->get_transformation_matrix(); SGMatrix finalmat=pca->apply_to_feature_matrix(features); @@ -594,7 +594,7 @@ TEST(PCA, PCA_WHITEN_EVD) CDenseFeatures* features=new CDenseFeatures(data); CPCA* pca=new CPCA(EVD,true); pca->set_target_dim(3); - pca->init(features); + pca->fit(features); SGMatrix finalmat=pca->apply_to_feature_matrix(features);