From 76dfacc967f2ae7dd9473d265f2b0a52906518ab Mon Sep 17 00:00:00 2001 From: Wuwei Lin Date: Thu, 17 May 2018 21:03:19 +0800 Subject: [PATCH] Cleanup and implement apply_to_matrix in dense preproc subclasses --- .../preprocessor/HomogeneousKernelMap.cpp | 18 +++---- .../preprocessor/HomogeneousKernelMap.h | 10 ++-- src/shogun/preprocessor/LogPlusOne.cpp | 22 +++------ src/shogun/preprocessor/LogPlusOne.h | 9 ++-- src/shogun/preprocessor/NormOne.cpp | 25 ++++------ src/shogun/preprocessor/NormOne.h | 9 ++-- src/shogun/preprocessor/PNorm.cpp | 25 ++++------ src/shogun/preprocessor/PNorm.h | 9 ++-- src/shogun/preprocessor/PruneVarSubMean.cpp | 40 ++++++--------- src/shogun/preprocessor/PruneVarSubMean.h | 9 ++-- .../RandomFourierGaussPreproc.cpp | 49 +++++++++---------- .../preprocessor/RandomFourierGaussPreproc.h | 16 +++--- src/shogun/preprocessor/RescaleFeatures.cpp | 21 ++++---- src/shogun/preprocessor/RescaleFeatures.h | 13 ++--- src/shogun/preprocessor/SumOne.cpp | 25 ++++------ src/shogun/preprocessor/SumOne.h | 9 ++-- 16 files changed, 132 insertions(+), 177 deletions(-) diff --git a/src/shogun/preprocessor/HomogeneousKernelMap.cpp b/src/shogun/preprocessor/HomogeneousKernelMap.cpp index 93589c04003..c77d0038572 100644 --- a/src/shogun/preprocessor/HomogeneousKernelMap.cpp +++ b/src/shogun/preprocessor/HomogeneousKernelMap.cpp @@ -5,8 +5,8 @@ * Evgeniy Andreev, Evan Shelhamer, Bjoern Esser */ +#include #include - #include #include @@ -154,22 +154,20 @@ void CHomogeneousKernelMap::init() } -SGMatrix CHomogeneousKernelMap::apply_to_feature_matrix (CFeatures* features) +SGMatrix +CHomogeneousKernelMap::apply_to_matrix(SGMatrix matrix) { - auto simple_features = features->as>(); - int32_t num_vectors = simple_features->get_num_vectors (); - int32_t num_features = simple_features->get_num_features (); + auto num_vectors = matrix.num_cols; + auto num_features = matrix.num_rows; SGMatrix feature_matrix(num_features*(2*m_order+1),num_vectors); - for (int i = 0; i < num_vectors; ++i) + for (auto i : range(num_vectors)) { - SGVector transformed = apply_to_vector(simple_features->get_feature_vector(i)); - for (int j=0; j transformed = apply_to_vector(matrix.get_column(i)); + for (auto j : range(transformed.vlen)) feature_matrix(j,i) = transformed[j]; } - simple_features->set_feature_matrix(feature_matrix); - return feature_matrix; } diff --git a/src/shogun/preprocessor/HomogeneousKernelMap.h b/src/shogun/preprocessor/HomogeneousKernelMap.h index 73b3270be69..5eb124b93f8 100644 --- a/src/shogun/preprocessor/HomogeneousKernelMap.h +++ b/src/shogun/preprocessor/HomogeneousKernelMap.h @@ -72,12 +72,6 @@ namespace shogun /** cleanup */ virtual void cleanup(); - /** applies to features - * @param features features - * @return feature matrix - */ - virtual SGMatrix apply_to_feature_matrix(CFeatures* features); - /** applies to feature vector * @param vector features vector * @return transformed feature vector @@ -135,6 +129,10 @@ namespace shogun */ float64_t get_period() const; + protected: + virtual SGMatrix + apply_to_matrix(SGMatrix matrix) override; + private: void init (); void register_params (); diff --git a/src/shogun/preprocessor/LogPlusOne.cpp b/src/shogun/preprocessor/LogPlusOne.cpp index 36e8825dc00..c9dcf97a2cf 100644 --- a/src/shogun/preprocessor/LogPlusOne.cpp +++ b/src/shogun/preprocessor/LogPlusOne.cpp @@ -4,10 +4,11 @@ * Authors: Soeren Sonnenburg, Sergey Lisitsyn, Evgeniy Andreev */ -#include -#include +#include #include #include +#include +#include using namespace shogun; @@ -42,21 +43,14 @@ bool CLogPlusOne::save(FILE* f) return false; } -/// apply preproc on feature matrix -/// result in feature matrix -/// return pointer to feature_matrix, i.e. f->get_feature_matrix(); -SGMatrix CLogPlusOne::apply_to_feature_matrix(CFeatures* features) +SGMatrix CLogPlusOne::apply_to_matrix(SGMatrix matrix) { - auto feature_matrix = - features->as>()->get_feature_matrix(); - - for (int32_t i=0; i /// save preprocessor init-data to file virtual bool save(FILE* f); - /// apply preproc on feature matrix - /// result in feature matrix - /// return pointer to feature_matrix, i.e. f->get_feature_matrix(); - virtual SGMatrix apply_to_feature_matrix(CFeatures* features); - /// apply preproc on single feature vector /// result in feature matrix virtual SGVector apply_to_feature_vector(SGVector vector); @@ -58,6 +53,10 @@ class CLogPlusOne : public CDensePreprocessor /// return a type of preprocessor virtual EPreprocessorType get_type() const { return P_LOGPLUSONE; } + + protected: + virtual SGMatrix + apply_to_matrix(SGMatrix matrix) override; }; } #endif diff --git a/src/shogun/preprocessor/NormOne.cpp b/src/shogun/preprocessor/NormOne.cpp index e8c269701a2..177f3d3326c 100644 --- a/src/shogun/preprocessor/NormOne.cpp +++ b/src/shogun/preprocessor/NormOne.cpp @@ -5,11 +5,12 @@ * Viktor Gal */ -#include -#include +#include +#include #include #include -#include +#include +#include using namespace shogun; @@ -43,21 +44,15 @@ bool CNormOne::save(FILE* f) return false; } -/// apply preproc on feature matrix -/// result in feature matrix -/// return pointer to feature_matrix, i.e. f->get_feature_matrix(); -SGMatrix CNormOne::apply_to_feature_matrix(CFeatures* features) +SGMatrix CNormOne::apply_to_matrix(SGMatrix matrix) { - auto feature_matrix = - features->as>()->get_feature_matrix(); - - for (int32_t i=0; i vec(&(feature_matrix.matrix[i*feature_matrix.num_rows]), feature_matrix.num_rows, false); - float64_t norm = std::sqrt(linalg::dot(vec, vec)); - SGVector::scale_vector(1.0/norm, vec, feature_matrix.num_rows); + auto vec = matrix.get_column(i); + auto norm = linalg::norm(vec); + linalg::scale(vec, vec, 1.0 / norm); } - return feature_matrix; + return matrix; } /// apply preproc on single feature vector diff --git a/src/shogun/preprocessor/NormOne.h b/src/shogun/preprocessor/NormOne.h index ce9ad9c0500..4c23054bcf4 100644 --- a/src/shogun/preprocessor/NormOne.h +++ b/src/shogun/preprocessor/NormOne.h @@ -44,11 +44,6 @@ class CNormOne : public CDensePreprocessor /// save preprocessor init-data to file virtual bool save(FILE* f); - /// apply preproc on feature matrix - /// result in feature matrix - /// return pointer to feature_matrix, i.e. f->get_feature_matrix(); - virtual SGMatrix apply_to_feature_matrix(CFeatures* features); - /// apply preproc on single feature vector /// result in feature matrix virtual SGVector apply_to_feature_vector(SGVector vector); @@ -58,6 +53,10 @@ class CNormOne : public CDensePreprocessor /// return a type of preprocessor virtual EPreprocessorType get_type() const { return P_NORMONE; } + + protected: + virtual SGMatrix + apply_to_matrix(SGMatrix matrix) override; }; } #endif diff --git a/src/shogun/preprocessor/PNorm.cpp b/src/shogun/preprocessor/PNorm.cpp index 88af434e4cb..02deec38754 100644 --- a/src/shogun/preprocessor/PNorm.cpp +++ b/src/shogun/preprocessor/PNorm.cpp @@ -5,10 +5,11 @@ * Sergey Lisitsyn */ -#include -#include -#include #include +#include +#include +#include +#include #ifdef HAVE_LAPACK #include @@ -56,21 +57,15 @@ bool CPNorm::save (FILE* f) return false; } -/// apply preproc on feature matrix -/// result in feature matrix -/// return pointer to feature_matrix, i.e. f->get_feature_matrix(); -SGMatrix CPNorm::apply_to_feature_matrix (CFeatures* features) +SGMatrix CPNorm::apply_to_matrix(SGMatrix matrix) { - auto feature_matrix = - features->as>()->get_feature_matrix(); - - for (int32_t i=0; i::scale_vector(1.0/norm, vec, feature_matrix.num_rows); + auto vec = matrix.get_column(i); + auto norm = get_pnorm(vec.vector, vec.vlen); + linalg::scale(vec, vec, 1.0 / norm); } - return feature_matrix; + return matrix; } /// apply preproc on single feature vector diff --git a/src/shogun/preprocessor/PNorm.h b/src/shogun/preprocessor/PNorm.h index d94a8957d26..f75680bb98b 100644 --- a/src/shogun/preprocessor/PNorm.h +++ b/src/shogun/preprocessor/PNorm.h @@ -47,11 +47,6 @@ class CPNorm : public CDensePreprocessor /// save preprocessor init-data to file virtual bool save (FILE* f); - /// apply preproc on feature matrix - /// result in feature matrix - /// return pointer to feature_matrix, i.e. f->get_feature_matrix(); - virtual SGMatrix apply_to_feature_matrix (CFeatures* features); - /// apply preproc on single feature vector /// result in feature matrix virtual SGVector apply_to_feature_vector (SGVector vector); @@ -74,6 +69,10 @@ class CPNorm : public CDensePreprocessor */ double get_pnorm () const; + protected: + virtual SGMatrix + apply_to_matrix(SGMatrix matrix) override; + private: void register_param (); inline float64_t get_pnorm (float64_t* vec, int32_t vec_len) const; diff --git a/src/shogun/preprocessor/PruneVarSubMean.cpp b/src/shogun/preprocessor/PruneVarSubMean.cpp index 14518880c3f..3b7aceebf27 100644 --- a/src/shogun/preprocessor/PruneVarSubMean.cpp +++ b/src/shogun/preprocessor/PruneVarSubMean.cpp @@ -51,12 +51,15 @@ void CPruneVarSubMean::fit(CFeatures* features) for (auto i : range(num_examples)) { for (auto j : range(num_features)) - var[j] += CMath::sq( - m_mean[j] - feature_matrix.matrix[i * num_features + j]); + { + auto diff = + linalg::add(m_mean, feature_matrix.get_column(i), 1.0, -1.0); + var[j] += linalg::dot(diff, diff); + } } int32_t num_ok = 0; - int32_t* idx_ok = SG_MALLOC(int32_t, num_features); + auto idx_ok = SGVector(num_features); for (auto j : range(num_features)) { @@ -82,7 +85,6 @@ void CPruneVarSubMean::fit(CFeatures* features) m_std[j] = std::sqrt(var[idx_ok[j]]); } m_num_idx = num_ok; - SG_FREE(idx_ok); m_mean = new_mean; m_initialized = true; @@ -97,43 +99,33 @@ void CPruneVarSubMean::cleanup() m_initialized = false; } -/// apply preproc on feature matrix -/// result in feature matrix -/// return pointer to feature_matrix, i.e. f->get_feature_matrix(); -SGMatrix CPruneVarSubMean::apply_to_feature_matrix(CFeatures* features) +SGMatrix +CPruneVarSubMean::apply_to_matrix(SGMatrix matrix) { ASSERT(m_initialized) - int32_t num_vectors=0; - int32_t num_features=0; + int32_t num_vectors = matrix.num_cols; - auto simple_features = features->as>(); - auto m = simple_features->get_feature_matrix(); + SGMatrix result(matrix.data(), m_num_idx, num_vectors); - SG_INFO("get Feature matrix: %ix%i\n", num_vectors, num_features) - SG_INFO("Preprocessing feature matrix\n") - for (int32_t vec=0; vecset_num_features(m_num_idx); - simple_features->get_feature_matrix(num_features, num_vectors); - SG_INFO("new Feature matrix: %ix%i\n", num_vectors, num_features) - - return simple_features->get_feature_matrix(); + return result; } /// apply preproc on single feature vector diff --git a/src/shogun/preprocessor/PruneVarSubMean.h b/src/shogun/preprocessor/PruneVarSubMean.h index d7938902a84..ffeed74bb3a 100644 --- a/src/shogun/preprocessor/PruneVarSubMean.h +++ b/src/shogun/preprocessor/PruneVarSubMean.h @@ -41,11 +41,6 @@ class CPruneVarSubMean : public CDensePreprocessor /// cleanup virtual void cleanup(); - /// apply preproc on feature matrix - /// result in feature matrix - /// return pointer to feature_matrix, i.e. f->get_feature_matrix(); - virtual SGMatrix apply_to_feature_matrix(CFeatures* features); - /// apply preproc on single feature vector /// result in feature matrix virtual SGVector apply_to_feature_vector(SGVector vector); @@ -56,6 +51,10 @@ class CPruneVarSubMean : public CDensePreprocessor /// return a type of preprocessor virtual EPreprocessorType get_type() const { return P_PRUNEVARSUBMEAN; } + protected: + virtual SGMatrix + apply_to_matrix(SGMatrix matrix) override; + private: void init(); void register_parameters(); diff --git a/src/shogun/preprocessor/RandomFourierGaussPreproc.cpp b/src/shogun/preprocessor/RandomFourierGaussPreproc.cpp index 02c7c548d81..82826d01205 100644 --- a/src/shogun/preprocessor/RandomFourierGaussPreproc.cpp +++ b/src/shogun/preprocessor/RandomFourierGaussPreproc.cpp @@ -369,45 +369,44 @@ SGVector CRandomFourierGaussPreproc::apply_to_feature_vector(SGVector return res; } -SGMatrix CRandomFourierGaussPreproc::apply_to_feature_matrix(CFeatures* features) +SGMatrix +CRandomFourierGaussPreproc::apply_to_matrix(SGMatrix matrix) { // version for case dim_feature_space < dim_input space with direct transformation on feature matrix ?? - int32_t num_vectors = 0; - int32_t num_features = 0; - float64_t* m = ((CDenseFeatures*) features)->get_feature_matrix( - num_features, num_vectors); + int32_t num_vectors = matrix.num_cols; + int32_t num_features = matrix.num_rows; + SG_INFO("get Feature matrix: %ix%i\n", num_vectors, num_features) - if (num_features!=cur_dim_input_space) + if (num_features != cur_dim_input_space) { throw ShogunException( - "float64_t * CRandomFourierGaussPreproc::apply_to_feature_matrix(CFeatures *f): num_features!=cur_dim_input_space is not allowed\n"); + "float64_t * " + "CRandomFourierGaussPreproc::apply_to_matrix(" + "SGMatrix matrix): matrix.num_rows != " + "cur_dim_input_space is not allowed\n"); } - if (m) - { - SGMatrix res(cur_dim_feature_space,num_vectors); + SGMatrix res(cur_dim_feature_space, num_vectors); - float64_t val = std::sqrt(2.0 / cur_dim_feature_space); + float64_t val = std::sqrt(2.0 / cur_dim_feature_space); - for (int32_t vec = 0; vec < num_vectors; vec++) + for (auto vec : range(num_vectors)) + { + for (auto od : range(cur_dim_feature_space)) { - for (int32_t od = 0; od < cur_dim_feature_space; ++od) - { - SGVector a(m+vec * num_features, cur_dim_input_space, false); - SGVector b(randomcoeff_multiplicative+od*cur_dim_input_space, cur_dim_input_space, false); - res.matrix[od + vec * cur_dim_feature_space] = val * cos( - randomcoeff_additive[od] - + linalg::dot(a, b)); - } + SGVector a( + matrix.matrix + vec * num_features, cur_dim_input_space, false); + SGVector b( + randomcoeff_multiplicative + od * cur_dim_input_space, + cur_dim_input_space, false); + res(vec, od) = + val * cos(randomcoeff_additive[od] + linalg::dot(a, b)); } - ((CDenseFeatures*) features)->set_feature_matrix(res); - - return res; } - else - return SGMatrix(); + + return res; } void CRandomFourierGaussPreproc::cleanup() diff --git a/src/shogun/preprocessor/RandomFourierGaussPreproc.h b/src/shogun/preprocessor/RandomFourierGaussPreproc.h index 1feb0b3c62b..4d7e9ccc8b5 100644 --- a/src/shogun/preprocessor/RandomFourierGaussPreproc.h +++ b/src/shogun/preprocessor/RandomFourierGaussPreproc.h @@ -54,13 +54,6 @@ class CRandomFourierGaussPreproc: public CDensePreprocessor { */ ~CRandomFourierGaussPreproc(); - /** default processing routine, inherited from base class - * @param features the features to be processed, must be of type CDenseFeatures - * @return the processed feature matrix from the CDenseFeatures class - * in case (2) (see description above) this routine requires only steps 2a) and 2b), the rest is determined automatically - */ - virtual SGMatrix apply_to_feature_matrix(CFeatures* features); // ref count fo the feature matrix??? - /** alternative processing routine, inherited from base class * @param vector the feature vector to be processed @@ -155,7 +148,6 @@ class CRandomFourierGaussPreproc: public CDensePreprocessor { */ bool init_randomcoefficients(); - /** a getter * @return the set value of protected member dim_input_space */ @@ -178,6 +170,14 @@ class CRandomFourierGaussPreproc: public CDensePreprocessor { virtual EPreprocessorType get_type() const { return P_RANDOMFOURIERGAUSS; } protected: + /** default processing routine, inherited from base class + * @param matrix the features matrix to be processed + * @return the processed feature matrix from the CDenseFeatures + * class in case (2) (see description above) this routine requires only + * steps 2a) and 2b), the rest is determined automatically + */ + virtual SGMatrix + apply_to_matrix(SGMatrix matrix) override; /** * helper for copy constructor and assignment operator= diff --git a/src/shogun/preprocessor/RescaleFeatures.cpp b/src/shogun/preprocessor/RescaleFeatures.cpp index 8dcf1d5f5ee..0c875ca634f 100644 --- a/src/shogun/preprocessor/RescaleFeatures.cpp +++ b/src/shogun/preprocessor/RescaleFeatures.cpp @@ -4,6 +4,8 @@ * Authors: Viktor Gal, Bjoern Esser */ +#include +#include #include using namespace shogun; @@ -70,24 +72,21 @@ void CRescaleFeatures::cleanup() m_initialized = false; } -SGMatrix CRescaleFeatures::apply_to_feature_matrix(CFeatures* features) +SGMatrix +CRescaleFeatures::apply_to_matrix(SGMatrix matrix) { ASSERT(m_initialized); - auto feature_matrix = - features->as>()->get_feature_matrix(); - ASSERT(feature_matrix.num_rows == m_min.vlen); + ASSERT(matrix.num_rows == m_min.vlen); - for (index_t i = 0; i < feature_matrix.num_cols; i++) + for (auto i : range(matrix.num_cols)) { - float64_t* vec = feature_matrix.get_column_vector(i); - SGVector::vec1_plus_scalar_times_vec2(vec, -1.0, m_min.vector, feature_matrix.num_rows); - for (index_t j = 0; j < feature_matrix.num_rows; j++) { - vec[j] *= m_range[j]; - } + auto vec = matrix.get_column(i); + linalg::add(vec, m_min, vec, 1.0, -1.0); + linalg::element_prod(vec, m_range, vec); } - return feature_matrix; + return matrix; } SGVector CRescaleFeatures::apply_to_feature_vector(SGVector vector) diff --git a/src/shogun/preprocessor/RescaleFeatures.h b/src/shogun/preprocessor/RescaleFeatures.h index eca25eeb40e..5b54c36dec3 100644 --- a/src/shogun/preprocessor/RescaleFeatures.h +++ b/src/shogun/preprocessor/RescaleFeatures.h @@ -45,14 +45,6 @@ namespace shogun */ virtual void cleanup(); - /** - * Apply preproc on a feature matrix - * - * @param features input feature matrix - * @return pointer to feature_matrix, i.e. f->get_feature_matrix(); - */ - virtual SGMatrix - apply_to_feature_matrix(CFeatures* features); /** * Apply preproc on a single feature vector @@ -76,7 +68,10 @@ namespace shogun void register_parameters(); protected: - /** min */ + virtual SGMatrix + apply_to_matrix(SGMatrix matrix) override; + + /** min */ SGVector m_min; /** 1.0/(max[i]-min[i]) */ SGVector m_range; diff --git a/src/shogun/preprocessor/SumOne.cpp b/src/shogun/preprocessor/SumOne.cpp index 9f083ec030d..b249700ca15 100644 --- a/src/shogun/preprocessor/SumOne.cpp +++ b/src/shogun/preprocessor/SumOne.cpp @@ -4,10 +4,11 @@ * Authors: Soeren Sonnenburg, Evgeniy Andreev, Sergey Lisitsyn */ -#include -#include -#include #include +#include +#include +#include +#include using namespace shogun; @@ -41,21 +42,15 @@ bool CSumOne::save(FILE* f) return false; } -/// apply preproc on feature matrix -/// result in feature matrix -/// return pointer to feature_matrix, i.e. f->get_feature_matrix(); -SGMatrix CSumOne::apply_to_feature_matrix(CFeatures* features) +SGMatrix CSumOne::apply_to_matrix(SGMatrix matrix) { - auto feature_matrix = - features->as>()->get_feature_matrix(); - - for (int32_t i=0; i::sum(vec,feature_matrix.num_rows); - SGVector::scale_vector(1.0/sum, vec, feature_matrix.num_rows); + auto vec = matrix.get_column(i); + auto sum = linalg::sum(vec); + linalg::scale(vec, vec, 1.0 / sum); } - return feature_matrix; + return matrix; } /// apply preproc on single feature vector diff --git a/src/shogun/preprocessor/SumOne.h b/src/shogun/preprocessor/SumOne.h index f1c0bd55226..30a99e47aee 100644 --- a/src/shogun/preprocessor/SumOne.h +++ b/src/shogun/preprocessor/SumOne.h @@ -42,11 +42,6 @@ class CSumOne : public CDensePreprocessor /// save preprocessor init-data to file virtual bool save(FILE* f); - /// apply preproc on feature matrix - /// result in feature matrix - /// return pointer to feature_matrix, i.e. f->get_feature_matrix(); - virtual SGMatrix apply_to_feature_matrix(CFeatures* features); - /// apply preproc on single feature vector /// result in feature matrix virtual SGVector apply_to_feature_vector(SGVector vector); @@ -56,6 +51,10 @@ class CSumOne : public CDensePreprocessor /// return a type of preprocessor virtual EPreprocessorType get_type() const { return P_SUMONE; } + + protected: + virtual SGMatrix + apply_to_matrix(SGMatrix matrix) override; }; } #endif