Skip to content

Commit

Permalink
Cleanup and implement apply_to_matrix in dense preproc subclasses
Browse files Browse the repository at this point in the history
  • Loading branch information
vinx13 authored and vigsterkr committed Jul 12, 2018
1 parent 8853809 commit c06258f
Show file tree
Hide file tree
Showing 16 changed files with 132 additions and 177 deletions.
18 changes: 8 additions & 10 deletions src/shogun/preprocessor/HomogeneousKernelMap.cpp
Expand Up @@ -5,8 +5,8 @@
* Evgeniy Andreev, Evan Shelhamer, Bjoern Esser
*/

#include <shogun/base/range.h>
#include <shogun/io/SGIO.h>

#include <shogun/mathematics/Math.h>
#include <shogun/preprocessor/HomogeneousKernelMap.h>

Expand Down Expand Up @@ -154,22 +154,20 @@ void CHomogeneousKernelMap::init()

}

// --- Diff hunk as rendered by the page: removed (OLD) and added (NEW) lines are
// --- interleaved with the +/- markers stripped. Annotations tag each side.
// OLD signature (removed): took a CFeatures* and extracted the dense matrix itself.
SGMatrix<float64_t> CHomogeneousKernelMap::apply_to_feature_matrix (CFeatures* features)
// NEW signature (added): operates directly on the matrix supplied by the
// CDensePreprocessor base class.
SGMatrix<float64_t>
CHomogeneousKernelMap::apply_to_matrix(SGMatrix<float64_t> matrix)
{
// OLD body (removed): dimension bookkeeping via CDenseFeatures accessors.
auto simple_features = features->as<CDenseFeatures<float64_t>>();
int32_t num_vectors = simple_features->get_num_vectors ();
int32_t num_features = simple_features->get_num_features ();
// NEW body (added): dimensions read straight off the SGMatrix
// (columns = vectors, rows = features).
auto num_vectors = matrix.num_cols;
auto num_features = matrix.num_rows;

// Output stacks (2*m_order+1) copies of the feature dimension per vector
// (common to both versions).
SGMatrix<float64_t> feature_matrix(num_features*(2*m_order+1),num_vectors);
// OLD loop header (removed):
for (int i = 0; i < num_vectors; ++i)
// NEW loop header (added): range-based iteration over vectors.
for (auto i : range(num_vectors))
{
// OLD (removed): fetched each vector through the features object.
SGVector<float64_t> transformed = apply_to_vector(simple_features->get_feature_vector(i));
// OLD inner loop (removed):
for (int j=0; j<transformed.vlen; j++)
// NEW (added): map each column of the input through apply_to_vector.
SGVector<float64_t> transformed = apply_to_vector(matrix.get_column(i));
// NEW inner loop (added):
for (auto j : range(transformed.vlen))
feature_matrix(j,i) = transformed[j];
}

// OLD epilogue (removed): wrote the result back into the features object.
// The NEW code just returns the freshly built matrix and leaves the input
// untouched.
simple_features->set_feature_matrix(feature_matrix);

return feature_matrix;
}

Expand Down
10 changes: 4 additions & 6 deletions src/shogun/preprocessor/HomogeneousKernelMap.h
Expand Up @@ -72,12 +72,6 @@ namespace shogun
/** cleanup */
virtual void cleanup();

/** applies to features
* @param features features
* @return feature matrix
*/
virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);

/** applies to feature vector
* @param vector features vector
* @return transformed feature vector
Expand Down Expand Up @@ -135,6 +129,10 @@ namespace shogun
*/
float64_t get_period() const;

protected:
virtual SGMatrix<float64_t>
apply_to_matrix(SGMatrix<float64_t> matrix) override;

private:
void init ();
void register_params ();
Expand Down
22 changes: 8 additions & 14 deletions src/shogun/preprocessor/LogPlusOne.cpp
Expand Up @@ -4,10 +4,11 @@
* Authors: Soeren Sonnenburg, Sergey Lisitsyn, Evgeniy Andreev
*/

#include <shogun/preprocessor/LogPlusOne.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/base/range.h>
#include <shogun/features/Features.h>
#include <shogun/mathematics/Math.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/preprocessor/LogPlusOne.h>

using namespace shogun;

Expand Down Expand Up @@ -42,21 +43,14 @@ bool CLogPlusOne::save(FILE* f)
return false;
}

// --- Diff hunk as rendered by the page: removed (OLD) and added (NEW) lines are
// --- interleaved with the +/- markers stripped. Annotations tag each side.
// OLD doxygen comment (removed along with the old entry point):
/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
// OLD signature (removed):
SGMatrix<float64_t> CLogPlusOne::apply_to_feature_matrix(CFeatures* features)
// NEW signature (added): applies log(x + 1) element-wise to the given matrix.
SGMatrix<float64_t> CLogPlusOne::apply_to_matrix(SGMatrix<float64_t> matrix)
{
// OLD (removed): pulled the dense matrix out of the features object.
auto feature_matrix =
features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();

// OLD loop header (removed):
for (int32_t i=0; i<feature_matrix.num_cols; i++)
// NEW loop header (added): i iterates over columns.
for (auto i : range(matrix.num_cols))
{
// OLD inner loop (removed): explicit column-major flat indexing i*num_rows + j.
for (int32_t j=0; j<feature_matrix.num_rows; j++)
feature_matrix.matrix[i * feature_matrix.num_rows + j] = std::log(
feature_matrix.matrix[i * feature_matrix.num_rows + j] + 1.0);
// NEW inner loop (added).
// NOTE(review): i ranges over num_cols and j over num_rows, yet they are
// passed as matrix(i, j). If SGMatrix::operator() takes (row, column),
// this swaps the indices relative to the old i*num_rows + j addressing
// and only coincides for square matrices — confirm against SGMatrix's
// indexing convention.
for (auto j : range(matrix.num_rows))
matrix(i, j) = std::log(matrix(i, j) + 1.0);
}
// OLD return (removed):
return feature_matrix;
// NEW return (added): the transform is in-place; the same matrix is returned.
return matrix;
}

/// apply preproc on single feature vector
Expand Down
9 changes: 4 additions & 5 deletions src/shogun/preprocessor/LogPlusOne.h
Expand Up @@ -44,11 +44,6 @@ class CLogPlusOne : public CDensePreprocessor<float64_t>
/// save preprocessor init-data to file
virtual bool save(FILE* f);

/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);

/// apply preproc on single feature vector
/// result in feature matrix
virtual SGVector<float64_t> apply_to_feature_vector(SGVector<float64_t> vector);
Expand All @@ -58,6 +53,10 @@ class CLogPlusOne : public CDensePreprocessor<float64_t>

/// return a type of preprocessor
virtual EPreprocessorType get_type() const { return P_LOGPLUSONE; }

protected:
virtual SGMatrix<float64_t>
apply_to_matrix(SGMatrix<float64_t> matrix) override;
};
}
#endif
25 changes: 10 additions & 15 deletions src/shogun/preprocessor/NormOne.cpp
Expand Up @@ -5,11 +5,12 @@
* Viktor Gal
*/

#include <shogun/preprocessor/NormOne.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/base/range.h>
#include <shogun/features/Features.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>
#include <shogun/features/Features.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/preprocessor/NormOne.h>

using namespace shogun;

Expand Down Expand Up @@ -43,21 +44,15 @@ bool CNormOne::save(FILE* f)
return false;
}

// --- Diff hunk as rendered by the page: removed (OLD) and added (NEW) lines are
// --- interleaved with the +/- markers stripped. Annotations tag each side.
// OLD doxygen comment (removed):
/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
// OLD signature (removed):
SGMatrix<float64_t> CNormOne::apply_to_feature_matrix(CFeatures* features)
// NEW signature (added): scales every column of the matrix to unit norm.
SGMatrix<float64_t> CNormOne::apply_to_matrix(SGMatrix<float64_t> matrix)
{
// OLD (removed): extracted the dense matrix from the features object.
auto feature_matrix =
features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();

// OLD loop header (removed):
for (int32_t i=0; i<feature_matrix.num_cols; i++)
// NEW loop header (added):
for (auto i : range(matrix.num_cols))
{
// OLD body (removed): wrapped the raw column pointer (non-owning SGVector)
// and scaled by 1/sqrt(dot(v, v)).
SGVector<float64_t> vec(&(feature_matrix.matrix[i*feature_matrix.num_rows]), feature_matrix.num_rows, false);
float64_t norm = std::sqrt(linalg::dot(vec, vec));
SGVector<float64_t>::scale_vector(1.0/norm, vec, feature_matrix.num_rows);
// NEW body (added): uses linalg::norm + linalg::scale on the column.
// assumes get_column returns a non-owning view so the scale writes back
// into `matrix` — TODO confirm against SGMatrix::get_column.
// NOTE(review): an all-zero column gives norm == 0 and a division by
// zero — same behavior as the old code, but worth a guard.
auto vec = matrix.get_column(i);
auto norm = linalg::norm(vec);
linalg::scale(vec, vec, 1.0 / norm);
}
// OLD return (removed):
return feature_matrix;
// NEW return (added):
return matrix;
}

/// apply preproc on single feature vector
Expand Down
9 changes: 4 additions & 5 deletions src/shogun/preprocessor/NormOne.h
Expand Up @@ -44,11 +44,6 @@ class CNormOne : public CDensePreprocessor<float64_t>
/// save preprocessor init-data to file
virtual bool save(FILE* f);

/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);

/// apply preproc on single feature vector
/// result in feature matrix
virtual SGVector<float64_t> apply_to_feature_vector(SGVector<float64_t> vector);
Expand All @@ -58,6 +53,10 @@ class CNormOne : public CDensePreprocessor<float64_t>

/// return a type of preprocessor
virtual EPreprocessorType get_type() const { return P_NORMONE; }

protected:
virtual SGMatrix<float64_t>
apply_to_matrix(SGMatrix<float64_t> matrix) override;
};
}
#endif
25 changes: 10 additions & 15 deletions src/shogun/preprocessor/PNorm.cpp
Expand Up @@ -5,10 +5,11 @@
* Sergey Lisitsyn
*/

#include <shogun/preprocessor/PNorm.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/mathematics/Math.h>
#include <shogun/features/Features.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/preprocessor/PNorm.h>

#ifdef HAVE_LAPACK
#include <shogun/mathematics/lapack.h>
Expand Down Expand Up @@ -56,21 +57,15 @@ bool CPNorm::save (FILE* f)
return false;
}

// --- Diff hunk as rendered by the page: removed (OLD) and added (NEW) lines are
// --- interleaved with the +/- markers stripped. Annotations tag each side.
// OLD doxygen comment (removed):
/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
// OLD signature (removed):
SGMatrix<float64_t> CPNorm::apply_to_feature_matrix (CFeatures* features)
// NEW signature (added): normalizes every column by its p-norm.
SGMatrix<float64_t> CPNorm::apply_to_matrix(SGMatrix<float64_t> matrix)
{
// OLD (removed): extracted the dense matrix from the features object.
auto feature_matrix =
features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();

// OLD loop header (removed):
for (int32_t i=0; i<feature_matrix.num_cols; i++)
// NEW loop header (added):
for (auto i : range(matrix.num_cols))
{
// OLD body (removed): raw column pointer, p-norm, then scale_vector.
float64_t* vec= &(feature_matrix.matrix[i*feature_matrix.num_rows]);
float64_t norm = get_pnorm (vec, feature_matrix.num_rows);
SGVector<float64_t>::scale_vector(1.0/norm, vec, feature_matrix.num_rows);
// NEW body (added): column view + linalg::scale.
// assumes get_column returns a non-owning view so the scale writes back
// into `matrix` — TODO confirm against SGMatrix::get_column.
// NOTE(review): norm == 0 (all-zero column) still divides by zero, as
// in the old code.
auto vec = matrix.get_column(i);
auto norm = get_pnorm(vec.vector, vec.vlen);
linalg::scale(vec, vec, 1.0 / norm);
}
// OLD return (removed):
return feature_matrix;
// NEW return (added):
return matrix;
}

/// apply preproc on single feature vector
Expand Down
9 changes: 4 additions & 5 deletions src/shogun/preprocessor/PNorm.h
Expand Up @@ -47,11 +47,6 @@ class CPNorm : public CDensePreprocessor<float64_t>
/// save preprocessor init-data to file
virtual bool save (FILE* f);

/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
virtual SGMatrix<float64_t> apply_to_feature_matrix (CFeatures* features);

/// apply preproc on single feature vector
/// result in feature matrix
virtual SGVector<float64_t> apply_to_feature_vector (SGVector<float64_t> vector);
Expand All @@ -74,6 +69,10 @@ class CPNorm : public CDensePreprocessor<float64_t>
*/
double get_pnorm () const;

protected:
virtual SGMatrix<float64_t>
apply_to_matrix(SGMatrix<float64_t> matrix) override;

private:
void register_param ();
inline float64_t get_pnorm (float64_t* vec, int32_t vec_len) const;
Expand Down
40 changes: 16 additions & 24 deletions src/shogun/preprocessor/PruneVarSubMean.cpp
Expand Up @@ -51,12 +51,15 @@ void CPruneVarSubMean::fit(CFeatures* features)
for (auto i : range(num_examples))
{
for (auto j : range(num_features))
var[j] += CMath::sq(
m_mean[j] - feature_matrix.matrix[i * num_features + j]);
{
auto diff =
linalg::add(m_mean, feature_matrix.get_column(i), 1.0, -1.0);
var[j] += linalg::dot(diff, diff);
}
}

int32_t num_ok = 0;
int32_t* idx_ok = SG_MALLOC(int32_t, num_features);
auto idx_ok = SGVector<int32_t>(num_features);

for (auto j : range(num_features))
{
Expand All @@ -82,7 +85,6 @@ void CPruneVarSubMean::fit(CFeatures* features)
m_std[j] = std::sqrt(var[idx_ok[j]]);
}
m_num_idx = num_ok;
SG_FREE(idx_ok);
m_mean = new_mean;

m_initialized = true;
Expand All @@ -97,43 +99,33 @@ void CPruneVarSubMean::cleanup()
m_initialized = false;
}

// --- Diff hunk as rendered by the page: removed (OLD) and added (NEW) lines are
// --- interleaved with the +/- markers stripped. Annotations tag each side.
// OLD doxygen comment (removed):
/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
// OLD signature (removed):
SGMatrix<float64_t> CPruneVarSubMean::apply_to_feature_matrix(CFeatures* features)
// NEW signature (added): keeps only the m_num_idx selected features of each
// vector, subtracting the fitted mean (and optionally dividing by the std).
SGMatrix<float64_t>
CPruneVarSubMean::apply_to_matrix(SGMatrix<float64_t> matrix)
{
// fit() must have run first so m_idx / m_mean / m_std are populated.
ASSERT(m_initialized)

// OLD declarations (removed): filled in later by get_feature_matrix.
int32_t num_vectors=0;
int32_t num_features=0;
// NEW declaration (added):
int32_t num_vectors = matrix.num_cols;

// OLD (removed): fetched the matrix from the features object.
auto simple_features = features->as<CDenseFeatures<float64_t>>();
auto m = simple_features->get_feature_matrix();
// NEW (added): reinterpret the input buffer as m_num_idx x num_vectors.
// NOTE(review): `result` aliases matrix.data() with a DIFFERENT row count,
// so it only holds the right values if each output vector is compacted to
// the m_num_idx stride — see the v_src/v_dst note below; confirm intended.
SGMatrix<float64_t> result(matrix.data(), m_num_idx, num_vectors);

// OLD logging (removed):
SG_INFO("get Feature matrix: %ix%i\n", num_vectors, num_features)
SG_INFO("Preprocessing feature matrix\n")
// OLD loop header (removed):
for (int32_t vec=0; vec<num_vectors; vec++)
// NEW loop header (added):
for (auto i : range(num_vectors))
{
// OLD (removed): src at the original num_features stride, dst at the
// compacted m_num_idx stride — an in-place compaction of the buffer.
float64_t* v_src=&m[num_features*vec];
float64_t* v_dst=&m[m_num_idx*vec];
// NEW (added).
// NOTE(review): v_src and v_dst are both get_column(i) of the SAME
// column, so unlike the old code nothing is compacted to the m_num_idx
// stride that `result` above assumes — and if get_column returns a copy
// rather than a view, the writes never reach `matrix` at all. Verify
// against SGMatrix::get_column.
auto v_src = matrix.get_column(i);
auto v_dst = matrix.get_column(i);

if (m_divide_by_std)
{
// OLD inner loop (removed):
for (int32_t feat=0; feat<m_num_idx; feat++)
// NEW inner loop (added): standardize the selected features.
for (auto feat : range(m_num_idx))
v_dst[feat]=(v_src[m_idx[feat]]-m_mean[feat])/m_std[feat];
}
else
{
// OLD inner loop (removed):
for (int32_t feat=0; feat<m_num_idx; feat++)
// NEW inner loop (added): only subtract the mean.
for (auto feat : range(m_num_idx))
v_dst[feat]=(v_src[m_idx[feat]]-m_mean[feat]);
}
}

// OLD epilogue (removed): shrank the features object in place and
// re-fetched its matrix.
simple_features->set_num_features(m_num_idx);
simple_features->get_feature_matrix(num_features, num_vectors);
SG_INFO("new Feature matrix: %ix%i\n", num_vectors, num_features)

return simple_features->get_feature_matrix();
// NEW return (added):
return result;
}

/// apply preproc on single feature vector
Expand Down
9 changes: 4 additions & 5 deletions src/shogun/preprocessor/PruneVarSubMean.h
Expand Up @@ -41,11 +41,6 @@ class CPruneVarSubMean : public CDensePreprocessor<float64_t>
/// cleanup
virtual void cleanup();

/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);

/// apply preproc on single feature vector
/// result in feature matrix
virtual SGVector<float64_t> apply_to_feature_vector(SGVector<float64_t> vector);
Expand All @@ -56,6 +51,10 @@ class CPruneVarSubMean : public CDensePreprocessor<float64_t>
/// return a type of preprocessor
virtual EPreprocessorType get_type() const { return P_PRUNEVARSUBMEAN; }

protected:
virtual SGMatrix<float64_t>
apply_to_matrix(SGMatrix<float64_t> matrix) override;

private:
void init();
void register_parameters();
Expand Down

0 comments on commit c06258f

Please sign in to comment.