Skip to content

Commit

Permalink
Implement RescaleFeatures preprocessor
Browse files Browse the repository at this point in the history
See the doxygen class description.
Add new get_row_vector(index) method for SGMatrix
  • Loading branch information
vigsterkr committed May 10, 2013
1 parent f68d3cb commit 173b59d
Show file tree
Hide file tree
Showing 8 changed files with 279 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/interfaces/modular/Preprocessor.i
Expand Up @@ -18,6 +18,7 @@
%rename(RandomFourierGaussPreproc) CRandomFourierGaussPreproc;
%rename(HomogeneousKernelMap) CHomogeneousKernelMap;
%rename(PNorm) CPNorm;
%rename(RescaleFeatures) CRescaleFeatures;

%rename(DimensionReductionPreprocessor) CDimensionReductionPreprocessor;
%rename(PCA) CPCA;
Expand Down Expand Up @@ -102,6 +103,7 @@ namespace shogun
%include <shogun/preprocessor/RandomFourierGaussPreproc.h>
%include <shogun/preprocessor/HomogeneousKernelMap.h>
%include <shogun/preprocessor/PNorm.h>
%include <shogun/preprocessor/RescaleFeatures.h>

%include <shogun/preprocessor/PCA.h>
%include <shogun/preprocessor/KernelPCA.h>
Expand Down
1 change: 1 addition & 0 deletions src/interfaces/modular/Preprocessor_includes.i
Expand Up @@ -12,6 +12,7 @@
#include <shogun/preprocessor/RandomFourierGaussPreproc.h>
#include <shogun/preprocessor/HomogeneousKernelMap.h>
#include <shogun/preprocessor/PNorm.h>
#include <shogun/preprocessor/RescaleFeatures.h>

#include <shogun/preprocessor/DimensionReductionPreprocessor.h>
#include <shogun/preprocessor/PCA.h>
Expand Down
11 changes: 11 additions & 0 deletions src/shogun/lib/SGMatrix.cpp
Expand Up @@ -893,6 +893,17 @@ void SGMatrix<T>::save(CFile* writer)
SG_RESET_LOCALE;
}

/** Copy one row of the matrix into a newly allocated vector.
 *
 * The matrix is stored column by column, so the elements of a row are
 * num_rows entries apart; they are gathered into a contiguous vector of
 * num_cols elements. No bounds check is performed on the row index.
 *
 * @param row row index
 * @return vector holding a copy of the requested row
 */
template<class T>
SGVector<T> SGMatrix<T>::get_row_vector(index_t row) const
{
	/* reference counted (the default), so the freshly allocated buffer is
	 * released once the last copy of the returned vector goes away */
	SGVector<T> rowv(num_cols);
	for (index_t i = 0; i < num_cols; i++)
	{
		/* widen before multiplying: i*num_rows may not fit into index_t */
		const int64_t offset = static_cast<int64_t>(i)*num_rows+row;
		rowv[i] = matrix[offset];
	}
	return rowv;
}

template class SGMatrix<bool>;
template class SGMatrix<char>;
template class SGMatrix<int8_t>;
Expand Down
7 changes: 7 additions & 0 deletions src/shogun/lib/SGMatrix.h
Expand Up @@ -53,6 +53,13 @@ template<class T> class SGMatrix : public SGReferencedData
return &matrix[col*num_rows];
}

/** get a row vector
*
* Copies the entries of the requested row out of the matrix into a
* separate vector with one element per column (num_cols elements).
* No bounds check is performed on the row index.
*
* @param row row index
* @return row vector (a copy of the row's entries)
*/
SGVector<T> get_row_vector(index_t row) const;

/** operator overload for matrix read only access
* @param i_row
* @param i_col
Expand Down
3 changes: 2 additions & 1 deletion src/shogun/preprocessor/Preprocessor.h
Expand Up @@ -48,7 +48,8 @@ enum EPreprocessorType
P_DIMENSIONREDUCTIONPREPROCESSOR=160,
P_SUMONE=170,
P_HOMOGENEOUSKERNELMAP = 180,
P_PNORM = 190
P_PNORM = 190,
P_RESCALEFEATURES = 200
};

/** @brief Class Preprocessor defines a preprocessor interface.
Expand Down
104 changes: 104 additions & 0 deletions src/shogun/preprocessor/RescaleFeatures.cpp
@@ -0,0 +1,104 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Viktor Gal
* Copyright (C) 2013 Viktor Gal
*/

#include <shogun/preprocessor/RescaleFeatures.h>

using namespace shogun;

/** Default constructor: only the base class is constructed. */
CRescaleFeatures::CRescaleFeatures() : CDensePreprocessor<float64_t>()
{
	/* no members of its own to set up */
}

/** Destructor: the class holds no resources of its own. */
CRescaleFeatures::~CRescaleFeatures()
{
	/* nothing to release */
}

/** Verify that the given features are dense and real valued.
 *
 * Nothing is learned from the features; the two assertions are the only
 * work done here.
 *
 * @param features features the preprocessor is going to be applied to
 * @return always true (a failed check is reported through ASSERT)
 */
bool CRescaleFeatures::init(CFeatures* features)
{
	/* storage layout must be dense ... */
	const bool is_dense = (features->get_feature_class()==C_DENSE);
	ASSERT(is_dense);

	/* ... and the element type must be float64_t */
	const bool is_real = (features->get_feature_type()==F_DREAL);
	ASSERT(is_real);

	return true;
}

/** Cleanup hook; there is nothing to clean up for this preprocessor. */
void CRescaleFeatures::cleanup()
{
	/* intentionally empty */
}

/** Load the preprocessor from a file.
 *
 * Nothing is read from the file.
 *
 * @param f file handle (unused)
 * @return always false
 */
bool CRescaleFeatures::load(FILE* f)
{
	const bool loaded = false;

	/* locale bracket kept as a placeholder; no I/O happens in between */
	SG_SET_LOCALE_C;
	SG_RESET_LOCALE;

	return loaded;
}

/** Save the preprocessor to a file.
 *
 * Nothing is written to the file.
 *
 * @param f file handle (unused)
 * @return always false
 */
bool CRescaleFeatures::save(FILE* f)
{
	const bool saved = false;

	/* locale bracket kept as a placeholder; no I/O happens in between */
	SG_SET_LOCALE_C;
	SG_RESET_LOCALE;

	return saved;
}

/** Rescale every feature of a dense feature matrix.
 *
 * Each row of the matrix is treated as one feature: its minimum and maximum
 * over all columns are determined and every entry x of the row is replaced
 * by (x-min)/(max-min). Rows whose range is not positive are left untouched.
 * The entries are modified through the matrix returned by
 * get_feature_matrix().
 *
 * @param features dense float64_t features to be rescaled
 * @return the rescaled feature matrix
 */
SGMatrix<float64_t> CRescaleFeatures::apply_to_feature_matrix(CFeatures* features)
{
	SGMatrix<float64_t> feature_matrix=((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
	const index_t num_features = feature_matrix.num_rows;
	const index_t num_vectors = feature_matrix.num_cols;

	/* without any column there is nothing to rescale, and reading the
	 * first entry of a row below would be out of bounds */
	if (num_vectors < 1)
		return feature_matrix;

	for (index_t i = 0; i < num_features; i++)
	{
		/* find the max and min values of the row in one pass, reading the
		 * matrix directly instead of copying the row into a temporary */
		float64_t min = feature_matrix(i, 0);
		float64_t max = min;
		for (index_t j = 1; j < num_vectors; j++)
		{
			const float64_t value = feature_matrix(i, j);
			min = CMath::min(value, min);
			max = CMath::max(value, max);
		}
		const float64_t range = max-min;

		/* a constant row cannot be rescaled (division by zero) */
		if (range > 0)
		{
			for (index_t j = 0; j < num_vectors; j++)
			{
				float64_t& k = feature_matrix(i, j);
				k = (k-min)/range;
			}
		}
	}

	return feature_matrix;
}

/** Rescale a single feature vector.
 *
 * The input is cloned; the clone is shifted by the vector's minimum and
 * scaled by the inverse of its range (max-min). When the range is not
 * positive the clone is returned unchanged.
 *
 * @param vector the input feature vector, must not be empty
 * @return the rescaled copy of the input vector
 */
SGVector<float64_t> CRescaleFeatures::apply_to_feature_vector(SGVector<float64_t> vector)
{
	ASSERT(vector.vlen > 0);
	SGVector<float64_t> result = vector.clone();

	/* single pass over the input tracking both extremes */
	float64_t lowest = vector[0];
	float64_t highest = vector[0];
	index_t idx = 1;
	while (idx < vector.vlen)
	{
		lowest = CMath::min(vector[idx], lowest);
		highest = CMath::max(vector[idx], highest);
		idx++;
	}

	const float64_t span = highest - lowest;

	/* constant vectors cannot be rescaled; hand back the plain copy */
	if (!(span > 0))
		return result;

	result.add(-lowest);
	result.scale(1/span);
	return result;
}
83 changes: 83 additions & 0 deletions src/shogun/preprocessor/RescaleFeatures.h
@@ -0,0 +1,83 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Viktor Gal
* Copyright (C) 2013 Viktor Gal
*/

#ifndef __RESCALEFEATURES_H__
#define __RESCALEFEATURES_H__

#include <shogun/preprocessor/DensePreprocessor.h>

namespace shogun
{
/**@brief Preprocessor RescaleFeatures rescales the range of features to
* make the features independent of each other. Rescaling in general aims to
* bring the range to [0, 1] or [−1, 1]; the formula used here maps the
* values to [0, 1].
*
* The general formula is given as:
* \f[
* x' = \frac{x - min}{max - min}
* \f]
* where \f$x\f$ is an original value, \f$x'\f$ is the normalized value,
* and \f$min\f$ and \f$max\f$ are the smallest and largest value of the
* data being rescaled.
* It does not need any initialization.
*/
class CRescaleFeatures : public CDensePreprocessor<float64_t>
{
public:
/** default ctor */
CRescaleFeatures();

/** dtor */
virtual ~CRescaleFeatures();

/**
* initialize preprocessor from features
* initialization is not required by this preprocessor.
*
* @param features features the preprocessor will be applied to
* @return whether the initialization was successful
*/
virtual bool init(CFeatures* features);

/**
* Cleanup
*/
virtual void cleanup();

/**
* initialize preprocessor from file
*
* @param f file to read from
* @return whether the preprocessor was loaded successfully
*/
virtual bool load(FILE* f);

/**
* save preprocessor init-data to file
*
* @param f file to write to
* @return whether the preprocessor was saved successfully
*/
virtual bool save(FILE* f);

/**
* Apply preproc on a feature matrix
*
* @param features input features whose feature matrix is rescaled
* @return the rescaled feature matrix
*/
virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);

/**
* Apply preproc on a single feature vector
*
* @param vector the input feature vector
* @return the output feature vector
*/
virtual SGVector<float64_t> apply_to_feature_vector(SGVector<float64_t> vector);

/** @return object name */
virtual const char* get_name() const { return "RescaleFeatures"; }

/** return a type of preprocessor */
virtual EPreprocessorType get_type() const { return P_RESCALEFEATURES; }
};
}

#endif /* __RESCALEFEATURES_H__ */
69 changes: 69 additions & 0 deletions tests/unit/preprocessor/RescaleFeatures_unittest.cc
@@ -0,0 +1,69 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Viktor Gal
*/

#include <shogun/preprocessor/RescaleFeatures.h>
#include <gtest/gtest.h>

using namespace shogun;

/* Rescaling a single random vector must match (x-min)/(max-min) computed
 * independently from the untouched input. */
TEST(RescaleFeatures, apply_to_feature_vector)
{
	index_t vlen = 10;
	SGVector<float64_t> t(vlen);
	CRescaleFeatures rescaler;

	/* reproducible random input */
	sg_rand->set_seed(12345);
	t.random(-1024, 1024);

	/* reference statistics of the input */
	const float64_t lowest = SGVector<float64_t>::min(t, vlen);
	const float64_t highest = SGVector<float64_t>::max(t, vlen);
	const float64_t span = highest - lowest;

	SGVector<float64_t> out = rescaler.apply_to_feature_vector(t);

	for (index_t i = 0; i < vlen; i++)
	{
		const float64_t expected = (t[i]-lowest)/span;
		EXPECT_DOUBLE_EQ(expected, out[i]);
	}
}

/* Applying the preprocessor through CDenseFeatures must rescale every
 * feature (matrix row) by its own minimum and range over all vectors. */
TEST(RescaleFeatures, apply_to_feature_matrix)
{
	index_t num_features = 3;
	index_t num_vectors = 10;
	SGVector<float64_t> min(num_features), range(num_features);
	SGVector<float64_t> v(num_features*num_vectors), ev;

	/* fixed seed so the data is reproducible even when this test is run
	 * on its own */
	sg_rand->set_seed(12345);
	v.random(-1024, 1024);
	/* untouched copy of the data used to compute the expected values */
	ev = v.clone();

	SGMatrix<float64_t> m(v.vector, num_features, num_vectors, false);
	SGMatrix<float64_t> em(ev.vector, num_features, num_vectors, false);
	CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(m);
	CRescaleFeatures* rescaler = new CRescaleFeatures();

	/* find the min and range for each feature among all the vectors */
	for (index_t i = 0; i < num_features; i++)
	{
		SGVector<float64_t> t = em.get_row_vector(i);
		min[i] = SGVector<float64_t>::min(t.vector, t.vlen);
		range[i] = SGVector<float64_t>::max(t.vector, t.vlen) - min[i];
	}

	feats->add_preprocessor(rescaler);
	feats->apply_preprocessor();
	for (index_t i = 0; i < num_vectors; i++)
	{
		/* named so that it does not shadow the data vector v above */
		SGVector<float64_t> rescaled = feats->get_feature_vector(i);
		float64_t* v_orig = em.get_column_vector(i);
		for (index_t j = 0; j < num_features; j++)
		{
			float64_t e = (v_orig[j]-min[j])/range[j];
			EXPECT_DOUBLE_EQ(e, rescaled[j]);
		}
	}

	SG_UNREF(feats);
}

0 comments on commit 173b59d

Please sign in to comment.