Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement RescaleFeatures preprocessor
See the doxygen class description. Add new get_row_vector(index) method for SGMatrix
- Loading branch information
Showing
8 changed files
with
279 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2013 Viktor Gal | ||
* Copyright (C) 2013 Viktor Gal | ||
*/ | ||
|
||
#include <shogun/preprocessor/RescaleFeatures.h> | ||
|
||
using namespace shogun; | ||
|
||
/** Default constructor: forwards to the CDensePreprocessor<float64_t> base and does nothing else. */
CRescaleFeatures::CRescaleFeatures() : CDensePreprocessor<float64_t>()
{
}
|
||
/** Destructor: the body is intentionally empty. */
CRescaleFeatures::~CRescaleFeatures()
{
}
|
||
/**
 * Check that the given features can be handled by this preprocessor.
 *
 * No state is derived from the features; the call only asserts that they
 * are dense (C_DENSE) features of type F_DREAL.
 *
 * @param features features to validate, must not be a null pointer
 * @return always true (a failed check is reported through ASSERT)
 */
bool CRescaleFeatures::init(CFeatures* features)
{
	/* trip the assertion rather than dereference a null pointer below */
	ASSERT(features);
	ASSERT(features->get_feature_class()==C_DENSE);
	ASSERT(features->get_feature_type()==F_DREAL);
	return true;
}
|
||
/** Cleanup hook: this implementation performs no work. */
void CRescaleFeatures::cleanup()
{
}
|
||
/**
 * Loading from a file is not implemented: nothing is read from f.
 *
 * @param f file handle, unused
 * @return always false
 */
bool CRescaleFeatures::load(FILE* f)
{
	/* the locale macros are still invoked back to back, exactly as before */
	SG_SET_LOCALE_C;
	SG_RESET_LOCALE;

	const bool loaded = false;
	return loaded;
}
|
||
/**
 * Saving to a file is not implemented: nothing is written to f.
 *
 * @param f file handle, unused
 * @return always false
 */
bool CRescaleFeatures::save(FILE* f)
{
	/* the locale macros are still invoked back to back, exactly as before */
	SG_SET_LOCALE_C;
	SG_RESET_LOCALE;

	const bool saved = false;
	return saved;
}
|
||
/**
 * Rescale every row of the feature matrix to the range [0, 1].
 *
 * For each row the minimum and maximum over all columns are determined and
 * every entry x of that row is replaced by (x - min) / (max - min). Rows
 * whose range is not positive (all entries equal) are left untouched so
 * that no division by zero takes place. A matrix without columns is
 * returned as is.
 *
 * The entries are overwritten through the matrix returned by
 * get_feature_matrix(); presumably that matrix shares its storage with
 * the features object -- NOTE(review): confirm.
 *
 * @param features features to rescale; cast to CDenseFeatures<float64_t>
 *        without a runtime check
 * @return the rescaled feature matrix
 */
SGMatrix<float64_t> CRescaleFeatures::apply_to_feature_matrix(CFeatures* features)
{
	SGMatrix<float64_t> feature_matrix=
		static_cast<CDenseFeatures<float64_t>*>(features)->get_feature_matrix();

	/* with no columns there is no first element to seed min/max with */
	if (feature_matrix.num_cols < 1)
		return feature_matrix;

	for (index_t i = 0; i < feature_matrix.num_rows; i++)
	{
		/* find the max and min values of the row in one loop, reading
		 * straight from the matrix (no temporary row vector needed) */
		float64_t min = feature_matrix(i, 0);
		float64_t max = min;
		for (index_t j = 1; j < feature_matrix.num_cols; j++)
		{
			min = CMath::min(feature_matrix(i, j), min);
			max = CMath::max(feature_matrix(i, j), max);
		}
		const float64_t range = max-min;

		/* constant rows (range == 0) stay unchanged */
		if (range > 0)
		{
			for (index_t j = 0; j < feature_matrix.num_cols; j++)
			{
				float64_t& k = feature_matrix(i, j);
				k = (k-min)/range;
			}
		}
	}

	return feature_matrix;
}
|
||
/**
 * Rescale a single feature vector to the range [0, 1].
 *
 * Every entry x is mapped to (x - min) / (max - min), where min and max
 * are taken over the whole vector. If the range is not positive (all
 * entries equal) an unmodified copy is returned. The input vector itself
 * is never modified.
 *
 * @param vector the input feature vector, must hold at least one element
 * @return a new vector with the rescaled values
 */
SGVector<float64_t> CRescaleFeatures::apply_to_feature_vector(SGVector<float64_t> vector)
{
	ASSERT(vector.vlen > 0);
	SGVector<float64_t> rescaled_vec = vector.clone();
	float64_t min = vector[0];
	float64_t max = vector[0];

	/* find the max and min values in one loop */
	for (index_t i = 1; i < vector.vlen; i++)
	{
		min = CMath::min(vector[i], min);
		max = CMath::max(vector[i], max);
	}
	const float64_t range = max - min;

	if (range > 0)
	{
		/* divide by the range (rather than multiply by 1/range) so the
		 * result is bit-identical to apply_to_feature_matrix and the
		 * maximum maps to exactly 1.0 */
		for (index_t i = 0; i < rescaled_vec.vlen; i++)
			rescaled_vec[i] = (vector[i]-min)/range;
	}

	return rescaled_vec;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2013 Viktor Gal | ||
* Copyright (C) 2013 Viktor Gal | ||
*/ | ||
|
||
#ifndef __RESCALEFEATURES_H__ | ||
#define __RESCALEFEATURES_H__ | ||
|
||
#include <shogun/preprocessor/DensePreprocessor.h> | ||
|
||
namespace shogun | ||
{ | ||
/**@brief Preprocessor RescaleFeautres is rescaling the range of features to | ||
* make the features independent of each other and aims to scale the range | ||
* in [0, 1] or [−1, 1]. | ||
* | ||
* The general formula is given as: | ||
* \f[ | ||
* x' = \frac{x - min}{max - min} | ||
* \f] | ||
* where \f$x\f$ is an original value, \f$x'\f$ is the normalized value. | ||
* It does not need any initialization. | ||
*/ | ||
class CRescaleFeatures : public CDensePreprocessor<float64_t> | ||
{ | ||
public: | ||
/** default ctor */ | ||
CRescaleFeatures(); | ||
|
||
/** dtor */ | ||
virtual ~CRescaleFeatures(); | ||
|
||
/** | ||
* initialize preprocessor from features | ||
* initialization is not required by this preprocessor. | ||
*/ | ||
virtual bool init(CFeatures* features); | ||
|
||
/** | ||
* Cleanup | ||
*/ | ||
virtual void cleanup(); | ||
|
||
/** | ||
* initialize preprocessor from file | ||
*/ | ||
virtual bool load(FILE* f); | ||
|
||
/** | ||
* save preprocessor init-data to file | ||
*/ | ||
virtual bool save(FILE* f); | ||
|
||
/** | ||
* Apply preproc on a feature matrix | ||
* | ||
* @param features input feature matrix | ||
* @return pointer to feature_matrix, i.e. f->get_feature_matrix(); | ||
*/ | ||
virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features); | ||
|
||
/** | ||
* Apply preproc on a single feature vector | ||
* | ||
* @param vector the input feature vector | ||
* @return the output feature vector | ||
*/ | ||
virtual SGVector<float64_t> apply_to_feature_vector(SGVector<float64_t> vector); | ||
|
||
/** @return object name */ | ||
virtual const char* get_name() const { return "RescaleFeatures"; } | ||
|
||
/** return a type of preprocessor */ | ||
virtual EPreprocessorType get_type() const { return P_RESCALEFEATURES; } | ||
}; | ||
} | ||
|
||
#endif /* __RESCALEFEATURES_H__ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2013 Viktor Gal | ||
*/ | ||
|
||
#include <shogun/preprocessor/RescaleFeatures.h> | ||
#include <gtest/gtest.h> | ||
|
||
using namespace shogun; | ||
|
||
/* Rescales one random vector and compares every entry against the value
 * computed by hand as (x - min) / (max - min). */
TEST(RescaleFeatures, apply_to_feature_vector)
{
	const index_t vlen = 10;
	SGVector<float64_t> input(vlen);
	CRescaleFeatures rescaler;

	/* seed the generator before drawing the random input */
	sg_rand->set_seed(12345);
	input.random(-1024, 1024);

	/* reference statistics of the untouched input */
	const float64_t lowest = SGVector<float64_t>::min(input, vlen);
	const float64_t highest = SGVector<float64_t>::max(input, vlen);
	const float64_t span = highest - lowest;

	SGVector<float64_t> rescaled = rescaler.apply_to_feature_vector(input);

	for (index_t idx = 0; idx < vlen; idx++)
	{
		const float64_t expected = (input[idx]-lowest)/span;
		EXPECT_DOUBLE_EQ(expected, rescaled[idx]);
	}
}
|
||
/* Applies the preprocessor to a random feature matrix through
 * CDenseFeatures and compares every entry against the value computed by
 * hand from an untouched copy of the data. */
TEST(RescaleFeatures, apply_to_feature_matrix)
{
	index_t num_features = 3;
	index_t num_vectors = 10;
	SGVector<float64_t> min(num_features), range(num_features);
	SGVector<float64_t> v(num_features*num_vectors), ev;

	/* seed the generator like the vector test does, so a failure can be
	 * reproduced with the same data */
	sg_rand->set_seed(12345);
	v.random(-1024, 1024);
	/* ev keeps the original values; v backs the matrix handed to feats */
	ev = v.clone();

	SGMatrix<float64_t> m(v.vector, num_features, num_vectors, false);
	SGMatrix<float64_t> em(ev.vector, num_features, num_vectors, false);
	CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(m);
	CRescaleFeatures* rescaler = new CRescaleFeatures();

	/* find the min and range for each feature among all the vectors */
	for (index_t i = 0; i < num_features; i++)
	{
		SGVector<float64_t> t = em.get_row_vector(i);
		min[i] = SGVector<float64_t>::min(t.vector, t.vlen);
		range[i] = SGVector<float64_t>::max(t.vector, t.vlen) - min[i];
	}

	feats->add_preprocessor(rescaler);
	feats->apply_preprocessor();
	for (index_t i = 0; i < num_vectors; i++)
	{
		/* named differently from the outer data vector v to avoid shadowing */
		SGVector<float64_t> rescaled = feats->get_feature_vector(i);
		float64_t* v_orig = em.get_column_vector(i);
		for (index_t j = 0; j < num_features; j++)
		{
			float64_t e = (v_orig[j]-min[j])/range[j];
			EXPECT_DOUBLE_EQ(e, rescaled[j]);
		}
	}

	SG_UNREF(feats);
}