Skip to content

Commit

Permalink
Cleanup preprocessors
Browse files Browse the repository at this point in the history
  • Loading branch information
vinx13 authored and vigsterkr committed Jun 8, 2018
1 parent b2306f7 commit 80f36a6
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 45 deletions.
13 changes: 0 additions & 13 deletions src/shogun/preprocessor/DependenceMaximization.cpp
Expand Up @@ -60,19 +60,6 @@ CDependenceMaximization::~CDependenceMaximization()
SG_UNREF(m_labels_feats);
}

/** Validate that the supplied features can be used by this preprocessor.
 *
 * Accepts only dense or sparse feature containers holding 64-bit real
 * values; any NULL pointer, other feature class, or other feature type
 * aborts via REQUIRE with a descriptive error.
 *
 * @param features feature container to validate (must not be NULL)
 */
void CDependenceMaximization::fit(CFeatures* features)
{
	REQUIRE(features, "Features are not initialized!\n");

	// only dense or sparse containers are supported
	const auto feat_class = features->get_feature_class();
	const bool class_ok = (feat_class == C_DENSE) || (feat_class == C_SPARSE);
	REQUIRE(class_ok,
		"Only allowed for dense/sparse features! Provided an instance of "
		"%s which is of class %d!\n",
		features->get_name(), feat_class);

	// element type must be double precision
	const auto feat_type = features->get_feature_type();
	REQUIRE(feat_type == F_DREAL, "Only allowed for "
		"features of double type! Provided %d!\n",
		feat_type);
}

CFeatures* CDependenceMaximization::create_transformed_copy(CFeatures* features,
index_t idx)
{
Expand Down
4 changes: 2 additions & 2 deletions src/shogun/preprocessor/KernelPCA.cpp
Expand Up @@ -89,9 +89,9 @@ void CKernelPCA::fit(CFeatures* features)
m_target_dim = n;
}

SGVector<float64_t> bias_tmp = linalg::rowwise_sum(kernel_matrix);
auto bias_tmp = linalg::rowwise_sum(kernel_matrix);
linalg::scale(bias_tmp, bias_tmp, -1.0 / n);
float64_t s = linalg::sum(bias_tmp) / n;
auto s = linalg::sum(bias_tmp) / n;
linalg::add_scalar(bias_tmp, -s);

linalg::center_matrix(kernel_matrix);
Expand Down
32 changes: 10 additions & 22 deletions src/shogun/preprocessor/PruneVarSubMean.cpp
Expand Up @@ -5,11 +5,12 @@
* Sergey Lisitsyn, Bjoern Esser
*/

#include <shogun/preprocessor/PruneVarSubMean.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/features/Features.h>
#include <shogun/io/SGIO.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/preprocessor/PruneVarSubMean.h>

using namespace shogun;

Expand All @@ -35,41 +36,29 @@ void CPruneVarSubMean::fit(CFeatures* features)
int32_t num_examples = simple_features->get_num_vectors();
int32_t num_features = simple_features->get_num_features();

m_mean = SGVector<float64_t>();
m_idx = SGVector<int32_t>();
m_std = SGVector<float64_t>();

m_mean.resize_vector(num_features);
float64_t* var = SG_MALLOC(float64_t, num_features);
int32_t i, j;

memset(var, 0, num_features * sizeof(float64_t));
m_mean.zero();
SGVector<float64_t> var(num_features);

auto feature_matrix = simple_features->get_feature_matrix();

// compute mean
for (i = 0; i < num_examples; i++)
{
for (j = 0; j < num_features; j++)
m_mean[j] += feature_matrix.matrix[i * num_features + j];
}

for (j = 0; j < num_features; j++)
m_mean[j] /= num_examples;
m_mean = linalg::rowwise_sum(feature_matrix);
linalg::scale(m_mean, m_mean, 1.0 / num_examples);

// compute var
for (i = 0; i < num_examples; i++)
for (auto i : range(num_examples))
{
for (j = 0; j < num_features; j++)
for (auto j : range(num_features))
var[j] += CMath::sq(
m_mean[j] - feature_matrix.matrix[i * num_features + j]);
}

int32_t num_ok = 0;
int32_t* idx_ok = SG_MALLOC(int32_t, num_features);

for (j = 0; j < num_features; j++)
for (auto j : range(num_features))
{
var[j] /= num_examples;

Expand All @@ -86,15 +75,14 @@ void CPruneVarSubMean::fit(CFeatures* features)
SGVector<float64_t> new_mean(num_ok);
m_std.resize_vector(num_ok);

for (j = 0; j < num_ok; j++)
for (auto j : range(num_ok))
{
m_idx[j] = idx_ok[j];
new_mean[j] = m_mean[idx_ok[j]];
m_std[j] = std::sqrt(var[idx_ok[j]]);
}
m_num_idx = num_ok;
SG_FREE(idx_ok);
SG_FREE(var);
m_mean = new_mean;

m_initialized = true;
Expand Down
15 changes: 7 additions & 8 deletions src/shogun/preprocessor/SortUlongString.cpp
Expand Up @@ -4,10 +4,11 @@
* Authors: Soeren Sonnenburg
*/

#include <shogun/preprocessor/SortUlongString.h>
#include <shogun/base/range.h>
#include <shogun/features/Features.h>
#include <shogun/features/StringFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/preprocessor/SortUlongString.h>

using namespace shogun;

Expand Down Expand Up @@ -46,15 +47,14 @@ bool CSortUlongString::save(FILE* f)
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
bool CSortUlongString::apply_to_string_features(CFeatures* f)
{
int32_t i;
auto sf = f->as<CStringFeatures<uint64_t>>();
int32_t num_vec = sf->get_num_vectors();
auto num_vec = sf->get_num_vectors();

for (i=0; i<num_vec; i++)
for (auto i : range(num_vec))
{
int32_t len=0;
bool free_vec;
uint64_t* vec = sf->get_feature_vector(i, len, free_vec);
auto vec = sf->get_feature_vector(i, len, free_vec);
ASSERT(!free_vec) // won't work with non-in-memory string features

SG_DEBUG("sorting string of length %i\n", len)
Expand All @@ -69,10 +69,9 @@ bool CSortUlongString::apply_to_string_features(CFeatures* f)
uint64_t* CSortUlongString::apply_to_string(uint64_t* f, int32_t& len)
{
uint64_t* vec=SG_MALLOC(uint64_t, len);
int32_t i=0;

for (i=0; i<len; i++)
vec[i]=f[i];
for (auto i : range(len))
vec[i] = f[i];

//CMath::qsort(vec, len);
CMath::radix_sort(vec, len);
Expand Down

0 comments on commit 80f36a6

Please sign in to comment.