Cleanup dense preprocessors and rename init to fit
vinx13 authored and vigsterkr committed Jun 8, 2018
1 parent 6e8a04e commit 4784e33
Showing 23 changed files with 172 additions and 226 deletions.
src/shogun/metric/LMNNImpl.cpp: 2 changes (1 addition & 1 deletion)
@@ -365,7 +365,7 @@ SGMatrix<float64_t> CLMNNImpl::compute_pca_transform(CDenseFeatures<float64_t>*
         new CDenseFeatures<float64_t>(features->get_feature_matrix().clone());
     CPruneVarSubMean* mean_substractor =
         new CPruneVarSubMean(false); // false to avoid variance normalization
-    mean_substractor->init(cloned_features);
+    mean_substractor->fit(cloned_features);
     mean_substractor->apply_to_feature_matrix(cloned_features);
 
     // Obtain the linear transform applying PCA
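The hunk above is the entire caller-side impact of the rename: a preprocessor is now explicitly trained with fit() and then applied, rather than init()-ed. A minimal sketch of the lifecycle the commit converges on, using only member calls visible in this commit; train_feats and test_feats are hypothetical CDenseFeatures<float64_t>* variables standing in for whatever the caller owns:

    // Fit once, then reuse the learned state on any number of feature sets.
    CPruneVarSubMean* preproc = new CPruneVarSubMean(false);
    preproc->fit(train_feats);                     // was: preproc->init(train_feats)
    preproc->apply_to_feature_matrix(train_feats); // transform training data
    preproc->apply_to_feature_matrix(test_feats);  // reuse fitted mean/variance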
src/shogun/preprocessor/HomogeneousKernelMap.cpp: 9 changes (1 addition & 8 deletions)
@@ -43,13 +43,6 @@ CHomogeneousKernelMap::~CHomogeneousKernelMap()
 {
 }
 
-bool CHomogeneousKernelMap::init(CFeatures* features)
-{
-    ASSERT(features->get_feature_class()==C_DENSE)
-    ASSERT(features->get_feature_type()==F_DREAL)
-
-    return true;
-}
 
 void CHomogeneousKernelMap::cleanup()
 {
@@ -163,7 +156,7 @@ void CHomogeneousKernelMap::init()
 
 SGMatrix<float64_t> CHomogeneousKernelMap::apply_to_feature_matrix (CFeatures* features)
 {
-    CDenseFeatures<float64_t>* simple_features = (CDenseFeatures<float64_t>*)features;
+    auto simple_features = features->as<CDenseFeatures<float64_t>>();
     int32_t num_vectors = simple_features->get_num_vectors ();
     int32_t num_features = simple_features->get_num_features ();
 
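The second hunk swaps an unchecked C-style cast for features->as<CDenseFeatures<float64_t>>(). The excerpt does not show Shogun's implementation of as<>(), but the usual shape of such a helper is a checked downcast that fails loudly instead of silently misreading memory; a self-contained sketch under that assumption:

    #include <stdexcept>

    struct Base { virtual ~Base() = default; };
    struct Derived : Base { };

    // Hypothetical analogue of as<T>(): return the downcast pointer or throw.
    template <typename T, typename B>
    T* as_checked(B* obj)
    {
        if (auto* casted = dynamic_cast<T*>(obj)) // nullptr if the type is wrong
            return casted;
        throw std::invalid_argument("object is not of the requested type");
    }

    // usage: Base* b = new Derived; Derived* d = as_checked<Derived>(b);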
src/shogun/preprocessor/HomogeneousKernelMap.h: 3 changes (0 additions & 3 deletions)
@@ -69,9 +69,6 @@ namespace shogun
     /** destructor */
     virtual ~CHomogeneousKernelMap();
 
-    /** initialize preprocessor from features */
-    virtual bool init(CFeatures* features);
-
     /** cleanup */
     virtual void cleanup();
 
src/shogun/preprocessor/LogPlusOne.cpp: 13 changes (2 additions & 11 deletions)
@@ -21,15 +21,6 @@ CLogPlusOne::~CLogPlusOne()
 {
 }
 
-/// initialize preprocessor from features
-bool CLogPlusOne::init(CFeatures* features)
-{
-    ASSERT(features->get_feature_class()==C_DENSE)
-    ASSERT(features->get_feature_type()==F_DREAL)
-
-    return true;
-}
-
 /// clean up allocated memory
 void CLogPlusOne::cleanup()
 {
@@ -56,8 +47,8 @@ bool CLogPlusOne::save(FILE* f)
 /// return pointer to feature_matrix, i.e. f->get_feature_matrix();
 SGMatrix<float64_t> CLogPlusOne::apply_to_feature_matrix(CFeatures* features)
 {
-    SGMatrix<float64_t> feature_matrix =
-        ((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
+    auto feature_matrix =
+        features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();
 
     for (int32_t i=0; i<feature_matrix.num_cols; i++)
     {
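Judging by the class name (the loop body is elided above), CLogPlusOne maps every entry x to log(1 + x). A standalone sketch of that elementwise transform, matching the column loop's shape but using no Shogun types:

    #include <cmath>
    #include <vector>

    // Apply x -> log(1 + x) over a column-major feature matrix;
    // std::log1p keeps precision for entries near zero.
    void log_plus_one(std::vector<double>& matrix)
    {
        for (double& x : matrix)
            x = std::log1p(x);
    }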
src/shogun/preprocessor/LogPlusOne.h: 3 changes (0 additions & 3 deletions)
@@ -37,9 +37,6 @@ class CLogPlusOne : public CDensePreprocessor<float64_t>
     /** destructor */
     virtual ~CLogPlusOne();
 
-    /// initialize preprocessor from features
-    virtual bool init(CFeatures* features);
-
     /// cleanup
     virtual void cleanup();
     /// initialize preprocessor from file
src/shogun/preprocessor/NormOne.cpp: 12 changes (2 additions & 10 deletions)
@@ -22,15 +22,6 @@ CNormOne::~CNormOne()
 {
 }
 
-/// initialize preprocessor from features
-bool CNormOne::init(CFeatures* features)
-{
-    ASSERT(features->get_feature_class()==C_DENSE)
-    ASSERT(features->get_feature_type()==F_DREAL)
-
-    return true;
-}
-
 /// clean up allocated memory
 void CNormOne::cleanup()
 {
@@ -57,7 +48,8 @@ bool CNormOne::save(FILE* f)
 /// return pointer to feature_matrix, i.e. f->get_feature_matrix();
 SGMatrix<float64_t> CNormOne::apply_to_feature_matrix(CFeatures* features)
 {
-    SGMatrix<float64_t> feature_matrix=((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
+    auto feature_matrix =
+        features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();
 
     for (int32_t i=0; i<feature_matrix.num_cols; i++)
     {
src/shogun/preprocessor/NormOne.h: 2 changes (0 additions & 2 deletions)
@@ -37,8 +37,6 @@ class CNormOne : public CDensePreprocessor<float64_t>
     /** destructor */
    virtual ~CNormOne();
 
-    /// initialize preprocessor from features
-    virtual bool init(CFeatures* features);
     /// cleanup
     virtual void cleanup();
     /// initialize preprocessor from file
src/shogun/preprocessor/PNorm.cpp: 12 changes (2 additions & 10 deletions)
@@ -35,15 +35,6 @@ CPNorm::~CPNorm ()
 {
 }
 
-/// initialize preprocessor from features
-bool CPNorm::init (CFeatures* features)
-{
-    ASSERT(features->get_feature_class()==C_DENSE)
-    ASSERT(features->get_feature_type()==F_DREAL)
-
-    return true;
-}
-
 /// clean up allocated memory
 void CPNorm::cleanup ()
 {
@@ -70,7 +61,8 @@ bool CPNorm::save (FILE* f)
 /// return pointer to feature_matrix, i.e. f->get_feature_matrix();
 SGMatrix<float64_t> CPNorm::apply_to_feature_matrix (CFeatures* features)
 {
-    SGMatrix<float64_t> feature_matrix=((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
+    auto feature_matrix =
+        features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();
 
     for (int32_t i=0; i<feature_matrix.num_cols; i++)
     {
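The loop bodies of CNormOne and CPNorm are elided above, but the names indicate per-vector normalization: CNormOne presumably rescales each column to unit Euclidean length, and CPNorm generalizes that to an arbitrary p-norm (p = 2 recovers the CNormOne behavior). A standalone sketch of the per-column scaling under that reading, not the Shogun API:

    #include <cmath>

    // Rescale one feature vector to unit p-norm; guard against an
    // all-zero vector so we never divide by zero.
    void scale_to_unit_p_norm(double* vec, int len, double p)
    {
        double norm = 0;
        for (int j = 0; j < len; j++)
            norm += std::pow(std::fabs(vec[j]), p);
        norm = std::pow(norm, 1.0 / p);
        if (norm > 0)
            for (int j = 0; j < len; j++)
                vec[j] /= norm;
    }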
src/shogun/preprocessor/PNorm.h: 2 changes (0 additions & 2 deletions)
@@ -40,8 +40,6 @@ class CPNorm : public CDensePreprocessor<float64_t>
     /** destructor */
     virtual ~CPNorm ();
 
-    /// initialize preprocessor from features
-    virtual bool init (CFeatures* features);
     /// cleanup
     virtual void cleanup ();
     /// initialize preprocessor from file
src/shogun/preprocessor/PruneVarSubMean.cpp: 116 changes (55 additions & 61 deletions)
@@ -26,84 +26,78 @@ CPruneVarSubMean::~CPruneVarSubMean()
     cleanup();
 }
 
-/// initialize preprocessor from features
-bool CPruneVarSubMean::init(CFeatures* features)
+void CPruneVarSubMean::fit(CFeatures* features)
 {
-    if (!m_initialized)
-    {
-        ASSERT(features->get_feature_class()==C_DENSE)
-        ASSERT(features->get_feature_type()==F_DREAL)
+    if (m_initialized)
+        cleanup();
 
-        CDenseFeatures<float64_t>* simple_features=(CDenseFeatures<float64_t>*) features;
-        int32_t num_examples = simple_features->get_num_vectors();
-        int32_t num_features = simple_features->get_num_features();
+    auto simple_features = features->as<CDenseFeatures<float64_t>>();
+    int32_t num_examples = simple_features->get_num_vectors();
+    int32_t num_features = simple_features->get_num_features();
 
-        m_mean = SGVector<float64_t>();
-        m_idx = SGVector<int32_t>();
-        m_std = SGVector<float64_t>();;
+    m_mean = SGVector<float64_t>();
+    m_idx = SGVector<int32_t>();
+    m_std = SGVector<float64_t>();
 
-        m_mean.resize_vector(num_features);
-        float64_t* var=SG_MALLOC(float64_t, num_features);
-        int32_t i,j;
+    m_mean.resize_vector(num_features);
+    float64_t* var = SG_MALLOC(float64_t, num_features);
+    int32_t i, j;
 
-        memset(var, 0, num_features*sizeof(float64_t));
-        m_mean.zero();
+    memset(var, 0, num_features * sizeof(float64_t));
+    m_mean.zero();
 
-        SGMatrix<float64_t> feature_matrix = simple_features->get_feature_matrix();
+    auto feature_matrix = simple_features->get_feature_matrix();
 
-        // compute mean
-        for (i=0; i<num_examples; i++)
-        {
-            for (j=0; j<num_features; j++)
-                m_mean[j]+=feature_matrix.matrix[i*num_features+j];
-        }
+    // compute mean
+    for (i = 0; i < num_examples; i++)
+    {
+        for (j = 0; j < num_features; j++)
+            m_mean[j] += feature_matrix.matrix[i * num_features + j];
+    }
 
-        for (j=0; j<num_features; j++)
-            m_mean[j]/=num_examples;
+    for (j = 0; j < num_features; j++)
+        m_mean[j] /= num_examples;
 
-        // compute var
-        for (i=0; i<num_examples; i++)
-        {
-            for (j=0; j<num_features; j++)
-                var[j]+=CMath::sq(m_mean[j]-feature_matrix.matrix[i*num_features+j]);
-        }
+    // compute var
+    for (i = 0; i < num_examples; i++)
+    {
+        for (j = 0; j < num_features; j++)
+            var[j] += CMath::sq(
+                m_mean[j] - feature_matrix.matrix[i * num_features + j]);
+    }
 
-        int32_t num_ok=0;
-        int32_t* idx_ok=SG_MALLOC(int32_t, num_features);
+    int32_t num_ok = 0;
+    int32_t* idx_ok = SG_MALLOC(int32_t, num_features);
 
-        for (j=0; j<num_features; j++)
-        {
-            var[j]/=num_examples;
+    for (j = 0; j < num_features; j++)
+    {
+        var[j] /= num_examples;
 
-            if (var[j]>=1e-14)
-            {
-                idx_ok[num_ok]=j;
-                num_ok++ ;
-            }
+        if (var[j] >= 1e-14)
+        {
+            idx_ok[num_ok] = j;
+            num_ok++;
+        }
     }
 
-        SG_INFO("Reducing number of features from %i to %i\n", num_features, num_ok)
+    SG_INFO("Reducing number of features from %i to %i\n", num_features, num_ok)
 
-        m_idx.resize_vector(num_ok);
-        SGVector<float64_t> new_mean(num_ok);
-        m_std.resize_vector(num_ok);
+    m_idx.resize_vector(num_ok);
+    SGVector<float64_t> new_mean(num_ok);
+    m_std.resize_vector(num_ok);
 
-        for (j=0; j<num_ok; j++)
-        {
-            m_idx[j]=idx_ok[j] ;
-            new_mean[j]=m_mean[idx_ok[j]];
-            m_std[j] = std::sqrt(var[idx_ok[j]]);
-        }
-        m_num_idx = num_ok;
-        SG_FREE(idx_ok);
-        SG_FREE(var);
-        m_mean = new_mean;
-
-        m_initialized = true;
-        return true;
+    for (j = 0; j < num_ok; j++)
+    {
+        m_idx[j] = idx_ok[j];
+        new_mean[j] = m_mean[idx_ok[j]];
+        m_std[j] = std::sqrt(var[idx_ok[j]]);
     }
-    else
-        return false;
+    m_num_idx = num_ok;
+    SG_FREE(idx_ok);
+    SG_FREE(var);
+    m_mean = new_mean;
 
+    m_initialized = true;
 }
 
 /// clean up allocated memory
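Since this is the one preprocessor whose fit() does real work, a standalone restatement of what the rewritten function computes may help: the per-feature mean and variance over all examples, then the indices, means, and standard deviations of the features whose variance clears 1e-14 (near-constant features are pruned). Plain C++ mirroring the logic above, with hypothetical names; the stored model is presumably what apply_to_feature_matrix later uses to subtract the mean and scale:

    #include <cmath>
    #include <vector>

    struct PruneModel
    {
        std::vector<int> idx;       // indices of kept features
        std::vector<double> mean;   // mean of each kept feature
        std::vector<double> stddev; // standard deviation of each kept feature
    };

    PruneModel fit_prune_var_sub_mean(
        const std::vector<std::vector<double>>& examples, int num_features)
    {
        std::vector<double> mean(num_features, 0.0), var(num_features, 0.0);
        const double n = static_cast<double>(examples.size()); // assume n > 0

        for (const auto& x : examples)              // accumulate means
            for (int j = 0; j < num_features; j++)
                mean[j] += x[j];
        for (int j = 0; j < num_features; j++)
            mean[j] /= n;

        for (const auto& x : examples)              // accumulate variances
            for (int j = 0; j < num_features; j++)
                var[j] += (mean[j] - x[j]) * (mean[j] - x[j]);

        PruneModel model;
        for (int j = 0; j < num_features; j++)
        {
            var[j] /= n;
            if (var[j] >= 1e-14)                    // prune near-constant features
            {
                model.idx.push_back(j);
                model.mean.push_back(mean[j]);
                model.stddev.push_back(std::sqrt(var[j]));
            }
        }
        return model;
    }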
src/shogun/preprocessor/PruneVarSubMean.h: 4 changes (2 additions & 2 deletions)
@@ -35,8 +35,8 @@ class CPruneVarSubMean : public CDensePreprocessor<float64_t>
     /** destructor */
     virtual ~CPruneVarSubMean();
 
-    /// initialize preprocessor from features
-    virtual bool init(CFeatures* features);
+    /// Fit preprocessor into features
+    virtual void fit(CFeatures* features);
 
     /// cleanup
     virtual void cleanup();
src/shogun/preprocessor/RandomFourierGaussPreproc.cpp: 20 changes (4 additions & 16 deletions)
@@ -329,36 +329,26 @@ void CRandomFourierGaussPreproc::set_randomcoefficients(
 
 }
 
-bool CRandomFourierGaussPreproc::init(CFeatures *f) {
-    if (f->get_feature_class() != get_feature_class()) {
-        throw ShogunException(
-                "CRandomFourierGaussPreproc::init (CFeatures *f) requires CDenseFeatures<float64_t> as features\n");
-    }
-    if (f->get_feature_type() != get_feature_type()) {
-        throw ShogunException(
-                "CRandomFourierGaussPreproc::init (CFeatures *f) requires CDenseFeatures<float64_t> as features\n");
-    }
+void CRandomFourierGaussPreproc::fit(CFeatures* f)
+{
     if (dim_feature_space <= 0) {
         throw ShogunException(
                 "CRandomFourierGaussPreproc::init (CFeatures *f): dim_feature_space<=0 is not allowed, use void set_dim_feature_space(const int32 dim) before!\n");
     }
 
-    SG_INFO("calling CRandomFourierGaussPreproc::init(...)\n")
     int32_t num_features =
-            ((CDenseFeatures<float64_t>*) f)->get_num_features();
+        f->as<CDenseFeatures<float64_t>>()->get_num_features();
 
     if (!test_rfinited()) {
         dim_input_space = num_features;
         init_randomcoefficients();
         ASSERT( test_rfinited())
-        return true;
     } else {
         dim_input_space = num_features;
         // does not reinit if dimension is the same to avoid overriding a previous call of set_randomcoefficients(...)
-        bool inited = init_randomcoefficients();
-        return inited;
+        init_randomcoefficients();
     }
 
 }
 
 SGVector<float64_t> CRandomFourierGaussPreproc::apply_to_feature_vector(SGVector<float64_t> vector)
@@ -381,8 +371,6 @@ SGVector<float64_t> CRandomFourierGaussPreproc::apply_to_feature_vector(SGVector
 
 SGMatrix<float64_t> CRandomFourierGaussPreproc::apply_to_feature_matrix(CFeatures* features)
 {
-    init(features);
-
     // version for case dim_feature_space < dim_input space with direct transformation on feature matrix ??
 
     int32_t num_vectors = 0;
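Two things happen in this file: fit() drops the manual class/type checks (the as<>() cast now does the checking), and the second hunk removes the implicit init(features) from apply_to_feature_matrix, so callers must fit before applying. The excerpt never shows the map itself; for orientation only, below is the textbook random Fourier feature construction the class name refers to (Rahimi & Recht): draw each row w_k of the coefficient matrix from a Gaussian and each b_k uniformly from [0, 2*pi), then z_k(x) = sqrt(2/D) * cos(w_k . x + b_k) approximates a Gaussian-kernel feature map. The coefficients are assumed to be drawn elsewhere, as set_randomcoefficients / init_randomcoefficients do in the class above; Shogun's exact formulation may differ.

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Map one input vector x through precomputed random coefficients (w, b).
    // D plays the role of dim_feature_space in the code above.
    std::vector<double> random_fourier_map(
        const std::vector<double>& x,
        const std::vector<std::vector<double>>& w, // D rows, each of x.size()
        const std::vector<double>& b)              // D offsets in [0, 2*pi)
    {
        const std::size_t D = w.size();
        std::vector<double> z(D);
        for (std::size_t k = 0; k < D; k++)
        {
            double dot = 0;
            for (std::size_t j = 0; j < x.size(); j++)
                dot += w[k][j] * x[j];
            z[k] = std::sqrt(2.0 / D) * std::cos(dot + b[k]);
        }
        return z;
    }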
