Skip to content

Commit

Permalink
Merge pull request #3666 from lambday/develop
Browse files Browse the repository at this point in the history
Remove FeaturesUtil class, use CFeatures methods instead
  • Loading branch information
lambday committed Mar 15, 2017
2 parents 05777e0 + f208d0f commit 17907f4
Show file tree
Hide file tree
Showing 24 changed files with 280 additions and 620 deletions.
160 changes: 77 additions & 83 deletions src/shogun/features/DenseFeatures.cpp
Expand Up @@ -7,17 +7,19 @@
* Written (W) 1999-2010 Soeren Sonnenburg
* Written (W) 1999-2008 Gunnar Raetsch
* Written (W) 2011-2013 Heiko Strathmann
* Written (W) 2014-2017 Soumyajit De
* Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
* Copyright (C) 2010 Berlin Institute of Technology
*/

#include <shogun/base/some.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/io/SGIO.h>
#include <shogun/base/Parameter.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>

#include <algorithm>
#include <string.h>

namespace shogun {
Expand Down Expand Up @@ -265,23 +267,50 @@ template<class ST> void CDenseFeatures<ST>::feature_subset(int32_t* idx, int32_t
}
}

template<class ST> SGMatrix<ST> CDenseFeatures<ST>::get_feature_matrix()
template <class ST>
SGMatrix<ST> CDenseFeatures<ST>::get_feature_matrix()
{
if (!m_subset_stack->has_subsets())
return feature_matrix;

SGMatrix<ST> submatrix(num_features, get_num_vectors());
SGMatrix<ST> target(num_features, get_num_vectors());
copy_feature_matrix(target);
return target;
}

template <class ST>
void CDenseFeatures<ST>::copy_feature_matrix(SGMatrix<ST> target, index_t column_offset) const
{
REQUIRE(column_offset>=0, "Column offset (%d) cannot be negative!\n", column_offset);
REQUIRE(!target.equals(feature_matrix), "Source and target feature matrices cannot be the same\n");

/* copy a subset vector wise */
for (int32_t i=0; i<submatrix.num_cols; ++i)
index_t num_vecs=get_num_vectors();
index_t num_cols=num_vecs+column_offset;

REQUIRE(target.matrix!=nullptr, "Provided matrix is not allocated!\n");
REQUIRE(target.num_rows==num_features,
"Number of rows of given matrix (%d) should be equal to the number of features (%d)!\n",
target.num_rows, num_features);
REQUIRE(target.num_cols>=num_cols,
"Number of cols of given matrix (%d) should be at least %d!\n",
target.num_cols, num_cols);

if (!m_subset_stack->has_subsets())
{
int32_t real_i = m_subset_stack->subset_idx_conversion(i);
memcpy(&submatrix.matrix[i*int64_t(num_features)],
&feature_matrix.matrix[real_i * int64_t(num_features)],
num_features * sizeof(ST));
auto src=feature_matrix.matrix;
auto dest=target.matrix+int64_t(num_features)*column_offset;
shogun::memcpy(dest, src, feature_matrix.size()*sizeof(ST));
}
else
{
for (int32_t i=0; i<num_vecs; ++i)
{
auto real_i=m_subset_stack->subset_idx_conversion(i);
auto src=feature_matrix.matrix+real_i*int64_t(num_features);
auto dest=target.matrix+int64_t(num_features)*(column_offset+i);
shogun::memcpy(dest, src, num_features*sizeof(ST));
}
}

return submatrix;
}

template<class ST> SGMatrix<ST> CDenseFeatures<ST>::steal_feature_matrix()
Expand Down Expand Up @@ -977,101 +1006,66 @@ template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs)
return true;
}

template<class ST> CFeatures* CDenseFeatures<ST>::create_merged_copy(
CList* others)
template <class ST>
CFeatures* CDenseFeatures<ST>::create_merged_copy(CList* others)
{
SG_DEBUG("entering %s::create_merged_copy()\n", get_name());
SG_DEBUG("Entering.\n");

if (!others)
return NULL;
REQUIRE(others!=nullptr, "The list of other feature instances is not initialized!\n");

/* first, check other features and count number of elements */
CSGObject* other=others->get_first_element();
index_t num_vectors_merged=num_vectors;
while (other)
auto current=others->get_first_element();
auto total_num_vectors=get_num_vectors();
auto unref_required=others->get_delete_data();

while (current!=nullptr)
{
CDenseFeatures<ST>* casted=dynamic_cast<CDenseFeatures<ST>* >(other);
auto casted=dynamic_cast<CDenseFeatures<ST>*>(current);

if (!casted)
{
SG_ERROR("%s::create_merged_copy(): Could not cast object of %s to "
"same type as %s\n",get_name(), other->get_name(), get_name());
}
REQUIRE(casted!=nullptr, "Provided object's type (%s) must match own type (%s)!\n",
current->get_name(), get_name());
REQUIRE(num_features==casted->num_features,
"Provided feature object has different dimension (%d) than this one (%d)!\n",
casted->num_features, num_features);

if (get_feature_type()!=casted->get_feature_type() ||
get_feature_class()!=casted->get_feature_class() ||
strcmp(get_name(), casted->get_name()))
{
SG_ERROR("%s::create_merged_copy(): Features are of different type!\n",
get_name());
}
total_num_vectors+=casted->get_num_vectors();

if (num_features!=casted->num_features)
{
SG_ERROR("%s::create_merged_copy(): Provided feature object has "
"different dimension than this one\n");
}
if (unref_required)
SG_UNREF(current);

num_vectors_merged+=casted->get_num_vectors();

/* check if reference counting is used */
if (others->get_delete_data())
SG_UNREF(other);
other=others->get_next_element();
current=others->get_next_element();
}

/* create new feature matrix and copy both instances data into it */
SGMatrix<ST> data(num_features, num_vectors_merged);

/* copy data of this instance */
SG_DEBUG("copying matrix of this instance\n")
memcpy(data.matrix, feature_matrix.matrix,
num_features*num_vectors*sizeof(ST));
SGMatrix<ST> data(num_features, total_num_vectors);
index_t num_copied=0;
copy_feature_matrix(data, num_copied);
num_copied+=get_num_vectors();

/* count number of vectors (not elements) processed so far */
index_t num_processed=num_vectors;
current=others->get_first_element();

/* now copy data of other features block wise */
other=others->get_first_element();
while (other)
while (current!=nullptr)
{
/* cast is safe due to above check */
CDenseFeatures<ST>* casted=(CDenseFeatures<ST>*)other;

SG_DEBUG("copying matrix of provided instance\n")
memcpy(&(data.matrix[num_processed*num_features]),
casted->get_feature_matrix().matrix,
num_features*casted->get_num_vectors()*sizeof(ST));
auto casted=static_cast<CDenseFeatures<ST>*>(current);
casted->copy_feature_matrix(data, num_copied);
num_copied+=casted->get_num_vectors();

/* update counting */
num_processed+=casted->get_num_vectors();
if (unref_required)
SG_UNREF(current);

/* check if reference counting is used */
if (others->get_delete_data())
SG_UNREF(other);
other=others->get_next_element();
current=others->get_next_element();
}

/* create new instance and return */
CDenseFeatures<ST>* result=new CDenseFeatures<ST>(data);
auto result=new CDenseFeatures<ST>(data);

SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
SG_DEBUG("Leaving.\n");
return result;
}

template<class ST> CFeatures* CDenseFeatures<ST>::create_merged_copy(
CFeatures* other)
template <class ST>
CFeatures* CDenseFeatures<ST>::create_merged_copy(CFeatures* other)
{
SG_DEBUG("entering %s::create_merged_copy()\n", get_name());

/* create list with one element and call general method */
CList* list=new CList();
auto list=some<CList>();
list->append_element(other);
CFeatures* result=create_merged_copy(list);
SG_UNREF(list);

SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
return result;
return create_merged_copy(list);
}

template<class ST>
Expand Down
11 changes: 11 additions & 0 deletions src/shogun/features/DenseFeatures.h
Expand Up @@ -7,6 +7,7 @@
* Written (W) 1999-2010 Soeren Sonnenburg
* Written (W) 1999-2008 Gunnar Raetsch
* Written (W) 2011-2013 Heiko Strathmann
* Written (W) 2014-2017 Soumyajit De
* Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
* Copyright (C) 2010 Berlin Institute of Technology
*/
Expand Down Expand Up @@ -484,6 +485,7 @@ template<class ST> class CDenseFeatures: public CDotFeatures
/** Takes a list of feature instances and returns a new instance which is
* a concatenation of a copy of this instance's data and the given
* instances' data. Note that the feature types have to be equal.
* This method respects the subsets for all the feature instances involved.
*
* @param other feature object to append
* @return new feature object which contains copy of data of this
Expand Down Expand Up @@ -530,6 +532,15 @@ template<class ST> class CDenseFeatures: public CDotFeatures
void init();

protected:
/*
* Helper method which copies the working feature matrix into the pre-allocated
* target matrix passed to this method. If the size of the pre-allocated matrix
* is not sufficient to copy all the feature vectors, considering the column
* offset, it throws an error. It then copies into the target matrix, starting
* from base + (column_offset * num_features) location.
*/
void copy_feature_matrix(SGMatrix<ST> target, index_t column_offset=0) const;

/// number of vectors in cache
int32_t num_vectors;

Expand Down
Expand Up @@ -36,7 +36,6 @@
#include <shogun/statistical_testing/TestEnums.h>
#include <shogun/statistical_testing/QuadraticTimeMMD.h>
#include <shogun/statistical_testing/MultiKernelQuadraticTimeMMD.h>
#include <shogun/statistical_testing/internals/FeaturesUtil.h>
#include <shogun/statistical_testing/internals/KernelManager.h>
#include <shogun/statistical_testing/internals/mmd/ComputeMMD.h>
#include <shogun/statistical_testing/internals/mmd/VarianceH1.h>
Expand Down
7 changes: 3 additions & 4 deletions src/shogun/statistical_testing/QuadraticTimeMMD.cpp
Expand Up @@ -38,7 +38,6 @@
#include <shogun/statistical_testing/QuadraticTimeMMD.h>
#include <shogun/statistical_testing/MultiKernelQuadraticTimeMMD.h>
#include <shogun/statistical_testing/internals/Kernel.h>
#include <shogun/statistical_testing/internals/FeaturesUtil.h>
#include <shogun/statistical_testing/internals/NextSamples.h>
#include <shogun/statistical_testing/internals/DataManager.h>
#include <shogun/statistical_testing/internals/KernelManager.h>
Expand Down Expand Up @@ -255,9 +254,9 @@ CFeatures* CQuadraticTimeMMD::get_p_and_q()
auto samples=data_mgr.next();
if (!samples.empty())
{
CFeatures *samples_p=samples[0][0].get();
CFeatures *samples_q=samples[1][0].get();
samples_p_and_q=FeaturesUtil::create_merged_copy(samples_p, samples_q);
CFeatures *samples_p=samples[0][0];
CFeatures *samples_q=samples[1][0];
samples_p_and_q=samples_p->create_merged_copy(samples_q);
samples.clear();
}
else
Expand Down
7 changes: 3 additions & 4 deletions src/shogun/statistical_testing/StreamingMMD.cpp
Expand Up @@ -42,7 +42,6 @@
#include <shogun/statistical_testing/kernelselection/KernelSelectionStrategy.h>
#include <shogun/statistical_testing/internals/NextSamples.h>
#include <shogun/statistical_testing/internals/DataManager.h>
#include <shogun/statistical_testing/internals/FeaturesUtil.h>
#include <shogun/statistical_testing/internals/KernelManager.h>
#include <shogun/statistical_testing/internals/ComputationManager.h>
#include <shogun/statistical_testing/internals/mmd/ComputeMMD.h>
Expand Down Expand Up @@ -137,9 +136,9 @@ void CStreamingMMD::Self::merge_samples(NextSamples& next_burst, std::vector<CFe
#pragma omp parallel for
for (int64_t i=0; i<(int64_t)blocks.size(); ++i)
{
auto block_p=next_burst[0][i].get();
auto block_q=next_burst[1][i].get();
auto block_p_and_q=FeaturesUtil::create_merged_copy(block_p, block_q);
CFeatures *block_p=next_burst[0][i];
CFeatures *block_q=next_burst[1][i];
auto block_p_and_q=block_p->create_merged_copy(block_q);
blocks[i]=block_p_and_q;
}
next_burst.clear();
Expand Down
13 changes: 5 additions & 8 deletions src/shogun/statistical_testing/TwoDistributionTest.cpp
Expand Up @@ -34,7 +34,6 @@
#include <shogun/statistical_testing/internals/DataManager.h>
#include <shogun/statistical_testing/internals/TestTypes.h>
#include <shogun/statistical_testing/internals/NextSamples.h>
#include <shogun/statistical_testing/internals/FeaturesUtil.h>

using namespace shogun;
using namespace internal;
Expand Down Expand Up @@ -107,10 +106,8 @@ CCustomDistance* CTwoDistributionTest::compute_distance(CDistance* distance)
auto samples=data_mgr.next();
REQUIRE(!samples.empty(), "Could not fetch samples!\n");

CFeatures *samples_p=samples[0][0].get();
CFeatures *samples_q=samples[1][0].get();
SG_REF(samples_p);
SG_REF(samples_q);
CFeatures *samples_p=samples[0][0];
CFeatures *samples_q=samples[1][0];

distance->cleanup();
distance->remove_lhs_and_rhs();
Expand Down Expand Up @@ -139,9 +136,9 @@ CCustomDistance* CTwoDistributionTest::compute_joint_distance(CDistance* distanc
auto samples=data_mgr.next();
REQUIRE(!samples.empty(), "Could not fetch samples!\n");

CFeatures *samples_p=samples[0][0].get();
CFeatures *samples_q=samples[1][0].get();
auto p_and_q=FeaturesUtil::create_merged_copy(samples_p, samples_q);
CFeatures *samples_p=samples[0][0];
CFeatures *samples_q=samples[1][0];
auto p_and_q=samples_p->create_merged_copy(samples_q);

samples.clear();
data_mgr.end();
Expand Down
10 changes: 5 additions & 5 deletions src/shogun/statistical_testing/internals/Block.cpp
Expand Up @@ -33,7 +33,6 @@
#include <shogun/lib/SGVector.h>
#include <shogun/features/Features.h>
#include <shogun/statistical_testing/internals/Block.h>
#include <shogun/statistical_testing/internals/FeaturesUtil.h>

using namespace shogun;
using namespace internal;
Expand All @@ -47,32 +46,33 @@ Block::Block(CFeatures* feats, index_t index, index_t size) : m_feats(feats)
SG_REF(m_feats);

// create a shallow copy and subset current block separately
CFeatures* block=FeaturesUtil::create_shallow_copy(feats);
ASSERT(block->ref_count()==0);
CFeatures* block=feats->shallow_subset_copy();

SGVector<index_t> inds(size);
std::iota(inds.vector, inds.vector+inds.vlen, index*size);
block->add_subset(inds);

// since this block object is internal, we simply use a shared_ptr
m_block=std::shared_ptr<CFeatures>(block);
m_block=block;
}

/**
 * Copy constructor. Copies both feature pointers (the per-block object and
 * the underlying features) from the other block and then calls SG_REF on
 * each of them, so that this copy holds its own references; the destructor
 * issues the corresponding SG_UNREF calls.
 *
 * @param other the block whose pointers are copied
 */
Block::Block(const Block& other) : m_block(other.m_block), m_feats(other.m_feats)
{
// the pointers are shared with `other`, so register this copy as an additional holder
SG_REF(m_block);
SG_REF(m_feats);
}

/**
 * Copy assignment. Makes this block refer to the same per-block object and
 * the same underlying features as the other block.
 *
 * The references held before the assignment are released: the destructor
 * issues exactly one SG_UNREF per member, so overwriting the pointers
 * without releasing them first would leak one reference to each of the
 * previously held objects.
 *
 * @param other the block to assign from
 * @return reference to this block
 */
Block& Block::operator=(const Block& other)
{
	auto new_block=other.m_block;
	auto new_feats=other.m_feats;

	// take the new references first, so that self-assignment (or assigning
	// from a block that shares our objects) never drops a count to zero
	SG_REF(new_block);
	SG_REF(new_feats);

	// release what this block was holding so far
	SG_UNREF(m_block);
	SG_UNREF(m_feats);

	m_block=new_block;
	m_feats=new_feats;
	return *this;
}

/**
 * Destructor. Gives up this instance's hold on both pointers by calling
 * SG_UNREF on the per-block object and on the underlying features.
 */
Block::~Block()
{
SG_UNREF(m_block);
SG_UNREF(m_feats);
}

Expand Down

0 comments on commit 17907f4

Please sign in to comment.