Skip to content

Commit

Permalink
added block data structure
Browse files Browse the repository at this point in the history
  • Loading branch information
lambday committed Jun 28, 2016
1 parent 7257db1 commit 9158bbb
Show file tree
Hide file tree
Showing 3 changed files with 332 additions and 0 deletions.
84 changes: 84 additions & 0 deletions src/shogun/statistical_testing/internals/Block.cpp
@@ -0,0 +1,84 @@
/*
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2016 Soumyajit De
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*/

#include <algorithm>
#include <numeric>
#include <shogun/lib/SGVector.h>
#include <shogun/features/Features.h>
#include <shogun/statistical_testing/internals/Block.h>
#include <shogun/statistical_testing/internals/FeaturesUtil.h>

using namespace shogun;
using namespace internal;

/**
 * Creates the block with the given index over the underlying feature object.
 *
 * A shallow copy of feats is created and restricted, via add_subset(), to the
 * size consecutive indices starting at index*size. The underlying feature
 * object is SG_REF'd so that it stays alive as long as this block does; the
 * matching SG_UNREF happens in the destructor.
 *
 * @param feats The underlying feature object, must not be null.
 * @param index The index of the block.
 * @param size The size of the block (number of feature vectors).
 */
Block::Block(CFeatures* feats, index_t index, index_t size) : m_feats(feats)
{
	REQUIRE(m_feats!=nullptr, "Underlying feature object cannot be null!\n");

	// increase the refcount of the underlying feature object
	// we want this object to be alive till the last block is free'd
	// NOTE(review): if anything below raises, the destructor does not run and
	// this reference is not released - confirm whether callers rely on that.
	SG_REF(m_feats);

	// create a shallow copy and subset current block separately
	CFeatures* block=FeaturesUtil::create_shallow_copy(feats);
	ASSERT(block->ref_count()==0);

	// since this block object is internal, we simply use a shared_ptr; hand
	// over ownership right away so that the shallow copy is released (and not
	// leaked) in case adding the subset below fails
	m_block=std::shared_ptr<CFeatures>(block);

	// indices covered by this block: [index*size, index*size+size)
	SGVector<index_t> inds(size);
	std::iota(inds.vector, inds.vector+inds.vlen, index*size);
	m_block->add_subset(inds);
}

/**
 * Copy constructor. The copy shares the shallow-copied block features with
 * other and takes its own reference on the underlying feature object.
 *
 * @param other The block to copy from.
 */
Block::Block(const Block& other)
	: m_block(other.m_block),
	  m_feats(other.m_feats)
{
	// every Block instance holds exactly one reference on the underlying object
	SG_REF(m_feats);
}

/**
 * Assignment operator. Releases the reference held on the previously
 * referenced underlying feature object and takes one on the underlying
 * feature object of other, so that each Block holds exactly one reference
 * (which the destructor releases).
 *
 * @param other The block to assign from.
 * @return Reference to this block.
 */
Block& Block::operator=(const Block& other)
{
	// take the new reference before dropping the old one: this keeps
	// self-assignment (and assignment between blocks over the same underlying
	// object) from transiently releasing the last reference
	CFeatures* incoming=other.m_feats;
	SG_REF(incoming);
	SG_UNREF(m_feats);

	m_block=other.m_block;
	m_feats=incoming;
	return *this;
}

/**
 * Destructor. Releases (SG_UNREF) the reference on the underlying feature
 * object that was taken when this block was constructed or copied.
 */
Block::~Block()
{
SG_UNREF(m_feats);
}

/**
 * Creates num_blocks consecutive blocks, each of the given size, over the
 * same underlying feature object. The i-th block is constructed with block
 * index i.
 *
 * @param feats The underlying feature object.
 * @param num_blocks The number of blocks to be formed.
 * @param size The size of each block (number of feature vectors).
 * @return The blocks, ordered by block index; empty if num_blocks<=0.
 */
std::vector<Block> Block::create_blocks(CFeatures* feats, index_t num_blocks, index_t size)
{
	std::vector<Block> vec;

	// allocate once up front: Block has no move constructor, so every
	// reallocation would copy all blocks and SG_REF/SG_UNREF the underlying
	// feature object for each of them
	if (num_blocks>0)
		vec.reserve(static_cast<std::vector<Block>::size_type>(num_blocks));

	for (index_t i=0; i<num_blocks; ++i)
		vec.push_back(Block(feats, i, size));
	return vec;
}
145 changes: 145 additions & 0 deletions src/shogun/statistical_testing/internals/Block.h
@@ -0,0 +1,145 @@
/*
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2016 Soumyajit De
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*/

#include <memory>
#include <vector>
#include <shogun/lib/common.h>

#ifndef BLOCK_H__
#define BLOCK_H__

namespace shogun
{

class CFeatures;

namespace internal
{

/**
 * @brief Holds a single block of a feature object. The block itself is a
 * shallow copy of the underlying feature object on which a subset has been
 * added. Every Block instance takes a reference (SG_REF) on the underlying
 * feature object when it is constructed or copied and releases it
 * (SG_UNREF) in its destructor. Copies of a Block share the same
 * shallow-copied block features.
 */
class Block
{
private:
	/**
	 * Creates the block with the given index. A shallow copy of the
	 * underlying feature object is made and a subset is added to it
	 * according to the block index and the block size.
	 *
	 * Takes a reference on the underlying feature object. Private: blocks
	 * are created through create_blocks().
	 *
	 * @param feats The underlying feature object.
	 * @param index The index of the block.
	 * @param size The size of the block (number of feature vectors).
	 */
	Block(CFeatures* feats, index_t index, index_t size);
public:
	/**
	 * Copy constructor. SG_REF's the underlying feature object.
	 *
	 * @param other The block to copy from.
	 */
	Block(const Block& other);

	/**
	 * Assignment operator. SG_REF's the underlying feature object of the
	 * assigned block.
	 *
	 * @param other The block to assign from.
	 * @return Reference to this block.
	 */
	Block& operator=(const Block& other);

	/**
	 * Destructor. SG_UNREF's the underlying feature object.
	 */
	~Block();

	/**
	 * Creates a number of blocks over the same underlying feature object.
	 *
	 * @param feats The underlying feature object.
	 * @param num_blocks The number of blocks to be formed.
	 * @param size The size of each block (number of feature vectors).
	 * @return The created blocks.
	 */
	static std::vector<Block> create_blocks(CFeatures* feats, index_t num_blocks, index_t size);

	/**
	 * Conversion to the block features as a shared pointer (non-const).
	 */
	operator std::shared_ptr<CFeatures>()
	{
		return m_block;
	}

	/**
	 * Conversion to the block features as a raw pointer (non-const). The
	 * pointer is non-owning: it is only valid while some Block sharing
	 * these block features is alive.
	 */
	operator CFeatures*()
	{
		return m_block.get();
	}

	/**
	 * Conversion to the block features as a raw pointer (const).
	 */
	operator const CFeatures*() const
	{
		return m_block.get();
	}

	/**
	 * @return The block features as a non-owning raw pointer (non-const).
	 */
	CFeatures* get()
	{
		return m_block.get();
	}

	/**
	 * @return The block features as a non-owning raw pointer (const).
	 */
	const CFeatures* get() const
	{
		return m_block.get();
	}
private:
	/** Shallow copy (with subset) representing the block */
	std::shared_ptr<CFeatures> m_block;

	/** Underlying feature object; one reference is held per Block */
	CFeatures* m_feats;
};

}

}
#endif // BLOCK_H__
103 changes: 103 additions & 0 deletions tests/unit/statistical_testing/internals/Block_unittest.cc
@@ -0,0 +1,103 @@
/*
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2016 Soumyajit De
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*/

#include <shogun/lib/SGVector.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/features/Features.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/statistical_testing/internals/Block.h>
#include <gtest/gtest.h>

using namespace shogun;
using namespace internal;

// Creates blocks over a small dense feature object and verifies the number of
// blocks, every accessor of Block, and that the i-th block holds the same
// data as the i-th consecutive subset of the original features.
TEST(Block, blocks)
{
	const index_t dim=3;
	const index_t num_vec=8;
	const index_t blocksize=2;

	// feature matrix filled with 0, 1, 2, ... so that every block is distinct
	SGMatrix<float64_t> data_p(dim, num_vec);
	std::iota(data_p.matrix, data_p.matrix+dim*num_vec, 0);

	using feat_type=CDenseFeatures<float64_t>;
	auto feats_p=new feat_type(data_p);

	// the expected number of blocks must have been formed
	auto blocks=Block::create_blocks(feats_p, num_vec/blocksize, blocksize);
	ASSERT_TRUE(blocks.size()==size_t(num_vec/blocksize));

	// const conversion operator to a raw pointer
	for (const Block& block : blocks)
	{
		auto block_feats=static_cast<const CFeatures*>(block);
		ASSERT_TRUE(block_feats->get_num_vectors()==blocksize);
	}

	// non-const conversion operator to a shared pointer
	for (Block& block : blocks)
	{
		auto block_feats=static_cast<std::shared_ptr<CFeatures>>(block);
		ASSERT_TRUE(block_feats->get_num_vectors()==blocksize);
	}

	// const get() method
	for (const Block& block : blocks)
	{
		auto block_feats=block.get();
		ASSERT_TRUE(block_feats->get_num_vectors()==blocksize);
	}

	// non-const get() method
	for (Block& block : blocks)
	{
		auto block_feats=block.get();
		ASSERT_TRUE(block_feats->get_num_vectors()==blocksize);
	}

	// block-wise organization: compare each block against the matching subset
	// of the original features; inds starts at the first block and is shifted
	// by one block size after each comparison
	SGVector<index_t> inds(blocksize);
	std::iota(inds.vector, inds.vector+inds.vlen, 0);
	for (Block& block : blocks)
	{
		feats_p->add_subset(inds);
		SGMatrix<float64_t> subset=feats_p->get_feature_matrix();
		SGMatrix<float64_t> blockd=static_cast<feat_type*>(block.get())->get_feature_matrix();
		ASSERT_TRUE(subset.equals(blockd));
		feats_p->remove_subset();

		for (index_t k=0; k<inds.vlen; ++k)
			inds.vector[k]+=blocksize;
	}

	// no explicit clean-up: the blocks hold the references on feats_p and
	// release them when they are destroyed
}

0 comments on commit 9158bbb

Please sign in to comment.