Skip to content

Commit

Permalink
KD-Tree and Ball tree added in a unified interface
Browse files Browse the repository at this point in the history
  • Loading branch information
mazumdarparijat committed Jul 11, 2014
1 parent b95d9a7 commit ca803db
Show file tree
Hide file tree
Showing 12 changed files with 1,419 additions and 0 deletions.
84 changes: 84 additions & 0 deletions src/shogun/multiclass/tree/BallTree.cpp
@@ -0,0 +1,84 @@
/*
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2014 Parijat Mazumdar
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*/

#include <shogun/multiclass/tree/BallTree.h>

using namespace shogun;

/** Construct a ball tree.
 *
 * Both arguments are handed unchanged to the CNbodyTree constructor; this
 * class adds no state of its own here.
 *
 * @param leaf_size leaf size setting forwarded to CNbodyTree
 * @param d distance metric selector forwarded to CNbodyTree
 */
CBallTree::CBallTree(int32_t leaf_size, EDistanceMetric d)
	: CNbodyTree(leaf_size, d)
{
	// nothing to set up beyond the base class
}

/** Lower-bound distance term between a query vector and the ball of a node.
 *
 * Sums add_dim_dist() over the per-dimension offsets between the node center
 * and the query, subtracts the node radius and clamps the result at zero.
 *
 * NOTE(review): init_node() passes the radius through actual_dists() whereas
 * the accumulated term here is used as-is - confirm both are on the same
 * scale for every supported metric.
 *
 * @param node node whose stored center and radius are read
 * @param feat query vector; entries 0..dim-1 are read
 * @param dim number of dimensions to accumulate over
 * @return max(0, accumulated distance term - node radius)
 */
float64_t CBallTree::min_distsq(bnode_t* node,float64_t* feat, int32_t dim)
{
	SGVector<float64_t> ball_center=node->data.center;

	float64_t accum=0;
	for (int32_t k=0;k<dim;k++)
		accum+=add_dim_dist(ball_center[k]-feat[k]);

	// a query inside the ball would give a negative gap; clamp it to zero
	const float64_t gap=accum-node->data.radius;
	return CMath::max(0.0,gap);
}

/** Initialize a ball-tree node from the points vec_id[start..end].
 *
 * For every dimension (row of m_data) the minimum, maximum and mean over the
 * referenced points are computed; the means form the node center. The radius
 * is the largest distance() between that center and any referenced point,
 * post-processed by actual_dists().
 *
 * @param node node to be initialized; its data fields are overwritten
 * @param start first position in vec_id belonging to the node (inclusive)
 * @param end last position in vec_id belonging to the node (inclusive)
 */
void CBallTree::init_node(bnode_t* node, index_t start, index_t end)
{
	SGVector<float64_t> upper_bounds(m_data.num_rows);
	SGVector<float64_t> lower_bounds(m_data.num_rows);
	SGVector<float64_t> center(m_data.num_rows);

	// Keep the point count in float64_t: a single-precision count cannot
	// represent every value above 2^24 and would skew the mean.
	const float64_t num_points=static_cast<float64_t>(end-start)+1.0;

	for (int32_t i=0;i<m_data.num_rows;i++)
	{
		// seed all three statistics with the first point of the range
		const float64_t first_pt=m_data(i,vec_id[start]);
		float64_t dim_max=first_pt;
		float64_t dim_min=first_pt;
		float64_t dim_sum=first_pt;

		for (int32_t j=start+1;j<=end;j++)
		{
			float64_t data_pt=m_data(i,vec_id[j]);
			dim_max=CMath::max(dim_max,data_pt);
			dim_min=CMath::min(dim_min,data_pt);
			dim_sum+=data_pt;
		}

		upper_bounds[i]=dim_max;
		lower_bounds[i]=dim_min;
		center[i]=dim_sum/num_points;
	}

	// radius: farthest referenced point from the center
	float64_t radius=0;
	for (int32_t i=start;i<=end;i++)
		radius=CMath::max(distance(vec_id[i],center.vector,center.vlen),radius);

	// NOTE(review): presumably maps the accumulated metric value to an actual
	// distance in place - confirm against CNbodyTree::actual_dists
	actual_dists(&radius,1);

	node->data.radius=radius;
	node->data.center=center;
	node->data.start_idx=start;
	node->data.bbox_upper=upper_bounds;
	node->data.bbox_lower=lower_bounds;
	node->data.end_idx=end;
}
83 changes: 83 additions & 0 deletions src/shogun/multiclass/tree/BallTree.h
@@ -0,0 +1,83 @@
/*
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2014 Parijat Mazumdar
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*/


#ifndef _BALLTREE_H__
#define _BALLTREE_H__

#include <shogun/lib/config.h>

#include <shogun/multiclass/tree/NbodyTree.h>

namespace shogun
{

/** @brief This class implements Ball tree.
 *
 * Derives from CNbodyTree and declares Ball-tree versions of min_distsq()
 * and init_node().
 */
class CBallTree : public CNbodyTree
{
public:
	/** constructor
	 *
	 * @param leaf_size leaf size setting; passed on to the CNbodyTree
	 * constructor (default 1)
	 * @param d distance metric; passed on to the CNbodyTree constructor
	 * (default DM_EUCLID)
	 */
	CBallTree(int32_t leaf_size=1, EDistanceMetric d=DM_EUCLID);

	/** Destructor */
	~CBallTree() { }

	/** get name
	 * @return class of the tree
	 */
	virtual const char* get_name() const { return "BallTree"; }

private:
	/** find lower-bound distance term between the ball of a node and a
	 * query vector
	 *
	 * @param node present node
	 * @param feat query vector
	 * @param dim dimensions of query vector
	 * @return accumulated distance term to the node center minus the node
	 * radius, clamped at zero
	 */
	float64_t min_distsq(bnode_t* node,float64_t* feat, int32_t dim);

	/** initialize node: computes center, radius and per-dimension bounds
	 * of the points referenced by the index range
	 *
	 * @param node node to be initialized
	 * @param start start index of index vector (inclusive)
	 * @param end end index of index vector (inclusive)
	 */
	void init_node(bnode_t* node, index_t start, index_t end);

};
} /* namespace shogun */

#endif /* _BALLTREE_H__ */
82 changes: 82 additions & 0 deletions src/shogun/multiclass/tree/KDTree.cpp
@@ -0,0 +1,82 @@
/*
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2014 Parijat Mazumdar
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*/

#include <shogun/multiclass/tree/KDTree.h>

using namespace shogun;

/** Construct a KD-tree.
 *
 * Both arguments are handed unchanged to the CNbodyTree constructor.
 *
 * @param leaf_size leaf size setting forwarded to CNbodyTree
 * @param d distance metric selector forwarded to CNbodyTree
 */
CKDTree::CKDTree(int32_t leaf_size, EDistanceMetric d)
	: CNbodyTree(leaf_size, d)
{
	// nothing to set up beyond the base class
}

/** Destructor; this class performs no cleanup of its own. */
CKDTree::~CKDTree()
{
	// intentionally empty
}

/** Lower-bound distance term between a query vector and the bounding box of
 * a node.
 *
 * Per dimension, (lower-q)+|q-lower| is twice the amount by which q falls
 * below the lower bound (else zero) and (q-upper)+|q-upper| is twice the
 * amount by which q exceeds the upper bound (else zero). Half of their sum
 * is therefore the offset of q from the box in that dimension, obtained
 * without branching. These offsets are accumulated through add_dim_dist().
 *
 * @param node node whose bbox_lower / bbox_upper are read
 * @param feat query vector; entries 0..dim-1 are read
 * @param dim number of dimensions to accumulate over
 * @return accumulated add_dim_dist() value over all dimensions
 */
float64_t CKDTree::min_distsq(bnode_t* node,float64_t* feat, int32_t dim)
{
	float64_t total=0;
	for (int32_t k=0;k<dim;k++)
	{
		const float64_t lower_excess=(node->data.bbox_lower[k]-feat[k])
			+CMath::abs(feat[k]-node->data.bbox_lower[k]);
		const float64_t upper_excess=(feat[k]-node->data.bbox_upper[k])
			+CMath::abs(feat[k]-node->data.bbox_upper[k]);

		total+=add_dim_dist(0.5*(lower_excess+upper_excess));
	}

	return total;
}

/** Initialize a KD-tree node from the points vec_id[start..end].
 *
 * Computes the per-dimension minimum and maximum over the referenced points
 * (the node's bounding box) and stores half of the widest box extent as the
 * node radius.
 *
 * @param node node to be initialized; its data fields are overwritten
 * @param start first position in vec_id belonging to the node (inclusive)
 * @param end last position in vec_id belonging to the node (inclusive)
 */
void CKDTree::init_node(bnode_t* node, index_t start, index_t end)
{
	SGVector<float64_t> box_hi(m_data.num_rows);
	SGVector<float64_t> box_lo(m_data.num_rows);

	// widest box extent seen so far, over all dimensions
	float64_t widest=0;
	for (int32_t dim=0;dim<m_data.num_rows;dim++)
	{
		// seed both bounds with the first point of the range
		float64_t hi=m_data(dim,vec_id[start]);
		float64_t lo=m_data(dim,vec_id[start]);

		for (int32_t k=start+1;k<=end;k++)
		{
			hi=CMath::max(hi,m_data(dim,vec_id[k]));
			lo=CMath::min(lo,m_data(dim,vec_id[k]));
		}

		box_hi[dim]=hi;
		box_lo[dim]=lo;
		widest=CMath::max(widest,box_hi[dim]-box_lo[dim]);
	}

	node->data.start_idx=start;
	node->data.end_idx=end;
	node->data.radius=0.5*widest;
	node->data.bbox_upper=box_hi;
	node->data.bbox_lower=box_lo;
}
83 changes: 83 additions & 0 deletions src/shogun/multiclass/tree/KDTree.h
@@ -0,0 +1,83 @@
/*
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2014 Parijat Mazumdar
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*/


#ifndef _KDTREE_H__
#define _KDTREE_H__

#include <shogun/lib/config.h>

#include <shogun/multiclass/tree/NbodyTree.h>

namespace shogun
{

/** @brief This class implements KD-Tree.
 *
 * Derives from CNbodyTree and declares KD-tree versions of min_distsq()
 * and init_node().
 */
class CKDTree : public CNbodyTree
{
public:
/** constructor
 *
 * @param leaf_size leaf size setting; passed on to the CNbodyTree
 * constructor (default 1)
 * @param d distance metric; passed on to the CNbodyTree constructor
 * (default DM_EUCLID)
 */
CKDTree(int32_t leaf_size=1, EDistanceMetric d=DM_EUCLID);

/** Destructor */
~CKDTree();

/** get name
 * @return class of the tree
 */
virtual const char* get_name() const { return "KDTree"; }

private:
/** find lower-bound distance term between the bounding box of a node and
 * a query vector
 *
 * @param node present node
 * @param feat query vector
 * @param dim dimensions of query vector
 * @return distance term accumulated over all dimensions from the offset of
 * the query to the node's [bbox_lower, bbox_upper] interval
 */
float64_t min_distsq(bnode_t* node,float64_t* feat, int32_t dim);

/** initialize node: computes the per-dimension bounding box of the points
 * referenced by the index range and derives the node radius from it
 *
 * @param node node to be initialized
 * @param start start index of index vector (inclusive)
 * @param end end index of index vector (inclusive)
 */
void init_node(bnode_t* node, index_t start, index_t end);

};
} /* namespace shogun */

#endif /* _KDTREE_H__ */

0 comments on commit ca803db

Please sign in to comment.