Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vantage point tree #708

Merged
merged 14 commits into from Aug 8, 2016
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/mlpack/core/tree/CMakeLists.txt
Expand Up @@ -16,6 +16,8 @@ set(SOURCES
binary_space_tree/midpoint_split_impl.hpp
binary_space_tree/single_tree_traverser.hpp
binary_space_tree/single_tree_traverser_impl.hpp
binary_space_tree/vantage_point_split.hpp
binary_space_tree/vantage_point_split_impl.hpp
binary_space_tree/traits.hpp
binary_space_tree/typedef.hpp
bounds.hpp
Expand Down
1 change: 1 addition & 0 deletions src/mlpack/core/tree/binary_space_tree.hpp
Expand Up @@ -11,6 +11,7 @@
#include "bounds.hpp"
#include "binary_space_tree/midpoint_split.hpp"
#include "binary_space_tree/mean_split.hpp"
#include "binary_space_tree/vantage_point_split.hpp"
#include "binary_space_tree/binary_space_tree.hpp"
#include "binary_space_tree/single_tree_traverser.hpp"
#include "binary_space_tree/single_tree_traverser_impl.hpp"
Expand Down
7 changes: 7 additions & 0 deletions src/mlpack/core/tree/binary_space_tree/typedef.hpp
Expand Up @@ -135,6 +135,13 @@ using MeanSplitBallTree = BinarySpaceTree<MetricType,
bound::BallBound,
MeanSplit>;

/**
 * Convenience alias for a BinarySpaceTree that uses bound::BallBound as its
 * bound type and VantagePointSplit as its splitting policy.
 *
 * @tparam MetricType Forwarded as the first template argument of
 *     BinarySpaceTree.
 * @tparam StatisticType Forwarded as the second template argument of
 *     BinarySpaceTree.
 * @tparam MatType Forwarded as the third template argument of BinarySpaceTree.
 */
template<typename MetricType, typename StatisticType, typename MatType>
using VantagePointTree = BinarySpaceTree<MetricType,
StatisticType,
MatType,
bound::BallBound,
VantagePointSplit>;

} // namespace tree
} // namespace mlpack

Expand Down
197 changes: 197 additions & 0 deletions src/mlpack/core/tree/binary_space_tree/vantage_point_split.hpp
@@ -0,0 +1,197 @@
/**
* @file vantage_point_split.hpp
* @author Mikhail Lozhnikov
*
* Definition of class VantagePointSplit, a class that splits a binary space
* partitioning into two parts using the distance to a certain vantage point.
*/
#ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_VANTAGE_POINT_SPLIT_HPP
#define MLPACK_CORE_TREE_BINARY_SPACE_TREE_VANTAGE_POINT_SPLIT_HPP

#include <mlpack/core.hpp>

namespace mlpack {
namespace tree /** Trees and tree-building procedures. */ {

/**
 * A splitting policy for binary space trees that divides the points of a node
 * into two parts according to their distance to a certain vantage point.
 *
 * @tparam BoundType The type of bound used by the tree node.
 * @tparam MatType The type of the dataset matrix.
 */
template<typename BoundType, typename MatType = arma::mat>
class VantagePointSplit
{
 public:
  //! The element type of the dataset matrix.
  using ElemType = typename MatType::elem_type;

  /**
   * Split the node according to the distance to a vantage point.
   *
   * @param bound The bound used for this node.
   * @param data The dataset used by the binary space tree.
   * @param begin Index of the starting point in the dataset that belongs to
   *     this node.
   * @param count Number of points in this node.
   * @param splitCol The index at which the dataset is divided into two parts
   *     after the rearrangement.
   * @return A flag describing the outcome of the split.  NOTE(review):
   *     presumably true when the node was split successfully -- confirm
   *     against vantage_point_split_impl.hpp.
   */
  static bool SplitNode(const BoundType& bound,
                        MatType& data,
                        const size_t begin,
                        const size_t count,
                        size_t& splitCol);

  /**
   * Split the node according to the distance to a vantage point, keeping track
   * of the original position of each rearranged point.
   *
   * @param bound The bound used for this node.
   * @param data The dataset used by the binary space tree.
   * @param begin Index of the starting point in the dataset that belongs to
   *     this node.
   * @param count Number of points in this node.
   * @param splitCol The index at which the dataset is divided into two parts
   *     after the rearrangement.
   * @param oldFromNew Vector which will be filled with the old positions for
   *     each new point.
   * @return A flag describing the outcome of the split.  NOTE(review):
   *     presumably true when the node was split successfully -- confirm
   *     against vantage_point_split_impl.hpp.
   */
  static bool SplitNode(const BoundType& bound,
                        MatType& data,
                        const size_t begin,
                        const size_t count,
                        size_t& splitCol,
                        std::vector<size_t>& oldFromNew);

 private:
  /**
   * The maximum number of samples used for vantage point estimation and for
   * estimation of the median.
   */
  static const size_t maxNumSamples = 100;

  /**
   * A (point, distance) pair that can be ordered by distance; see
   * StructComp().
   *
   * @tparam StructElemType The type used to store the distance.
   */
  template<typename StructElemType>
  struct SortStruct
  {
    //! The point this entry refers to.
    size_t point;
    //! The distance associated with the point.  The template parameter is
    //! used here so that it is not dead; SortStruct<ElemType> keeps exactly
    //! the same layout as before.
    StructElemType dist;
  };

  /**
   * Strict weak ordering on SortStruct objects by ascending distance.
   *
   * @param s1 The left-hand operand.
   * @param s2 The right-hand operand.
   * @return True if the distance of s1 is strictly less than that of s2.
   */
  template<typename StructElemType>
  static bool StructComp(const SortStruct<StructElemType>& s1,
                         const SortStruct<StructElemType>& s2)
  {
    return (s1.dist < s2.dist);
  }

  /**
   * Select the best vantage point, i.e. the point with the largest second
   * moment of the distance from a number of random node points to the vantage
   * point.  Firstly this method selects no more than maxNumSamples random
   * points.  Then it evaluates each point, i.e. calculates the corresponding
   * second moment, and selects the point with the largest moment.  Each random
   * point belongs to the node.
   *
   * @param bound The bound used for this node.
   * @param data The dataset used by the binary space tree.
   * @param begin Index of the starting point in the dataset that belongs to
   *     this node.
   * @param count Number of points in this node.
   * @param vantagePoint The index of the vantage point in the dataset
   *     (output).
   * @param mu The median value of distance from the vantage point to a number
   *     of random points (output).
   */
  static void SelectVantagePoint(const BoundType& bound,
                                 const MatType& data,
                                 const size_t begin,
                                 const size_t count,
                                 size_t& vantagePoint,
                                 ElemType& mu);

  /**
   * Find no more than numSamples distinct random samples, i.e. random points
   * that belong to the node.  Each sample belongs to the interval
   * [begin, begin + upperBound).
   *
   * @param distinctSamples The vector of sample indices (output).
   * @param numSamples Maximum number of samples.
   * @param begin The least index.
   * @param upperBound The upper bound of indices.
   */
  static void GetDistinctSamples(arma::uvec& distinctSamples,
                                 const size_t numSamples,
                                 const size_t begin,
                                 const size_t upperBound);

  /**
   * Get the median value of the distance from a certain vantage point to a
   * number of samples.
   *
   * @param bound The bound used for this node.
   * @param data The dataset used by the binary space tree.
   * @param samples The indices of random samples.
   * @param vantagePoint The vantage point.
   * @param mu The median value (output).
   */
  static void GetMedian(const BoundType& bound,
                        const MatType& data,
                        const arma::uvec& samples,
                        const size_t vantagePoint,
                        ElemType& mu);

  /**
   * Calculate the second moment of the distance from a certain vantage point
   * to a number of random samples.
   *
   * @param bound The bound used for this node.
   * @param data The dataset used by the binary space tree.
   * @param samples The indices of random samples.
   * @param vantagePoint The vantage point.
   * @return The second moment of the distance.
   */
  static ElemType GetSecondMoment(const BoundType& bound,
                                  const MatType& data,
                                  const arma::uvec& samples,
                                  const size_t vantagePoint);

  /**
   * This method returns true if a point should be assigned to the left
   * subtree, i.e. the distance from the point to the vantage point is less
   * than the median value.  Otherwise it returns false.
   *
   * @param bound The bound used for this node.
   * @param mat The dataset used by the binary space tree.
   * @param vantagePoint The vantage point.
   * @param point The point that is being assigned.
   * @param mu The median value.
   * @return True if the point belongs to the left subtree, false otherwise.
   */
  template<typename VecType>
  static bool AssignToLeftSubtree(const BoundType& bound,
                                  const MatType& mat,
                                  const VecType& vantagePoint,
                                  const size_t point,
                                  const ElemType mu);

  /**
   * Perform split according to the median value and the vantage point.
   *
   * @param bound The bound used for this node.
   * @param data The dataset used by the binary space tree.
   * @param begin Index of the starting point in the dataset that belongs to
   *     this node.
   * @param count Number of points in this node.
   * @param vantagePoint The vantage point.
   * @param mu The median value.
   * @return NOTE(review): presumably the index at which the dataset is divided
   *     into two parts after the rearrangement -- confirm against
   *     vantage_point_split_impl.hpp.
   */
  template<typename VecType>
  static size_t PerformSplit(const BoundType& bound,
                             MatType& data,
                             const size_t begin,
                             const size_t count,
                             const VecType& vantagePoint,
                             const ElemType mu);

  /**
   * Perform split according to the median value and the vantage point, keeping
   * track of the original position of each rearranged point.
   *
   * @param bound The bound used for this node.
   * @param data The dataset used by the binary space tree.
   * @param begin Index of the starting point in the dataset that belongs to
   *     this node.
   * @param count Number of points in this node.
   * @param vantagePoint The vantage point.
   * @param mu The median value.
   * @param oldFromNew Vector which will be filled with the old positions for
   *     each new point.
   * @return NOTE(review): presumably the index at which the dataset is divided
   *     into two parts after the rearrangement -- confirm against
   *     vantage_point_split_impl.hpp.
   */
  template<typename VecType>
  static size_t PerformSplit(const BoundType& bound,
                             MatType& data,
                             const size_t begin,
                             const size_t count,
                             const VecType& vantagePoint,
                             const ElemType mu,
                             std::vector<size_t>& oldFromNew);
};

} // namespace tree
} // namespace mlpack

// Include implementation.
#include "vantage_point_split_impl.hpp"

#endif // MLPACK_CORE_TREE_BINARY_SPACE_TREE_VANTAGE_POINT_SPLIT_HPP