Skip to content

Commit

Permalink
Merge pull request CGAL#4559 from sgiraudot/Spatial_searching-Paralle…
Browse files Browse the repository at this point in the history
…lize_kd_tree_build-GF

[Small Feature] Parallelize KD Tree build
  • Loading branch information
maxGimeno committed Apr 20, 2020
2 parents a7285e1 + 95b9f05 commit ae51708
Show file tree
Hide file tree
Showing 9 changed files with 349 additions and 137 deletions.
Expand Up @@ -161,9 +161,9 @@ class Point_set_feature_generator
CGAL::Real_timer t;
t.start();
if (lower_grid == nullptr)
neighborhood = new Neighborhood (input, point_map);
neighborhood = new Neighborhood (input, point_map, ConcurrencyTag());
else
neighborhood = new Neighborhood (input, point_map, voxel_size);
neighborhood = new Neighborhood (input, point_map, voxel_size, ConcurrencyTag());
t.stop();

if (lower_grid == nullptr)
Expand Down
Expand Up @@ -173,21 +173,42 @@ class Point_set_neighborhood
/*!
\brief Constructs a neighborhood object based on the input range.
\tparam ConcurrencyTag enables sequential versus parallel
algorithm. Possible values are `Sequential_tag`, `Parallel_tag`,
and `Parallel_if_available_tag`. If no tag is provided,
`Parallel_if_available_tag` is used.
\param input point range.
\param point_map property map to access the input points.
*/
template <typename ConcurrencyTag>
Point_set_neighborhood (const PointRange& input,
PointMap point_map)
PointMap point_map,
const ConcurrencyTag&)
: m_tree (nullptr)
{
init<ConcurrencyTag> (input, point_map);
}

/// \cond SKIP_IN_MANUAL
Point_set_neighborhood (const PointRange& input, PointMap point_map)
: m_tree (nullptr)
{
init<Parallel_if_available_tag> (input, point_map);
}

template <typename ConcurrencyTag>
void init (const PointRange& input, PointMap point_map)
{
My_point_property_map pmap (&input, point_map);
m_tree = new Tree (boost::counting_iterator<boost::uint32_t> (0),
boost::counting_iterator<boost::uint32_t> (boost::uint32_t(input.size())),
Splitter(),
Search_traits (pmap));
m_distance = Distance (pmap);
m_tree->build();
m_tree->template build<ConcurrencyTag>();
}
/// \endcond

/*!
\brief Constructs a simplified neighborhood object based on the input range.
Expand All @@ -197,14 +218,36 @@ class Point_set_neighborhood
present in one cell, only the point closest to the centroid of
this subset is used.
\tparam ConcurrencyTag enables sequential versus parallel
algorithm. Possible values are `Sequential_tag`, `Parallel_tag`,
and `Parallel_if_available_tag`. If no tag is provided,
`Parallel_if_available_tag` is used.
\param input input range.
\param point_map property map to access the input points.
\param voxel_size size of the cells of the 3D grid used for simplification.
*/
template <typename ConcurrencyTag>
Point_set_neighborhood (const PointRange& input,
PointMap point_map,
float voxel_size,
const ConcurrencyTag&)
: m_tree (nullptr)
{
init<ConcurrencyTag> (input, point_map, voxel_size);
}

/// \cond SKIP_IN_MANUAL
Point_set_neighborhood (const PointRange& input,
PointMap point_map,
float voxel_size)
: m_tree (nullptr)
{
init<Parallel_if_available_tag> (input, point_map, voxel_size);
}

template <typename ConcurrencyTag>
void init (const PointRange& input, PointMap point_map, float voxel_size)
{
// First, simplify
std::vector<boost::uint32_t> indices;
Expand All @@ -215,8 +258,9 @@ class Point_set_neighborhood
Splitter(),
Search_traits (pmap));
m_distance = Distance (pmap);
m_tree->build();
m_tree->template build<ConcurrencyTag>();
}
/// \endcond

/// @}

Expand Down
18 changes: 14 additions & 4 deletions Spatial_searching/doc/Spatial_searching/CGAL/Kd_tree.h
Expand Up @@ -23,7 +23,7 @@ in a dynamically allocated array (e.g., `Epick_d` with dynamic
dimension) &mdash; we says "to a lesser extent" because the points
are re-created by the kd-tree in a cache-friendly order after its construction,
so the coordinates are more likely to be stored in a near-optimal order on the
heap. When EnablePointsCache` is set to `Tag_true`, the points
heap. When `EnablePointsCache` is set to `Tag_true`, the points
coordinates will be cached in an optimal way. This will
increase memory consumption but provide better search performance.
See also the `GeneralDistance` and `FuzzyQueryItem` concepts for
Expand Down Expand Up @@ -115,7 +115,17 @@ at the first call to a query or removal member function. You can call
`build()` explicitly to ensure that the next call to
query functions will not trigger the reconstruction of the
data structure.
\tparam ConcurrencyTag enables sequential versus parallel
algorithm. Possible values are `Sequential_tag`, `Parallel_tag`, and
`Parallel_if_available_tag`. This template parameter is optional:
calling `build()` without specifying the concurrency tag will result
in `Sequential_tag` being used. If `build()` is not called by the user
but called implicitly at the first call to a query or removal member
function, `Sequential_tag` is also used.
*/
template <typename ConcurrencyTag>
void build();

/*!
Expand Down Expand Up @@ -147,14 +157,14 @@ template <class InputIterator> void insert(InputIterator first, InputIterator be
/*!
Removes the point `p` from the `k-d` tree. It uses `equal_to_p` to identify
the point after locating it, which can matter in particular when 2 points are
in the same place. `Identify_point` is a unary functor that takes a `Point_d`
in the same place. `IdentifyPoint` is a unary functor that takes a `Point_d`
and returns a `bool`. This is a limited and naive implementation that does not
rebalance the tree. On the other hand, the tree remains valid and ready for
queries. If the internal data structure is not already built, for instance
because the last operation was an insertion, it first calls `build()`.
*/
template<class Identify_point>
void remove(Point_d p, Identify_point equal_to_p);
template<class IdentifyPoint>
void remove(Point_d p, IdentifyPoint identify_point);

/*!
Removes point `p`, calling the 2-argument function `remove()` with a functor
Expand Down
60 changes: 46 additions & 14 deletions Spatial_searching/doc/Spatial_searching/Spatial_searching.txt
Expand Up @@ -407,6 +407,23 @@ higher dimensions.

\cgalExample{Spatial_searching/splitter_worst_cases.cpp}

\subsection Spatial_searchingExampleParallel Example for Parallel Neighbor Search

In order to speed-up the construction of the `kd` tree, the child
branches of each internal node can be computed in parallel, by calling
`Kd_tree::build<CGAL::Parallel_tag>()`. On a quad-core processor, the
parallel construction is experimentally 2 to 3 times faster than the
sequential version, depending on the point cloud. The parallel version
requires the executable to be linked against the <a href="https://www.threadingbuildingblocks.org">Intel TBB library</a>.

One query on the `kd` tree is purely sequential, but several queries
can be done in parallel.

The following example shows how to build the `kd` tree in parallel and
how to perform parallel queries:

\cgalExample{Spatial_searching/parallel_kdtree.cpp}

\section Performance Performance

\subsection OrthogonalPerfomance Performance of the Orthogonal Search
Expand All @@ -415,21 +432,33 @@ We took the gargoyle data set (Surface) from aim\@shape, and generated the same
We then consider three scenarios as data/queries.
The data set contains 800K points. For each query point we compute the K=10,20,30 closest points, with the default splitter and for the bucket size 10 (default) and 20.

The results were produced with the release 4.6 of \cgal, on an Intel i7 2.7 Ghz
laptop with 16 GB RAM, compiled with Visual C++ 2012 with the /O2 option.
The results were produced with the release 5.0 of \cgal, on an Intel i7 2.3 Ghz
laptop with 16 GB RAM, compiled with CLang++ 6 with the O3 option.

The values are the average of ten tests each. We show timings in seconds for both the building of the tree and the queries.

The values are the average of ten tests each.
<center>
k | bucket size | Surface Build | Random Build | Surface/Surface | Surface/Random | Random/Random
--|------------:|--------------:|-------------:|-----------------:|---------------:|----------------:
10| 10 | 0.17 | 0.31 | 1.13 | 15.35 | 3.40
10| 20 | 0.14 | 0.28 | 1.09 | 12.28 | 3.00
20| 10 | (see above) | (see above) | 1.88 | 18.25 | 5.39
20| 20 | (see above) | (see above) | 1.81 | 14.99 | 4.51
30| 10 | (see above) | (see above) | 2.87 | 22.62 | 7.07
30| 20 | (see above) | (see above) | 2.66 | 18.39 | 5.68
</center>

The same experiment is done using the parallel version of the tree building algorithm, and performing the queries in parallel too:

<center>
k | bucket size | Surface/Surface | Surface/Random | Random/Random
--|------------:|-----------------:|---------------:|----------------:
10| 10 | 0.89 | 11.48 | 2.63
10| 20 | 0.89 | 9.80 | 2.25
20| 10 | 1.60 | 13.41 | 4.06
20| 20 | 1.59 | 11.62 | 3.46
30| 10 | 2.35 | 15.52 | 5.42
30| 20 | 2.33 | 13.50 | 4.61
k | bucket size | Surface Build | Random Build | Surface/Surface | Surface/Random | Random/Random
--|------------:|--------------:|-------------:|-----------------:|---------------:|----------------:
10| 10 | 0.07 | 0.12 | 0.24 | 3.52 | 0.66
10| 20 | 0.06 | 0.12 | 0.22 | 2.87 | 0.57
20| 10 | (see above) | (see above) | 0.41 | 4.28 | 1.02
20| 20 | (see above) | (see above) | 0.38 | 3.43 | 0.88
30| 10 | (see above) | (see above) | 0.58 | 4.90 | 1.44
30| 20 | (see above) | (see above) | 0.60 | 4.28 | 1.28
</center>

\cgalFigureBegin{Spatial_searchingfigbenchmark,gargoyle.png}
Expand Down Expand Up @@ -520,9 +549,12 @@ additional requirements when using such a cache.

\section Spatial_searchingImplementationHistory Implementation History

The initial implementation of this package was done by Hans Tangelder and
Andreas Fabri. It was optimized in speed and memory consumption by Markus
Overtheil during an internship at GeometryFactory in 2014.
The initial implementation of this package was done by Hans Tangelder
and Andreas Fabri. It was optimized in speed and memory consumption by
Markus Overtheil during an internship at GeometryFactory in 2014. The
`EnablePointsCache` feature was introduced by Clément Jamin in 2019.
The parallel `kd` tree build function was introduced by Simon Giraudot
in 2020.

*/
} /* namespace CGAL */
Expand Down
1 change: 1 addition & 0 deletions Spatial_searching/doc/Spatial_searching/examples.txt
Expand Up @@ -18,4 +18,5 @@
\example Spatial_searching/weighted_Minkowski_distance.cpp
\example Spatial_searching/splitter_worst_cases.cpp
\example Spatial_searching/searching_sphere_orthogonally.cpp
\example Spatial_searching/parallel_kdtree.cpp
*/
9 changes: 9 additions & 0 deletions Spatial_searching/examples/Spatial_searching/CMakeLists.txt
Expand Up @@ -76,3 +76,12 @@ else()
message(STATUS "will not be compiled as they use CGAL::Epick_d which requires the Eigen library.")

endif()

find_package( TBB QUIET )
if(TBB_FOUND)
create_single_source_cgal_program( "parallel_kdtree.cpp" )
cgal_target_use_TBB(parallel_kdtree)
else()
message(STATUS "parallel_kdtree.cpp requires TBB and will not be compiled")
endif()

56 changes: 56 additions & 0 deletions Spatial_searching/examples/Spatial_searching/parallel_kdtree.cpp
@@ -0,0 +1,56 @@
#include <CGAL/Simple_cartesian.h>
#include <CGAL/point_generators_3.h>
#include <CGAL/Orthogonal_k_neighbor_search.h>
#include <CGAL/Search_traits_3.h>

#include <tbb/blocked_range.h>
#include <tbb/parallel_for.h>

using Kernel = CGAL::Simple_cartesian<double>;
using Point_3 = Kernel::Point_3;

using Traits = CGAL::Search_traits_3<Kernel>;
using Neighbor_search = CGAL::Orthogonal_k_neighbor_search<Traits>;
using Tree = Neighbor_search::Tree;
using Point_with_distance = Neighbor_search::Point_with_transformed_distance;

using Generator = CGAL::Random_points_in_sphere_3<Point_3>;

int main()
{
const unsigned int N = 1000;
const unsigned int k = 6;

// Generate N points in a sphere
std::vector<Point_3> points;
points.reserve (N);
Generator generator;
for (unsigned int i = 0; i < N; ++ i)
points.push_back (*(generator++));

// Build tree in parallel
Tree tree(points.begin(), points.end());
tree.build<CGAL::Parallel_tag>();

// Query tree in parallel
std::vector<std::vector<Point_3> > neighbors (points.size());
tbb::parallel_for (tbb::blocked_range<std::size_t> (0, points.size()),
[&](const tbb::blocked_range<std::size_t>& r)
{
for (std::size_t s = r.begin(); s != r.end(); ++ s)
{
// Neighbor search can be instantiated from
// several threads at the same time
Neighbor_search search (tree, points[s], k);
neighbors[s].reserve(k);

// neighbor search returns a set of pair of
// point and distance <Point_3,FT>, here we
// keep the points only
for (const Point_with_distance& pwd : search)
neighbors[s].push_back (pwd.first);
}
});

return 0;
}

0 comments on commit ae51708

Please sign in to comment.