From 45641056474fbf5874c4c46fae5ee1c06fa6758c Mon Sep 17 00:00:00 2001 From: Nicholas Sharp Date: Sun, 26 Apr 2026 22:33:42 -0700 Subject: [PATCH 1/7] move nanort to standard location, update to latest --- deps/nanort/include/{nanort => }/nanort.h | 1711 +++++++++++++++------ src/surface/simple_polygon_mesh.cpp | 13 +- 2 files changed, 1262 insertions(+), 462 deletions(-) rename deps/nanort/include/{nanort => }/nanort.h (51%) diff --git a/deps/nanort/include/nanort/nanort.h b/deps/nanort/include/nanort.h similarity index 51% rename from deps/nanort/include/nanort/nanort.h rename to deps/nanort/include/nanort.h index cada5917..557e4abe 100644 --- a/deps/nanort/include/nanort/nanort.h +++ b/deps/nanort/include/nanort.h @@ -2,10 +2,16 @@ // NanoRT, single header only modern ray tracing kernel. // +// +// Notes : The number of primitives are up to 2G. If you want to render large +// data, please split data into chunks(~ 2G prims) and use NanoSG scene graph +// library(`${nanort}/examples/nanosg`). +// + /* The MIT License (MIT) -Copyright (c) 2015 - 2016 Light Transport Entertainment, Inc. +Copyright (c) 2015 - Present: Light Transport Entertainment Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -42,11 +48,61 @@ THE SOFTWARE. #include #include +// compiler macros +// +// NANORT_USE_CPP11_FEATURE : Enable C++11 feature +// NANORT_ENABLE_PARALLEL_BUILD : Enable parallel BVH build. +// NANORT_ENABLE_SERIALIZATION : Enable serialization feature for built BVH. +// +// Parallelized BVH build is supported on C++11 thread version. +// OpenMP version is not fully tested. +// thus turn off if you face a problem when building BVH in parallel. +// #define NANORT_ENABLE_PARALLEL_BUILD + +// Some constants +#define kNANORT_MAX_STACK_DEPTH (512) +#define kNANORT_MIN_PRIMITIVES_FOR_PARALLEL_BUILD (1024 * 8) +#define kNANORT_SHALLOW_DEPTH (4) // will create 2**N subtrees + +#ifdef NANORT_USE_CPP11_FEATURE +// Assume C++11 compiler has thread support. +// In some situation (e.g. embedded system, JIT compilation), thread feature +// may not be available though... +#include +#include +#include + +#define kNANORT_MAX_THREADS (256) + +// Parallel build should work well for C++11 version, thus force enable it. +#ifndef NANORT_ENABLE_PARALLEL_BUILD +#define NANORT_ENABLE_PARALLEL_BUILD +#endif + +#endif + namespace nanort { -// Parallelized BVH build is not yet fully tested, -// thus turn off if you face a problem when building BVH. -#define NANORT_ENABLE_PARALLEL_BUILD (1) +// RayType +typedef enum { + RAY_TYPE_NONE = 0x0, + RAY_TYPE_PRIMARY = 0x1, + RAY_TYPE_SECONDARY = 0x2, + RAY_TYPE_DIFFUSE = 0x4, + RAY_TYPE_REFLECTION = 0x8, + RAY_TYPE_REFRACTION = 0x10 +} RayType; + +#ifdef __clang__ +#pragma clang diagnostic push +#if __has_warning("-Wzero-as-null-pointer-constant") +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif + +#if __has_warning("-Wunsafe-buffer-usage") +#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" +#endif +#endif // ---------------------------------------------------------------------------- // Small vector class useful for multi-threaded environment. @@ -78,7 +134,7 @@ namespace nanort { template class StackAllocator : public std::allocator { public: - typedef typename std::allocator::pointer pointer; + typedef T* pointer; typedef typename std::allocator::size_type size_type; // Backing store for the allocator. The container owner is responsible for @@ -146,7 +202,11 @@ class StackAllocator : public std::allocator { source_->used_stack_buffer_ = true; return source_->stack_buffer(); } else { +#if __cplusplus >= 201703L + return std::allocator_traits>::allocate(*this, n, hint); +#else return std::allocator::allocate(n, hint); +#endif } } @@ -301,14 +361,12 @@ class real3 { real3 operator/(const real3 &f2) const { return real3(x() / f2.x(), y() / f2.y(), z() / f2.z()); } - real3 operator-() const { - return real3(-x(), -y(), -z()); - } + real3 operator-() const { return real3(-x(), -y(), -z()); } T operator[](int i) const { return v[i]; } T &operator[](int i) { return v[i]; } T v[3]; - // T pad; // for alignment(when T = float) + // T pad; // for alignment (when T = float) }; template @@ -330,8 +388,8 @@ template inline real3 vnormalize(const real3 &rhs) { real3 v = rhs; T len = vlength(rhs); - if (fabs(len) > 1.0e-6f) { - float inv_len = 1.0f / len; + if (std::fabs(len) > std::numeric_limits::epsilon()) { + T inv_len = static_cast(1.0) / len; v.v[0] *= inv_len; v.v[1] *= inv_len; v.v[2] *= inv_len; @@ -340,7 +398,7 @@ inline real3 vnormalize(const real3 &rhs) { } template -inline real3 vcross(real3 a, real3 b) { +inline real3 vcross(const real3 a, const real3 b) { real3 c; c[0] = a[1] * b[2] - a[2] * b[1]; c[1] = a[2] * b[0] - a[0] * b[2]; @@ -349,10 +407,63 @@ inline real3 vcross(real3 a, real3 b) { } template -inline T vdot(real3 a, real3 b) { +inline T vdot(const real3 a, const real3 b) { return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; } +template +inline real3 vsafe_inverse(const real3 v) { + real3 r; + +#ifdef NANORT_USE_CPP11_FEATURE + + if (std::fabs(v[0]) < std::numeric_limits::epsilon()) { + r[0] = std::numeric_limits::infinity() * + std::copysign(static_cast(1), v[0]); + } else { + r[0] = static_cast(1.0) / v[0]; + } + + if (std::fabs(v[1]) < std::numeric_limits::epsilon()) { + r[1] = std::numeric_limits::infinity() * + std::copysign(static_cast(1), v[1]); + } else { + r[1] = static_cast(1.0) / v[1]; + } + + if (std::fabs(v[2]) < std::numeric_limits::epsilon()) { + r[2] = std::numeric_limits::infinity() * + std::copysign(static_cast(1), v[2]); + } else { + r[2] = static_cast(1.0) / v[2]; + } +#else + + if (std::fabs(v[0]) < std::numeric_limits::epsilon()) { + T sgn = (v[0] < static_cast(0)) ? static_cast(-1) : static_cast(1); + r[0] = std::numeric_limits::infinity() * sgn; + } else { + r[0] = static_cast(1.0) / v[0]; + } + + if (std::fabs(v[1]) < std::numeric_limits::epsilon()) { + T sgn = (v[1] < static_cast(0)) ? static_cast(-1) : static_cast(1); + r[1] = std::numeric_limits::infinity() * sgn; + } else { + r[1] = static_cast(1.0) / v[1]; + } + + if (std::fabs(v[2]) < std::numeric_limits::epsilon()) { + T sgn = (v[2] < static_cast(0)) ? static_cast(-1) : static_cast(1); + r[2] = std::numeric_limits::infinity() * sgn; + } else { + r[2] = static_cast(1.0) / v[2]; + } +#endif + + return r; +} + template inline const real *get_vertex_addr(const real *p, const size_t idx, const size_t stride_bytes) { @@ -360,25 +471,70 @@ inline const real *get_vertex_addr(const real *p, const size_t idx, reinterpret_cast(p) + idx * stride_bytes); } -template +template class Ray { public: - real org[3]; // must set - real dir[3]; // must set - real min_t; // minium ray hit distance. must set. - real max_t; // maximum ray hit distance. must set. - real inv_dir[3]; // filled internally - int dir_sign[3]; // filled internally + Ray() + : min_t(static_cast(0.0)), + max_t(std::numeric_limits::max()), + type(RAY_TYPE_NONE) { + org[0] = static_cast(0.0); + org[1] = static_cast(0.0); + org[2] = static_cast(0.0); + dir[0] = static_cast(0.0); + dir[1] = static_cast(0.0); + dir[2] = static_cast(-1.0); + } + + T org[3]; // must set + T dir[3]; // must set + T min_t; // minimum ray hit distance. + T max_t; // maximum ray hit distance. + unsigned int type; // ray type + + // TODO(LTE): Align sizeof(Ray) }; -template +template class BVHNode { public: BVHNode() {} + BVHNode(const BVHNode &rhs) { + bmin[0] = rhs.bmin[0]; + bmin[1] = rhs.bmin[1]; + bmin[2] = rhs.bmin[2]; + flag = rhs.flag; + + bmax[0] = rhs.bmax[0]; + bmax[1] = rhs.bmax[1]; + bmax[2] = rhs.bmax[2]; + axis = rhs.axis; + + data[0] = rhs.data[0]; + data[1] = rhs.data[1]; + } + + BVHNode &operator=(const BVHNode &rhs) { + bmin[0] = rhs.bmin[0]; + bmin[1] = rhs.bmin[1]; + bmin[2] = rhs.bmin[2]; + flag = rhs.flag; + + bmax[0] = rhs.bmax[0]; + bmax[1] = rhs.bmax[1]; + bmax[2] = rhs.bmax[2]; + axis = rhs.axis; + + data[0] = rhs.data[0]; + data[1] = rhs.data[1]; + + return (*this); + } + ~BVHNode() {} - real bmin[3]; - real bmax[3]; + T bmin[3]; + T bmax[3]; int flag; // 1 = leaf node, 0 = branch node int axis; @@ -393,10 +549,10 @@ class BVHNode { unsigned int data[2]; }; -template -class IsectComparator { +template +class IntersectComparator { public: - bool operator()(const I &a, const I &b) const { return a.t < b.t; } + bool operator()(const H &a, const H &b) const { return a.t < b.t; } }; /// BVH build option. @@ -416,12 +572,13 @@ struct BVHBuildOptions { // Set default value: Taabb = 0.2 BVHBuildOptions() - : cost_t_aabb(0.2f), + : cost_t_aabb(static_cast(0.2)), min_leaf_primitives(4), max_tree_depth(256), bin_size(64), - shallow_depth(3), - min_primitives_for_parallel_build(1024 * 128), + shallow_depth(kNANORT_SHALLOW_DEPTH), + min_primitives_for_parallel_build( + kNANORT_MIN_PRIMITIVES_FOR_PARALLEL_BUILD), cache_bbox(false) {} }; @@ -441,22 +598,34 @@ class BVHBuildStatistics { build_secs(0.0f) {} }; -/// BVH trace option. +/// +/// @brief BVH trace option. +/// class BVHTraceOptions { public: // Hit only for face IDs in indexRange. // This feature is good to mimic something like glDrawArrays() unsigned int prim_ids_range[2]; + + // Prim ID to skip for avoiding self-intersection + // -1 = no skipping + unsigned int skip_prim_id; + bool cull_back_face; - unsigned char pad[3]; ///< Padding(not used) + unsigned char pad[3]; ///< Padding (not used) BVHTraceOptions() { prim_ids_range[0] = 0; prim_ids_range[1] = 0x7FFFFFFF; // Up to 2G face IDs. + + skip_prim_id = static_cast(-1); cull_back_face = false; } }; +/// +/// @brief Bounding box. +/// template class BBox { public: @@ -469,40 +638,157 @@ class BBox { } }; -template +/// +/// @brief Hit class for traversing nodes. +/// +/// Stores hit information of node traversal. +/// Node traversal is used for two-level ray tracing(efficient ray traversal of a scene hierarchy) +/// +template +class NodeHit { + public: + NodeHit() + : t_min(std::numeric_limits::max()), + t_max(-std::numeric_limits::max()), + node_id(static_cast(-1)) {} + + NodeHit(const NodeHit &rhs) { + t_min = rhs.t_min; + t_max = rhs.t_max; + node_id = rhs.node_id; + } + + NodeHit &operator=(const NodeHit &rhs) { + t_min = rhs.t_min; + t_max = rhs.t_max; + node_id = rhs.node_id; + + return (*this); + } + + ~NodeHit() {} + + T t_min; + T t_max; + unsigned int node_id; +}; + +/// +/// @brief Comparator object for NodeHit. +/// +/// Comparator object for finding nearest hit point in node traversal. +/// +template +class NodeHitComparator { + public: + inline bool operator()(const NodeHit &a, const NodeHit &b) { + return a.t_min < b.t_min; + } +}; + +/// +/// @brief Bounding Volume Hierarchy acceleration. +/// +/// BVHAccel is central part of ray tracing(ray traversal). +/// BVHAccel takes an input geometry(primitive) information and build a data structure +/// for efficient ray tracing(`O(log2 N)` in theory, where N is the number of primitive in the scene). +/// +/// @tparam T real value type(float or double). +/// +template class BVHAccel { public: BVHAccel() : pad0_(0) { (void)pad0_; } ~BVHAccel() {} + /// /// Build BVH for input primitives. - bool Build(const unsigned int num_primitives, - const BVHBuildOptions &options, const P &p, const Pred &pred); - - /// Get statistics of built BVH tree. Valid after Build() + /// + /// @tparam Prim Primitive(e.g. Triangle) accessor class. + /// @tparam Pred Predicator(comparator class object for `Prim` class to find nearest hit point) + /// + /// @param[in] num_primitives The number of primitive. + /// @param[in] p Primitive accessor class object. + /// @param[in] pred Predicator object. + /// + /// @return true upon success. + /// + template + bool Build(const unsigned int num_primitives, const Prim &p, const Pred &pred, + const BVHBuildOptions &options = BVHBuildOptions()); + + /// + /// Get statistics of built BVH tree. Valid after `Build()` + /// + /// @return BVH build statistics. + /// BVHBuildStatistics GetStatistics() const { return stats_; } +#if defined(NANORT_ENABLE_SERIALIZATION) + /// /// Dump built BVH to the file. - bool Dump(const char *filename); + /// + bool Dump(const char *filename) const; + bool Dump(FILE *fp) const; + /// /// Load BVH binary + /// bool Load(const char *filename); + bool Load(FILE *fp); +#endif + + void Debug(); - /// Traverse into BVH along ray and find closest hit point & primitive if + /// + /// @brief Traverse into BVH along ray and find closest hit point & primitive if /// found - bool Traverse(const Ray &ray, const BVHTraceOptions &options, - const I &intersector) const; + /// + /// @tparam I Intersector class + /// @tparam H Hit class + /// + /// @param[in] ray Input ray + /// @param[in] intersector Intersector object. This object is called for each possible intersection of ray and BVH during traversal. + /// @param[out] isect Intersection point information(filled when closest hit point was found) + /// @param[in] options Traversal options. + /// + /// @return true if the closest hit point found. + /// + template + bool Traverse(const Ray &ray, const I &intersector, H *isect, + const BVHTraceOptions &options = BVHTraceOptions()) const; +#if 0 /// Multi-hit ray traversal /// Returns `max_intersections` frontmost intersections - bool MultiHitTraverse(const Ray &ray, const BVHTraceOptions &optins, + template + bool MultiHitTraverse(const Ray &ray, int max_intersections, - StackVector *intersector) const; + const I &intersector, + StackVector *isects, + const BVHTraceOptions &options = BVHTraceOptions()) const; +#endif + + /// + /// List up nodes which intersects along the ray. + /// This function is useful for two-level BVH traversal. + /// See `examples/nanosg` for example. + /// + /// @tparam I Intersection class + /// + /// + /// + template + bool ListNodeIntersections(const Ray &ray, int max_intersections, + const I &intersector, + StackVector, 128> *hits) const; const std::vector > &GetNodes() const { return nodes_; } const std::vector &GetIndices() const { return indices_; } + /// /// Returns bounding box of built BVH. + /// void BoundingBox(T bmin[3], T bmax[3]) const { if (nodes_.empty()) { bmin[0] = bmin[1] = bmin[2] = std::numeric_limits::max(); @@ -520,7 +806,7 @@ class BVHAccel { bool IsValid() const { return nodes_.size() > 0; } private: -#if NANORT_ENABLE_PARALLEL_BUILD +#if defined(NANORT_ENABLE_PARALLEL_BUILD) typedef struct { unsigned int left_idx; unsigned int right_idx; @@ -531,6 +817,7 @@ class BVHAccel { std::vector shallow_node_infos_; /// Builds shallow BVH tree recursively. + template unsigned int BuildShallowTree(std::vector > *out_nodes, unsigned int left_idx, unsigned int right_idx, unsigned int depth, @@ -539,17 +826,30 @@ class BVHAccel { #endif /// Builds BVH tree recursively. + template unsigned int BuildTree(BVHBuildStatistics *out_stat, std::vector > *out_nodes, unsigned int left_idx, unsigned int right_idx, unsigned int depth, const P &p, const Pred &pred); + template bool TestLeafNode(const BVHNode &node, const Ray &ray, const I &intersector) const; - // bool MultiHitTestLeafNode(IsectVector *isects, int max_intersections, - // const BVHNode &node, const Ray &ray, - // const I &intersector) const; + template + bool TestLeafNodeIntersections( + const BVHNode &node, const Ray &ray, const int max_intersections, + const I &intersector, + std::priority_queue, std::vector >, + NodeHitComparator > *isect_pq) const; + +#if 0 + template + bool MultiHitTestLeafNode(std::priority_queue, Comp> *isect_pq, + int max_intersections, + const BVHNode &node, const Ray &ray, + const I &intersector) const; +#endif std::vector > nodes_; std::vector indices_; // max 4G triangles. @@ -567,11 +867,18 @@ class TriangleSAHPred { const T *vertices, const unsigned int *faces, size_t vertex_stride_bytes) // e.g. 12 for sizeof(float) * XYZ : axis_(0), - pos_(0.0f), + pos_(static_cast(0.0)), vertices_(vertices), faces_(faces), vertex_stride_bytes_(vertex_stride_bytes) {} + TriangleSAHPred(const TriangleSAHPred &rhs) + : axis_(rhs.axis_), + pos_(rhs.pos_), + vertices_(rhs.vertices_), + faces_(rhs.faces_), + vertex_stride_bytes_(rhs.vertex_stride_bytes_) {} + void Set(int axis, T pos) const { axis_ = axis; pos_ = pos; @@ -590,8 +897,7 @@ class TriangleSAHPred { real3 p2(get_vertex_addr(vertices_, i2, vertex_stride_bytes_)); T center = p0[axis] + p1[axis] + p2[axis]; - - return (center < pos * 3.0); + return (center < pos * static_cast(3.0)); } private: @@ -617,42 +923,66 @@ class TriangleMesh { /// This function is called for each primitive in BVH build. void BoundingBox(real3 *bmin, real3 *bmax, unsigned int prim_index) const { - (*bmin)[0] = get_vertex_addr(vertices_, faces_[3 * prim_index + 0], - vertex_stride_bytes_)[0]; - (*bmin)[1] = get_vertex_addr(vertices_, faces_[3 * prim_index + 0], - vertex_stride_bytes_)[1]; - (*bmin)[2] = get_vertex_addr(vertices_, faces_[3 * prim_index + 0], - vertex_stride_bytes_)[2]; - (*bmax)[0] = get_vertex_addr(vertices_, faces_[3 * prim_index + 0], - vertex_stride_bytes_)[0]; - (*bmax)[1] = get_vertex_addr(vertices_, faces_[3 * prim_index + 0], - vertex_stride_bytes_)[1]; - (*bmax)[2] = get_vertex_addr(vertices_, faces_[3 * prim_index + 0], - vertex_stride_bytes_)[2]; + unsigned vertex = faces_[3 * prim_index + 0]; + (*bmin)[0] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[0]; + (*bmin)[1] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[1]; + (*bmin)[2] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[2]; + (*bmax)[0] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[0]; + (*bmax)[1] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[1]; + (*bmax)[2] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[2]; + + // remaining two vertices of the primitive for (unsigned int i = 1; i < 3; i++) { - for (unsigned int k = 0; k < 3; k++) { - if ((*bmin)[static_cast(k)] > - get_vertex_addr(vertices_, faces_[3 * prim_index + i], - vertex_stride_bytes_)[k]) { - (*bmin)[static_cast(k)] = get_vertex_addr( - vertices_, faces_[3 * prim_index + i], vertex_stride_bytes_)[k]; - } - if ((*bmax)[static_cast(k)] < - get_vertex_addr(vertices_, faces_[3 * prim_index + i], - vertex_stride_bytes_)[k]) { - (*bmax)[static_cast(k)] = get_vertex_addr( - vertices_, faces_[3 * prim_index + i], vertex_stride_bytes_)[k]; - } + // xyz + for (int k = 0; k < 3; k++) { + T coord = get_vertex_addr(vertices_, faces_[3 * prim_index + i], + vertex_stride_bytes_)[k]; + + (*bmin)[k] = std::min((*bmin)[k], coord); + (*bmax)[k] = std::max((*bmax)[k], coord); } } } + void BoundingBoxAndCenter(real3* bmin, real3* bmax, real3* center, unsigned int prim_index) const { + unsigned int i0 = faces_[3 * prim_index + 0]; + unsigned int i1 = faces_[3 * prim_index + 1]; + unsigned int i2 = faces_[3 * prim_index + 2]; + + real3 p0(get_vertex_addr(vertices_, i0, vertex_stride_bytes_)); + real3 p1(get_vertex_addr(vertices_, i1, vertex_stride_bytes_)); + real3 p2(get_vertex_addr(vertices_, i2, vertex_stride_bytes_)); + for (int k = 0; k < 3; ++k) { + (*bmin)[k] = std::min(p0[k], std::min(p1[k], p2[k])); + (*bmax)[k] = std::max(p0[k], std::max(p1[k], p2[k])); + } + *center = (p0 + p1 + p2) * (T(1) / T(3)); + } + const T *vertices_; const unsigned int *faces_; const size_t vertex_stride_bytes_; + + // + // Accessors + // + const T *GetVertices() const { + return vertices_; + } + + const unsigned int *GetFaces() const { + return faces_; + } + + size_t GetVertexStrideBytes() const { + return vertex_stride_bytes_; + } }; +/// +/// Stores intersection point information for triangle geometry. +/// template class TriangleIntersection { public: @@ -664,9 +994,31 @@ class TriangleIntersection { unsigned int prim_id; }; -template > +/// +/// Intersector is a template class which implements intersection method and stores +/// intesection point information(`H`) +/// +/// @tparam T Precision(float or double) +/// @tparam H Intersection point information struct +/// +template > class TriangleIntersector { public: + + // Initialize from mesh object. + // M: mesh class + template + TriangleIntersector(const M &m) + : vertices_(m.GetVertices()), + faces_(m.GetFaces()), + vertex_stride_bytes_(m.GetVertexStrideBytes()) {} + + template + TriangleIntersector(const M *m) + : vertices_(m->GetVertices()), + faces_(m->GetFaces()), + vertex_stride_bytes_(m->GetVertexStrideBytes()) {} + TriangleIntersector(const T *vertices, const unsigned int *faces, const size_t vertex_stride_bytes) // e.g. // vertex_stride_bytes @@ -686,16 +1038,20 @@ class TriangleIntersector { int kz; } RayCoeff; - /// Do ray interesection stuff for `prim_index` th primitive and return hit - /// distance `t`, - /// varycentric coordinate `u` and `v`. + /// Do ray intersection stuff for `prim_index` th primitive and return hit + /// distance `t`, barycentric coordinate `u` and `v`. /// Returns true if there's intersection. - bool Intersect(T *t_inout, unsigned int prim_index) const { + bool Intersect(T *t_inout, const unsigned int prim_index) const { if ((prim_index < trace_options_.prim_ids_range[0]) || (prim_index >= trace_options_.prim_ids_range[1])) { return false; } + // Self-intersection test. + if (prim_index == trace_options_.skip_prim_id) { + return false; + } + const unsigned int f0 = faces_[3 * prim_index + 0]; const unsigned int f1 = faces_[3 * prim_index + 1]; const unsigned int f2 = faces_[3 * prim_index + 2]; @@ -719,8 +1075,14 @@ class TriangleIntersector { T V = Ax * Cy - Ay * Cx; T W = Bx * Ay - By * Ax; +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wfloat-equal" +#endif + // Fall back to test against edges using double precision. - if (U == 0.0 || V == 0.0 || W == 0.0) { + if (U == static_cast(0.0) || V == static_cast(0.0) || + W == static_cast(0.0)) { double CxBy = static_cast(Cx) * static_cast(By); double CyBx = static_cast(Cy) * static_cast(Bx); U = static_cast(CxBy - CyBx); @@ -734,16 +1096,21 @@ class TriangleIntersector { W = static_cast(BxAy - ByAx); } - if (trace_options_.cull_back_face) { - if (U < 0.0 || V < 0.0 || W < 0.0) return false; - } else { - if ((U < 0.0 || V < 0.0 || W < 0.0) && (U > 0.0 || V > 0.0 || W > 0.0)) { + if (U < static_cast(0.0) || V < static_cast(0.0) || + W < static_cast(0.0)) { + if (trace_options_.cull_back_face || + (U > static_cast(0.0) || V > static_cast(0.0) || + W > static_cast(0.0))) { return false; } } T det = U + V + W; - if (det == 0.0) return false; + if (det == static_cast(0.0)) return false; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif const T Az = ray_coeff_.Sz * A[ray_coeff_.kz]; const T Bz = ray_coeff_.Sz * B[ray_coeff_.kz]; @@ -757,27 +1124,31 @@ class TriangleIntersector { return false; } + if (tt < t_min_) { + return false; + } + (*t_inout) = tt; - // Use Thomas-Mueller style barycentric coord. + // Use Möller-Trumbore style barycentric coordinates // U + V + W = 1.0 and interp(p) = U * p0 + V * p1 + W * p2 // We want interp(p) = (1 - u - v) * p0 + u * v1 + v * p2; // => u = V, v = W. - intersection.u = V * rcpDet; - intersection.v = W * rcpDet; + u_ = V * rcpDet; + v_ = W * rcpDet; return true; } /// Returns the nearest hit distance. - T GetT() const { return intersection.t; } + T GetT() const { return t_; } - /// Update is called when initializing intesection and nearest hit is found. + /// Update is called when initializing intersection and nearest hit is found. void Update(T t, unsigned int prim_idx) const { - intersection.t = t; - intersection.prim_id = prim_idx; + t_ = t; + prim_id_ = prim_idx; } - /// Prepare BVH traversal(e.g. compute inverse ray direction) + /// Prepare BVH traversal (e.g. compute inverse ray direction) /// This function is called only once in BVH traversal. void PrepareTraversal(const Ray &ray, const BVHTraceOptions &trace_options) const { @@ -802,38 +1173,49 @@ class TriangleIntersector { ray_coeff_.ky = ray_coeff_.kx + 1; if (ray_coeff_.ky == 3) ray_coeff_.ky = 0; - // Swap kx and ky dimention to preserve widing direction of triangles. - if (ray.dir[ray_coeff_.kz] < 0.0f) std::swap(ray_coeff_.kx, ray_coeff_.ky); + // Swap kx and ky dimension to preserve winding direction of triangles. + if (ray.dir[ray_coeff_.kz] < static_cast(0.0)) + std::swap(ray_coeff_.kx, ray_coeff_.ky); - // Claculate shear constants. + // Calculate shear constants. ray_coeff_.Sx = ray.dir[ray_coeff_.kx] / ray.dir[ray_coeff_.kz]; ray_coeff_.Sy = ray.dir[ray_coeff_.ky] / ray.dir[ray_coeff_.kz]; - ray_coeff_.Sz = 1.0f / ray.dir[ray_coeff_.kz]; + ray_coeff_.Sz = static_cast(1.0) / ray.dir[ray_coeff_.kz]; trace_options_ = trace_options; - intersection.u = 0.0f; - intersection.v = 0.0f; + t_min_ = ray.min_t; + + u_ = static_cast(0.0); + v_ = static_cast(0.0); } - /// Post BVH traversal stuff(e.g. compute intersection point information) - /// This function is called only once in BVH traversal. - /// `hit` = true if there is something hit. - void PostTraversal(const Ray &ray, bool hit) const { - if (hit) { - // Do something when there is a hit. + /// Post BVH traversal stuff. + /// Fill `isect` if there is a hit. + void PostTraversal(const Ray &ray, bool hit, H *isect) const { + if (hit && isect) { + (*isect).t = t_; + (*isect).u = u_; + (*isect).v = v_; + (*isect).prim_id = prim_id_; } (void)ray; } + private: const T *vertices_; const unsigned int *faces_; const size_t vertex_stride_bytes_; + mutable real3 ray_org_; mutable RayCoeff ray_coeff_; mutable BVHTraceOptions trace_options_; + mutable T t_min_; - mutable I intersection; + mutable T t_; + mutable T u_; + mutable T v_; + mutable unsigned int prim_id_; }; // @@ -853,16 +1235,32 @@ const T &safemax(const T &a, const T &b) { // // SAH functions // +template +struct Bin { + BBox bbox; + size_t count; + T cost; + + Bin() + : count(0), cost(0) + { + // Note: bbox is initialized to be empty + } +}; + +template struct BinBuffer { explicit BinBuffer(unsigned int size) { bin_size = size; - bin.resize(2 * 3 * size); + bin.resize(3 * size); clear(); } - void clear() { memset(&bin[0], 0, sizeof(size_t) * 2 * 3 * bin_size); } + void clear() { + std::fill(bin.begin(), bin.end(), Bin()); + } - std::vector bin; // (min, max) * xyz * binsize + std::vector > bin; unsigned int bin_size; unsigned int pad0; }; @@ -870,7 +1268,8 @@ struct BinBuffer { template inline T CalculateSurfaceArea(const real3 &min, const real3 &max) { real3 box = max - min; - return static_cast(2.0) * (box[0] * box[1] + box[1] * box[2] + box[2] * box[0]); + return static_cast(2.0) * + (box[0] * box[1] + box[1] * box[2] + box[2] * box[0]); } template @@ -903,7 +1302,7 @@ inline void GetBoundingBoxOfTriangle(real3 *bmin, real3 *bmax, } template -inline void ContributeBinBuffer(BinBuffer *bins, // [out] +inline void ContributeBinBuffer(BinBuffer *bins, // [out] const real3 &scene_min, const real3 &scene_max, unsigned int *indices, unsigned int left_idx, @@ -913,61 +1312,46 @@ inline void ContributeBinBuffer(BinBuffer *bins, // [out] // Calculate extent real3 scene_size, scene_inv_size; scene_size = scene_max - scene_min; + for (int i = 0; i < 3; ++i) { - assert(scene_size[i] >= 0.0); + assert(scene_size[i] >= static_cast(0.0)); - if (scene_size[i] > 0.0) { + if (scene_size[i] > static_cast(0.0)) { scene_inv_size[i] = bin_size / scene_size[i]; } else { - scene_inv_size[i] = 0.0; + scene_inv_size[i] = static_cast(0.0); } } - // Clear bin data - std::fill(bins->bin.begin(), bins->bin.end(), 0); - // memset(&bins->bin[0], 0, sizeof(2 * 3 * bins->bin_size)); - - size_t idx_bmin[3]; - size_t idx_bmax[3]; + bins->clear(); for (size_t i = left_idx; i < right_idx; i++) { // - // Quantize the position into [0, BIN_SIZE) + // Quantize the center position into [0, BIN_SIZE) // // q[i] = (int)(p[i] - scene_bmin) / scene_size // - real3 bmin; - real3 bmax; - - p.BoundingBox(&bmin, &bmax, indices[i]); - // GetBoundingBoxOfTriangle(&bmin, &bmax, vertices, faces, indices[i]); - real3 quantized_bmin = (bmin - scene_min) * scene_inv_size; - real3 quantized_bmax = (bmax - scene_min) * scene_inv_size; + real3 bmin, bmax, center; + p.BoundingBoxAndCenter(&bmin, &bmax, ¢er, indices[i]); + real3 quantized_center = (center - scene_min) * scene_inv_size; - // idx is now in [0, BIN_SIZE) for (int j = 0; j < 3; ++j) { - int q0 = static_cast(quantized_bmin[j]); - if (q0 < 0) q0 = 0; - int q1 = static_cast(quantized_bmax[j]); - if (q1 < 0) q1 = 0; - - idx_bmin[j] = static_cast(q0); - idx_bmax[j] = static_cast(q1); - - if (idx_bmin[j] >= bin_size) - idx_bmin[j] = static_cast(bin_size) - 1; - if (idx_bmax[j] >= bin_size) - idx_bmax[j] = static_cast(bin_size) - 1; - - assert(idx_bmin[j] < bin_size); - assert(idx_bmax[j] < bin_size); - - // Increment bin counter - bins->bin[0 * (bins->bin_size * 3) + - static_cast(j) * bins->bin_size + idx_bmin[j]] += 1; - bins->bin[1 * (bins->bin_size * 3) + - static_cast(j) * bins->bin_size + idx_bmax[j]] += 1; + // idx is now in [0, BIN_SIZE) + unsigned idx = std::min(bins->bin_size - 1, unsigned(std::max(0, int(quantized_center[j])))); + + // Increment bin counter + extend bounding box of bin + unsigned int bin_idx = static_cast(j) * bins->bin_size + idx; + + // TODO: assert when out-of-bounds access?. + if (bin_idx < bins->bin_size) { + Bin& bin = bins->bin[static_cast(j) * bins->bin_size + idx]; + bin.count++; + for (int k = 0; k < 3; ++k) { + bin.bbox.bmin[k] = std::min(bin.bbox.bmin[k], bmin[k]); + bin.bbox.bmax[k] = std::max(bin.bbox.bmax[k], bmax[k]); + } + } } } } @@ -977,7 +1361,8 @@ inline T SAH(size_t ns1, T leftArea, size_t ns2, T rightArea, T invS, T Taabb, T Ttri) { T sah; - sah = static_cast(2.0) * Taabb + (leftArea * invS) * static_cast(ns1) * Ttri + + sah = static_cast(2.0) * Taabb + + (leftArea * invS) * static_cast(ns1) * Ttri + (rightArea * invS) * static_cast(ns2) * Ttri; return sah; @@ -986,103 +1371,50 @@ inline T SAH(size_t ns1, T leftArea, size_t ns2, T rightArea, T invS, T Taabb, template inline bool FindCutFromBinBuffer(T *cut_pos, // [out] xyz int *minCostAxis, // [out] - const BinBuffer *bins, const real3 &bmin, - const real3 &bmax, size_t num_primitives, - T costTaabb) { // should be in [0.0, 1.0] - const T kEPS = std::numeric_limits::epsilon(); // * epsScale; - - size_t left, right; - real3 bsize, bstep; - real3 bminLeft, bmaxLeft; - real3 bminRight, bmaxRight; - T saLeft, saRight, saTotal; - T pos; + BinBuffer *bins, const real3 &bmin, + const real3 &bmax) { T minCost[3]; - - T costTtri = 1.0f - costTaabb; - - (*minCostAxis) = 0; - - bsize = bmax - bmin; - bstep = bsize * (1.0f / bins->bin_size); - saTotal = CalculateSurfaceArea(bmin, bmax); - - T invSaTotal = 0.0f; - if (saTotal > kEPS) { - invSaTotal = 1.0f / saTotal; - } - for (int j = 0; j < 3; ++j) { - // - // Compute SAH cost for right side of each cell of the bbox. - // Exclude both extreme side of the bbox. - // - // i: 0 1 2 3 - // +----+----+----+----+----+ - // | | | | | | - // +----+----+----+----+----+ - // - - T minCostPos = bmin[j] + 0.5f * bstep[j]; minCost[j] = std::numeric_limits::max(); - left = 0; - right = num_primitives; - bminLeft = bminRight = bmin; - bmaxLeft = bmaxRight = bmax; - - for (int i = 0; i < static_cast(bins->bin_size) - 1; ++i) { - left += bins->bin[0 * (3 * bins->bin_size) + - static_cast(j) * bins->bin_size + - static_cast(i)]; - right -= bins->bin[1 * (3 * bins->bin_size) + - static_cast(j) * bins->bin_size + - static_cast(i)]; - - assert(left <= num_primitives); - assert(right <= num_primitives); - - // - // Split pos bmin + (i + 1) * (bsize / BIN_SIZE) - // +1 for i since we want a position on right side of the cell. - // - - pos = bmin[j] + (i + 0.5f) * bstep[j]; - bmaxLeft[j] = pos; - bminRight[j] = pos; - - saLeft = CalculateSurfaceArea(bminLeft, bmaxLeft); - saRight = CalculateSurfaceArea(bminRight, bmaxRight); + // Sweep left to accumulate bounding boxes and compute the right-hand side of the cost + size_t count = 0; + BBox accumulated_bbox; + for (size_t i = bins->bin_size - 1; i > 0; --i) { + Bin& bin = bins->bin[static_cast(j) * bins->bin_size + i]; + for (int k = 0; k < 3; ++k) { + accumulated_bbox.bmin[k] = std::min(bin.bbox.bmin[k], accumulated_bbox.bmin[k]); + accumulated_bbox.bmax[k] = std::max(bin.bbox.bmax[k], accumulated_bbox.bmax[k]); + } + count += bin.count; + bin.cost = T(count) * CalculateSurfaceArea(accumulated_bbox.bmin, accumulated_bbox.bmax); + } - T cost = - SAH(left, saLeft, right, saRight, invSaTotal, costTaabb, costTtri); + // Sweep right to compute the full cost + count = 0; + accumulated_bbox = BBox(); + size_t minBin = 1; + for (size_t i = 0; i < bins->bin_size - 1; i++) { + Bin& bin = bins->bin[static_cast(j) * bins->bin_size + i]; + Bin& next_bin = bins->bin[static_cast(j) * bins->bin_size + i + 1]; + for (int k = 0; k < 3; ++k) { + accumulated_bbox.bmin[k] = std::min(bin.bbox.bmin[k], accumulated_bbox.bmin[k]); + accumulated_bbox.bmax[k] = std::max(bin.bbox.bmax[k], accumulated_bbox.bmax[k]); + } + count += bin.count; + // Traversal cost and intersection cost are irrelevant for minimization + T cost = T(count) * CalculateSurfaceArea(accumulated_bbox.bmin, accumulated_bbox.bmax) + next_bin.cost; if (cost < minCost[j]) { - // - // Update the min cost - // minCost[j] = cost; - minCostPos = pos; - // minCostAxis = j; + // Store the beginning of the right partition + minBin = i + 1; } } - - cut_pos[j] = minCostPos; - } - - // cut_axis = minCostAxis; - // cut_pos = minCostPos; - - // Find min cost axis - T cost = minCost[0]; - (*minCostAxis) = 0; - if (cost > minCost[1]) { - (*minCostAxis) = 1; - cost = minCost[1]; - } - if (cost > minCost[2]) { - (*minCostAxis) = 2; - cost = minCost[2]; + cut_pos[j] = T(minBin) * ((bmax[j] - bmin[j]) / T(bins->bin_size)) + bmin[j]; } + *minCostAxis = 0; + if (minCost[0] > minCost[1]) *minCostAxis = 1; + if (minCost[*minCostAxis] > minCost[2]) *minCostAxis = 2; return true; } @@ -1101,33 +1433,100 @@ void ComputeBoundingBoxOMP(real3 *bmin, real3 *bmax, #pragma omp parallel firstprivate(local_bmin, local_bmax) if (n > (1024 * 128)) { -#pragma omp for - for (size_t i = left_index; i < right_index; i++) { // for each faces +#pragma omp parallel for + // for each face + for (int i = int(left_index); i < int(right_index); i++) { unsigned int idx = indices[i]; real3 bbox_min, bbox_max; + p.BoundingBox(&bbox_min, &bbox_max, idx); - for (int k = 0; k < 3; k++) { // xyz - if ((*bmin)[k] > bbox_min[k]) (*bmin)[k] = bbox_min[k]; - if ((*bmax)[k] < bbox_max[k]) (*bmax)[k] = bbox_max[k]; + + // xyz + for (int k = 0; k < 3; k++) { + (*bmin)[k] = std::min((*bmin)[k], bbox_min[k]); + (*bmax)[k] = std::max((*bmax)[k], bbox_max[k]); } } #pragma omp critical { for (int k = 0; k < 3; k++) { - if (local_bmin[k] < (*bmin)[k]) { - { - if (local_bmin[k] < (*bmin)[k]) (*bmin)[k] = local_bmin[k]; - } - } + (*bmin)[k] = std::min((*bmin)[k], local_bmin[k]); + (*bmax)[k] = std::max((*bmax)[k], local_bmax[k]); + } + } + } +} +#endif - if (local_bmax[k] > (*bmax)[k]) { - { - if (local_bmax[k] > (*bmax)[k]) (*bmax)[k] = local_bmax[k]; - } +#ifdef NANORT_USE_CPP11_FEATURE +template +inline void ComputeBoundingBoxThreaded(real3 *bmin, real3 *bmax, + const unsigned int *indices, + unsigned int left_index, + unsigned int right_index, const P &p) { + unsigned int n = right_index - left_index; + + size_t num_threads = std::min( + size_t(kNANORT_MAX_THREADS), + std::max(size_t(1), size_t(std::thread::hardware_concurrency()))); + + if (n < num_threads) { + num_threads = n; + } + + std::vector workers; + + size_t ndiv = n / num_threads; + + std::vector local_bmins(3 * num_threads); // 3 = xyz + std::vector local_bmaxs(3 * num_threads); // 3 = xyz + + for (size_t t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&, t]() { + size_t si = left_index + t * ndiv; + size_t ei = (t == (num_threads - 1)) ? size_t(right_index) : std::min(left_index + (t + 1) * ndiv, size_t(right_index)); + + local_bmins[3 * t + 0] = std::numeric_limits::infinity(); + local_bmins[3 * t + 1] = std::numeric_limits::infinity(); + local_bmins[3 * t + 2] = std::numeric_limits::infinity(); + local_bmaxs[3 * t + 0] = -std::numeric_limits::infinity(); + local_bmaxs[3 * t + 1] = -std::numeric_limits::infinity(); + local_bmaxs[3 * t + 2] = -std::numeric_limits::infinity(); + + // for each face + for (size_t i = si; i < ei; i++) { + unsigned int idx = indices[i]; + + real3 bbox_min, bbox_max; + p.BoundingBox(&bbox_min, &bbox_max, idx); + + // xyz + for (size_t k = 0; k < 3; k++) { + local_bmins[3 * t + k] = + std::min(local_bmins[3 * t + k], bbox_min[int(k)]); + local_bmaxs[3 * t + k] = + std::max(local_bmaxs[3 * t + k], bbox_max[int(k)]); } } + })); + } + + for (auto &t : workers) { + t.join(); + } + + // merge bbox + for (size_t k = 0; k < 3; k++) { + (*bmin)[int(k)] = local_bmins[k]; + (*bmax)[int(k)] = local_bmaxs[k]; + } + + for (size_t t = 1; t < num_threads; t++) { + for (size_t k = 0; k < 3; k++) { + (*bmin)[int(k)] = std::min((*bmin)[int(k)], local_bmins[3 * t + k]); + (*bmax)[int(k)] = std::max((*bmax)[int(k)], local_bmaxs[3 * t + k]); } } } @@ -1138,20 +1537,20 @@ inline void ComputeBoundingBox(real3 *bmin, real3 *bmax, const unsigned int *indices, unsigned int left_index, unsigned int right_index, const P &p) { - { - unsigned int idx = indices[left_index]; - p.BoundingBox(bmin, bmax, idx); - } + unsigned int idx = indices[left_index]; + p.BoundingBox(bmin, bmax, idx); { - for (unsigned int i = left_index + 1; i < right_index; - i++) { // for each primitives - unsigned int idx = indices[i]; + // for each primitive + for (unsigned int i = left_index + 1; i < right_index; i++) { + idx = indices[i]; real3 bbox_min, bbox_max; p.BoundingBox(&bbox_min, &bbox_max, idx); - for (int k = 0; k < 3; k++) { // xyz - if ((*bmin)[k] > bbox_min[k]) (*bmin)[k] = bbox_min[k]; - if ((*bmax)[k] < bbox_max[k]) (*bmax)[k] = bbox_max[k]; + + // xyz + for (int k = 0; k < 3; k++) { + (*bmin)[k] = std::min((*bmin)[k], bbox_min[k]); + (*bmax)[k] = std::max((*bmax)[k], bbox_max[k]); } } } @@ -1162,35 +1561,24 @@ inline void GetBoundingBox(real3 *bmin, real3 *bmax, const std::vector > &bboxes, unsigned int *indices, unsigned int left_index, unsigned int right_index) { - { - unsigned int i = left_index; - unsigned int idx = indices[i]; - (*bmin)[0] = bboxes[idx].bmin[0]; - (*bmin)[1] = bboxes[idx].bmin[1]; - (*bmin)[2] = bboxes[idx].bmin[2]; - (*bmax)[0] = bboxes[idx].bmax[0]; - (*bmax)[1] = bboxes[idx].bmax[1]; - (*bmax)[2] = bboxes[idx].bmax[2]; - } - - T local_bmin[3] = {(*bmin)[0], (*bmin)[1], (*bmin)[2]}; - T local_bmax[3] = {(*bmax)[0], (*bmax)[1], (*bmax)[2]}; + unsigned int i = left_index; + unsigned int idx = indices[i]; - { - for (unsigned int i = left_index; i < right_index; i++) { // for each faces - unsigned int idx = indices[i]; + (*bmin)[0] = bboxes[idx].bmin[0]; + (*bmin)[1] = bboxes[idx].bmin[1]; + (*bmin)[2] = bboxes[idx].bmin[2]; + (*bmax)[0] = bboxes[idx].bmax[0]; + (*bmax)[1] = bboxes[idx].bmax[1]; + (*bmax)[2] = bboxes[idx].bmax[2]; - for (int k = 0; k < 3; k++) { // xyz - T minval = bboxes[idx].bmin[k]; - T maxval = bboxes[idx].bmax[k]; - if (local_bmin[k] > minval) local_bmin[k] = minval; - if (local_bmax[k] < maxval) local_bmax[k] = maxval; - } - } + // for each face + for (i = left_index + 1; i < right_index; i++) { + idx = indices[i]; + // xyz for (int k = 0; k < 3; k++) { - (*bmin)[k] = local_bmin[k]; - (*bmax)[k] = local_bmax[k]; + (*bmin)[k] = std::min((*bmin)[k], bboxes[idx].bmin[k]); + (*bmax)[k] = std::max((*bmax)[k], bboxes[idx].bmax[k]); } } } @@ -1199,12 +1587,15 @@ inline void GetBoundingBox(real3 *bmin, real3 *bmax, // -- // -#if NANORT_ENABLE_PARALLEL_BUILD -template -unsigned int BVHAccel::BuildShallowTree( - std::vector > *out_nodes, unsigned int left_idx, - unsigned int right_idx, unsigned int depth, unsigned int max_shallow_depth, - const P &p, const Pred &pred) { +#if defined(NANORT_ENABLE_PARALLEL_BUILD) +template +template +unsigned int BVHAccel::BuildShallowTree(std::vector > *out_nodes, + unsigned int left_idx, + unsigned int right_idx, + unsigned int depth, + unsigned int max_shallow_depth, + const P &p, const Pred &pred) { assert(left_idx <= right_idx); unsigned int offset = static_cast(out_nodes->size()); @@ -1214,10 +1605,16 @@ unsigned int BVHAccel::BuildShallowTree( } real3 bmin, bmax; + +#if defined(NANORT_USE_CPP11_FEATURE) && defined(NANORT_ENABLE_PARALLEL_BUILD) + ComputeBoundingBoxThreaded(&bmin, &bmax, &indices_.at(0), left_idx, right_idx, + p); +#else ComputeBoundingBox(&bmin, &bmax, &indices_.at(0), left_idx, right_idx, p); +#endif unsigned int n = right_idx - left_idx; - if ((n < options_.min_leaf_primitives) || + if ((n <= options_.min_leaf_primitives) || (depth >= options_.max_tree_depth)) { // Create leaf node. BVHNode leaf; @@ -1263,32 +1660,35 @@ unsigned int BVHAccel::BuildShallowTree( return offset; } else { + // + // TODO(LTE): multi-threaded SAH computation, or use simple object median or + // spacial median for shallow tree to speeding up the parallel build. + // + // // Compute SAH and find best split axis and position // int min_cut_axis = 0; T cut_pos[3] = {0.0, 0.0, 0.0}; - BinBuffer bins(options_.bin_size); + BinBuffer bins(options_.bin_size); ContributeBinBuffer(&bins, bmin, bmax, &indices_.at(0), left_idx, right_idx, p); - FindCutFromBinBuffer(cut_pos, &min_cut_axis, &bins, bmin, bmax, n, - options_.cost_t_aabb); + FindCutFromBinBuffer(cut_pos, &min_cut_axis, &bins, bmin, bmax); // Try all 3 axis until good cut position avaiable. unsigned int mid_idx = left_idx; int cut_axis = min_cut_axis; + for (int axis_try = 0; axis_try < 3; axis_try++) { unsigned int *begin = &indices_[left_idx]; unsigned int *end = - &indices_[right_idx - 1] + 1; // mimics end() iterator. + &indices_[right_idx - 1] + 1; // mimics end() iterator unsigned int *mid = 0; // try min_cut_axis first. cut_axis = (min_cut_axis + axis_try) % 3; - // @fixme { We want some thing like: std::partition(begin, end, - // pred(cut_axis, cut_pos[cut_axis])); } pred.Set(cut_axis, cut_pos[cut_axis]); // // Split at (cut_axis, cut_pos) @@ -1297,13 +1697,14 @@ unsigned int BVHAccel::BuildShallowTree( mid = std::partition(begin, end, pred); mid_idx = left_idx + static_cast((mid - begin)); + if ((mid_idx == left_idx) || (mid_idx == right_idx)) { // Can't split well. - // Switch to object median(which may create unoptimized tree, but + // Switch to object median (which may create unoptimized tree, but // stable) mid_idx = left_idx + (n >> 1); - // Try another axis if there's axis to try. + // Try another axis if there's an axis to try. } else { // Found good cut. exit loop. @@ -1326,6 +1727,7 @@ unsigned int BVHAccel::BuildShallowTree( right_child_index = BuildShallowTree(out_nodes, mid_idx, right_idx, depth + 1, max_shallow_depth, p, pred); + //std::cout << "shallow[" << offset << "] l and r = " << left_child_index << ", " << right_child_index << std::endl; (*out_nodes)[offset].data[0] = left_child_index; (*out_nodes)[offset].data[1] = right_child_index; @@ -1344,11 +1746,13 @@ unsigned int BVHAccel::BuildShallowTree( } #endif -template -unsigned int BVHAccel::BuildTree( - BVHBuildStatistics *out_stat, std::vector > *out_nodes, - unsigned int left_idx, unsigned int right_idx, unsigned int depth, - const P &p, const Pred &pred) { +template +template +unsigned int BVHAccel::BuildTree(BVHBuildStatistics *out_stat, + std::vector > *out_nodes, + unsigned int left_idx, + unsigned int right_idx, unsigned int depth, + const P &p, const Pred &pred) { assert(left_idx <= right_idx); unsigned int offset = static_cast(out_nodes->size()); @@ -1365,7 +1769,7 @@ unsigned int BVHAccel::BuildTree( } unsigned int n = right_idx - left_idx; - if ((n < options_.min_leaf_primitives) || + if ((n <= options_.min_leaf_primitives) || (depth >= options_.max_tree_depth)) { // Create leaf node. BVHNode leaf; @@ -1401,15 +1805,15 @@ unsigned int BVHAccel::BuildTree( int min_cut_axis = 0; T cut_pos[3] = {0.0, 0.0, 0.0}; - BinBuffer bins(options_.bin_size); + BinBuffer bins(options_.bin_size); ContributeBinBuffer(&bins, bmin, bmax, &indices_.at(0), left_idx, right_idx, p); - FindCutFromBinBuffer(cut_pos, &min_cut_axis, &bins, bmin, bmax, n, - options_.cost_t_aabb); + FindCutFromBinBuffer(cut_pos, &min_cut_axis, &bins, bmin, bmax); // Try all 3 axis until good cut position avaiable. unsigned int mid_idx = left_idx; int cut_axis = min_cut_axis; + for (int axis_try = 0; axis_try < 3; axis_try++) { unsigned int *begin = &indices_[left_idx]; unsigned int *end = &indices_[right_idx - 1] + 1; // mimics end() iterator. @@ -1427,6 +1831,7 @@ unsigned int BVHAccel::BuildTree( mid = std::partition(begin, end, pred); mid_idx = left_idx + static_cast((mid - begin)); + if ((mid_idx == left_idx) || (mid_idx == right_idx)) { // Can't split well. // Switch to object median(which may create unoptimized tree, but @@ -1474,18 +1879,25 @@ unsigned int BVHAccel::BuildTree( return offset; } -template -bool BVHAccel::Build(unsigned int num_primitives, - const BVHBuildOptions &options, - const P &p, const Pred &pred) { +template +template +bool BVHAccel::Build(unsigned int num_primitives, const Prim &p, + const Pred &pred, const BVHBuildOptions &options) { options_ = options; stats_ = BVHBuildStatistics(); nodes_.clear(); bboxes_.clear(); +#if defined(NANORT_ENABLE_PARALLEL_BUILD) + shallow_node_infos_.clear(); +#endif assert(options_.bin_size > 1); + if (num_primitives == 0) { + return false; + } + unsigned int n = num_primitives; // @@ -1493,41 +1905,75 @@ bool BVHAccel::Build(unsigned int num_primitives, // indices_.resize(n); +#if defined(NANORT_USE_CPP11_FEATURE) + { + size_t num_threads = std::min( + size_t(kNANORT_MAX_THREADS), + std::max(size_t(1), size_t(std::thread::hardware_concurrency()))); + + if (n < num_threads) { + num_threads = n; + } + + std::vector workers; + + size_t ndiv = n / num_threads; + + for (size_t t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&, t]() { + size_t si = t * ndiv; + size_t ei = (t == (num_threads - 1)) ? n : std::min((t + 1) * ndiv, size_t(n)); + + for (size_t k = si; k < ei; k++) { + indices_[k] = static_cast(k); + } + })); + } + + for (auto &t : workers) { + t.join(); + } + } + +#else + #ifdef _OPENMP #pragma omp parallel for #endif for (int i = 0; i < static_cast(n); i++) { indices_[static_cast(i)] = static_cast(i); } +#endif // !NANORT_USE_CPP11_FEATURE // - // 2. Compute bounding box(optional). + // 2. Compute bounding box (optional). // real3 bmin, bmax; + if (options.cache_bbox) { bmin[0] = bmin[1] = bmin[2] = std::numeric_limits::max(); bmax[0] = bmax[1] = bmax[2] = -std::numeric_limits::max(); bboxes_.resize(n); - for (size_t i = 0; i < n; i++) { // for each primitived + + for (size_t i = 0; i < n; i++) { // for each primitive unsigned int idx = indices_[i]; BBox bbox; p.BoundingBox(&(bbox.bmin), &(bbox.bmax), static_cast(i)); bboxes_[idx] = bbox; - for (int k = 0; k < 3; k++) { // xyz - if (bmin[k] > bbox.bmin[k]) { - bmin[k] = bbox.bmin[k]; - } - if (bmax[k] < bbox.bmax[k]) { - bmax[k] = bbox.bmax[k]; - } + // xyz + for (int k = 0; k < 3; k++) { + bmin[k] = std::min(bmin[k], bbox.bmin[k]); + bmax[k] = std::max(bmax[k], bbox.bmax[k]); } } } else { -#ifdef _OPENMP +#if defined(NANORT_USE_CPP11_FEATURE) + ComputeBoundingBoxThreaded(&bmin, &bmax, &indices_.at(0), 0, n, p); +#elif defined(_OPENMP) ComputeBoundingBoxOMP(&bmin, &bmax, &indices_.at(0), 0, n, p); #else ComputeBoundingBox(&bmin, &bmax, &indices_.at(0), 0, n, p); @@ -1537,10 +1983,88 @@ bool BVHAccel::Build(unsigned int num_primitives, // // 3. Build tree // -#ifdef _OPENMP -#if NANORT_ENABLE_PARALLEL_BUILD +#if defined(NANORT_ENABLE_PARALLEL_BUILD) +#if defined(NANORT_USE_CPP11_FEATURE) + + // Do parallel build for large enough datasets. + if (n > options.min_primitives_for_parallel_build) { + BuildShallowTree(&nodes_, 0, n, /* root depth */ 0, options.shallow_depth, + p, pred); // [0, n) + + assert(shallow_node_infos_.size() > 0); + + // Build deeper tree in parallel + std::vector > > local_nodes( + shallow_node_infos_.size()); + std::vector local_stats(shallow_node_infos_.size()); + + size_t num_threads = std::min( + size_t(kNANORT_MAX_THREADS), + std::max(size_t(1), size_t(std::thread::hardware_concurrency()))); + if (shallow_node_infos_.size() < num_threads) { + num_threads = shallow_node_infos_.size(); + } + + std::vector workers; + std::atomic i(0); + + for (size_t t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() { + uint32_t idx = 0; + while ((idx = (i++)) < shallow_node_infos_.size()) { + // Create thread-local copy of Pred since some mutable variables are + // modified during SAH computation. + const Pred local_pred = pred; + unsigned int left_idx = shallow_node_infos_[size_t(idx)].left_idx; + unsigned int right_idx = shallow_node_infos_[size_t(idx)].right_idx; + BuildTree(&(local_stats[size_t(idx)]), &(local_nodes[size_t(idx)]), + left_idx, right_idx, options.shallow_depth, p, local_pred); + } + })); + } + + for (auto &t : workers) { + t.join(); + } + + // Join local nodes + for (size_t ii = 0; ii < local_nodes.size(); ii++) { + assert(!local_nodes[ii].empty()); + size_t offset = nodes_.size(); + + // Add offset to child index (for branch node). + for (size_t j = 0; j < local_nodes[ii].size(); j++) { + if (local_nodes[ii][j].flag == 0) { // branch + local_nodes[ii][j].data[0] += offset - 1; + local_nodes[ii][j].data[1] += offset - 1; + } + } + + // replace + nodes_[shallow_node_infos_[ii].offset] = local_nodes[ii][0]; + + // Skip root element of the local node. + nodes_.insert(nodes_.end(), local_nodes[ii].begin() + 1, + local_nodes[ii].end()); + } + + // Join statistics + for (size_t ii = 0; ii < local_nodes.size(); ii++) { + stats_.max_tree_depth = + std::max(stats_.max_tree_depth, local_stats[ii].max_tree_depth); + stats_.num_leaf_nodes += local_stats[ii].num_leaf_nodes; + stats_.num_branch_nodes += local_stats[ii].num_branch_nodes; + } + + } else { + // Single thread. + BuildTree(&stats_, &nodes_, 0, n, + /* root depth */ 0, p, pred); // [0, n) + } + +#elif defined(_OPENMP) - // Do parallel build for enoughly large dataset. + // Do parallel build for large enough datasets. if (n > options.min_primitives_for_parallel_build) { BuildShallowTree(&nodes_, 0, n, /* root depth */ 0, options.shallow_depth, p, pred); // [0, n) @@ -1554,18 +2078,19 @@ bool BVHAccel::Build(unsigned int num_primitives, #pragma omp parallel for for (int i = 0; i < static_cast(shallow_node_infos_.size()); i++) { - unsigned int left_idx = shallow_node_infos_[i].left_idx; - unsigned int right_idx = shallow_node_infos_[i].right_idx; - BuildTree(&(local_stats[i]), &(local_nodes[i]), left_idx, right_idx, - options.shallow_depth, p, pred); + unsigned int left_idx = shallow_node_infos_[size_t(i)].left_idx; + unsigned int right_idx = shallow_node_infos_[size_t(i)].right_idx; + const Pred local_pred = pred; + BuildTree(&(local_stats[size_t(i)]), &(local_nodes[size_t(i)]), left_idx, + right_idx, options.shallow_depth, p, local_pred); } // Join local nodes - for (int i = 0; i < static_cast(local_nodes.size()); i++) { - assert(!local_nodes[i].empty()); + for (size_t i = 0; i < local_nodes.size(); i++) { + assert(!local_nodes[size_t(i)].empty()); size_t offset = nodes_.size(); - // Add offset to child index(for branch node). + // Add offset to child index (for branch node). for (size_t j = 0; j < local_nodes[i].size(); j++) { if (local_nodes[i][j].flag == 0) { // branch local_nodes[i][j].data[0] += offset - 1; @@ -1582,7 +2107,7 @@ bool BVHAccel::Build(unsigned int num_primitives, } // Join statistics - for (int i = 0; i < static_cast(local_nodes.size()); i++) { + for (size_t i = 0; i < local_nodes.size(); i++) { stats_.max_tree_depth = std::max(stats_.max_tree_depth, local_stats[i].max_tree_depth); stats_.num_leaf_nodes += local_stats[i].num_leaf_nodes; @@ -1590,6 +2115,7 @@ bool BVHAccel::Build(unsigned int num_primitives, } } else { + // Single thread BuildTree(&stats_, &nodes_, 0, n, /* root depth */ 0, p, pred); // [0, n) } @@ -1601,6 +2127,8 @@ bool BVHAccel::Build(unsigned int num_primitives, } #endif #else // !_OPENMP + + // Single thread BVH build { BuildTree(&stats_, &nodes_, 0, n, /* root depth */ 0, p, pred); // [0, n) @@ -1610,11 +2138,25 @@ bool BVHAccel::Build(unsigned int num_primitives, return true; } -template -bool BVHAccel::Dump(const char *filename) { +template +void BVHAccel::Debug() { + for (size_t i = 0; i < indices_.size(); i++) { + printf("index[%d] = %d\n", int(i), int(indices_[i])); + } + + for (size_t i = 0; i < nodes_.size(); i++) { + printf("node[%d] : bmin %f, %f, %f, bmax %f, %f, %f\n", int(i), + nodes_[i].bmin[0], nodes_[i].bmin[1], nodes_[i].bmin[2], + nodes_[i].bmax[0], nodes_[i].bmax[1], nodes_[i].bmax[2]); + } +} + +#if defined(NANORT_ENABLE_SERIALIZATION) +template +bool BVHAccel::Dump(const char *filename) const { FILE *fp = fopen(filename, "wb"); if (!fp) { - fprintf(stderr, "[BVHAccel] Cannot write a file: %s\n", filename); + // fprintf(stderr, "[BVHAccel] Cannot write a file: %s\n", filename); return false; } @@ -1641,11 +2183,34 @@ bool BVHAccel::Dump(const char *filename) { return true; } -template -bool BVHAccel::Load(const char *filename) { +template +bool BVHAccel::Dump(FILE *fp) const { + size_t numNodes = nodes_.size(); + assert(nodes_.size() > 0); + + size_t numIndices = indices_.size(); + + size_t r = 0; + r = fwrite(&numNodes, sizeof(size_t), 1, fp); + assert(r == 1); + + r = fwrite(&nodes_.at(0), sizeof(BVHNode), numNodes, fp); + assert(r == numNodes); + + r = fwrite(&numIndices, sizeof(size_t), 1, fp); + assert(r == 1); + + r = fwrite(&indices_.at(0), sizeof(unsigned int), numIndices, fp); + assert(r == numIndices); + + return true; +} + +template +bool BVHAccel::Load(const char *filename) { FILE *fp = fopen(filename, "rb"); if (!fp) { - fprintf(stderr, "Cannot open file: %s\n", filename); + // fprintf(stderr, "Cannot open file: %s\n", filename); return false; } @@ -1674,34 +2239,68 @@ bool BVHAccel::Load(const char *filename) { return true; } +template +bool BVHAccel::Load(FILE *fp) { + size_t numNodes; + size_t numIndices; + + size_t r = 0; + r = fread(&numNodes, sizeof(size_t), 1, fp); + assert(r == 1); + assert(numNodes > 0); + + nodes_.resize(numNodes); + r = fread(&nodes_.at(0), sizeof(BVHNode), numNodes, fp); + assert(r == numNodes); + + r = fread(&numIndices, sizeof(size_t), 1, fp); + assert(r == 1); + + indices_.resize(numIndices); + + r = fread(&indices_.at(0), sizeof(unsigned int), numIndices, fp); + assert(r == numIndices); + + return true; +} +#endif + template inline bool IntersectRayAABB(T *tminOut, // [out] T *tmaxOut, // [out] T min_t, T max_t, const T bmin[3], const T bmax[3], real3 ray_org, real3 ray_inv_dir, - int ray_dir_sign[3]) { - T tmin, tmax; - - const T min_x = ray_dir_sign[0] ? bmax[0] : bmin[0]; - const T min_y = ray_dir_sign[1] ? bmax[1] : bmin[1]; - const T min_z = ray_dir_sign[2] ? bmax[2] : bmin[2]; - const T max_x = ray_dir_sign[0] ? bmin[0] : bmax[0]; - const T max_y = ray_dir_sign[1] ? bmin[1] : bmax[1]; - const T max_z = ray_dir_sign[2] ? bmin[2] : bmax[2]; + int ray_dir_sign[3]); +template <> +inline bool IntersectRayAABB(float *tminOut, // [out] + float *tmaxOut, // [out] + float min_t, float max_t, + const float bmin[3], const float bmax[3], + real3 ray_org, + real3 ray_inv_dir, + int ray_dir_sign[3]) { + float tmin, tmax; + + const float min_x = ray_dir_sign[0] ? bmax[0] : bmin[0]; + const float min_y = ray_dir_sign[1] ? bmax[1] : bmin[1]; + const float min_z = ray_dir_sign[2] ? bmax[2] : bmin[2]; + const float max_x = ray_dir_sign[0] ? bmin[0] : bmax[0]; + const float max_y = ray_dir_sign[1] ? bmin[1] : bmax[1]; + const float max_z = ray_dir_sign[2] ? bmin[2] : bmax[2]; // X - const T tmin_x = (min_x - ray_org[0]) * ray_inv_dir[0]; + const float tmin_x = (min_x - ray_org[0]) * ray_inv_dir[0]; // MaxMult robust BVH traversal(up to 4 ulp). // 1.0000000000000004 for double precision. - const T tmax_x = (max_x - ray_org[0]) * ray_inv_dir[0] * 1.00000024f; + const float tmax_x = (max_x - ray_org[0]) * ray_inv_dir[0] * 1.00000024f; // Y - const T tmin_y = (min_y - ray_org[1]) * ray_inv_dir[1]; - const T tmax_y = (max_y - ray_org[1]) * ray_inv_dir[1] * 1.00000024f; + const float tmin_y = (min_y - ray_org[1]) * ray_inv_dir[1]; + const float tmax_y = (max_y - ray_org[1]) * ray_inv_dir[1] * 1.00000024f; // Z - const T tmin_z = (min_z - ray_org[2]) * ray_inv_dir[2]; - const T tmax_z = (max_z - ray_org[2]) * ray_inv_dir[2] * 1.00000024f; + const float tmin_z = (min_z - ray_org[2]) * ray_inv_dir[2]; + const float tmax_z = (max_z - ray_org[2]) * ray_inv_dir[2] * 1.00000024f; tmin = safemax(tmin_z, safemax(tmin_y, safemax(tmin_x, min_t))); tmax = safemin(tmax_z, safemin(tmax_y, safemin(tmax_x, max_t))); @@ -1712,14 +2311,58 @@ inline bool IntersectRayAABB(T *tminOut, // [out] return true; } + return false; // no hit +} + +template <> +inline bool IntersectRayAABB(double *tminOut, // [out] + double *tmaxOut, // [out] + double min_t, double max_t, + const double bmin[3], const double bmax[3], + real3 ray_org, + real3 ray_inv_dir, + int ray_dir_sign[3]) { + double tmin, tmax; + + const double min_x = ray_dir_sign[0] ? bmax[0] : bmin[0]; + const double min_y = ray_dir_sign[1] ? bmax[1] : bmin[1]; + const double min_z = ray_dir_sign[2] ? bmax[2] : bmin[2]; + const double max_x = ray_dir_sign[0] ? bmin[0] : bmax[0]; + const double max_y = ray_dir_sign[1] ? bmin[1] : bmax[1]; + const double max_z = ray_dir_sign[2] ? bmin[2] : bmax[2]; + + // X + const double tmin_x = (min_x - ray_org[0]) * ray_inv_dir[0]; + // MaxMult robust BVH traversal(up to 4 ulp). + const double tmax_x = + (max_x - ray_org[0]) * ray_inv_dir[0] * 1.0000000000000004; + + // Y + const double tmin_y = (min_y - ray_org[1]) * ray_inv_dir[1]; + const double tmax_y = + (max_y - ray_org[1]) * ray_inv_dir[1] * 1.0000000000000004; + + // Z + const double tmin_z = (min_z - ray_org[2]) * ray_inv_dir[2]; + const double tmax_z = + (max_z - ray_org[2]) * ray_inv_dir[2] * 1.0000000000000004; + + tmin = safemax(tmin_z, safemax(tmin_y, safemax(tmin_x, min_t))); + tmax = safemin(tmax_z, safemin(tmax_y, safemin(tmax_x, max_t))); + if (tmin <= tmax) { + (*tminOut) = tmin; + (*tmaxOut) = tmax; + + return true; + } return false; // no hit } -template -inline bool BVHAccel::TestLeafNode(const BVHNode &node, - const Ray &ray, - const I &intersector) const { +template +template +inline bool BVHAccel::TestLeafNode(const BVHNode &node, const Ray &ray, + const I &intersector) const { bool hit = false; unsigned int num_primitives = node.data[0]; @@ -1742,39 +2385,41 @@ inline bool BVHAccel::TestLeafNode(const BVHNode &node, T local_t = t; if (intersector.Intersect(&local_t, prim_idx)) { - if (local_t > ray.min_t) { - // Update isect state - t = local_t; + // Update isect state + t = local_t; - intersector.Update(t, prim_idx); - hit = true; - } + intersector.Update(t, prim_idx); + hit = true; } } return hit; } -#if 0 -template -bool BVHAccel::MultiHitTestLeafNode(const BVHNode &node, - const Ray &ray, int max_intersections, const I &intersector) const { +#if 0 // TODO(LTE): Implement +template template +bool BVHAccel::MultiHitTestLeafNode( + std::priority_queue, Comp> *isect_pq, + int max_intersections, + const BVHNode &node, + const Ray &ray, + const I &intersector) const { bool hit = false; unsigned int num_primitives = node.data[0]; unsigned int offset = node.data[1]; T t = std::numeric_limits::max(); - if (isects->size() >= static_cast(max_intersections)) { - t = isects->top().t; // current furthest hit distance + if (isect_pq->size() >= static_cast(max_intersections)) { + t = isect_pq->top().t; // current furthest hit distance } - real3 ray_org; + real3 ray_org; ray_org[0] = ray.org[0]; ray_org[1] = ray.org[1]; ray_org[2] = ray.org[2]; - real3 ray_dir; + real3 ray_dir; ray_dir[0] = ray.dir[0]; ray_dir[1] = ray.dir[1]; ray_dir[2] = ray.dir[2]; @@ -1783,39 +2428,43 @@ bool BVHAccel::MultiHitTestLeafNode(const BVHNode &node, unsigned int prim_idx = indices_[i + offset]; T local_t = t, u = 0.0f, v = 0.0f; - if (p.Intersect(&local_t, &u, &v, prim_idx)) { + + if (intersector.Intersect(&local_t, &u, &v, prim_idx)) + { // Update isect state - if ((local_t > ray.min_t)) { - if (isects->size() < static_cast(max_intersections)) { - Intersection isect; + if ((local_t > ray.min_t)) + { + if (isect_pq->size() < static_cast(max_intersections)) + { + H isect; t = local_t; isect.t = t; isect.u = u; isect.v = v; isect.prim_id = prim_idx; - isects->push(isect); + isect_pq->push(isect); // Update t to furthest distance. t = ray.max_t; hit = true; - } else { - if (local_t < isects->top().t) { - // delete furthest intersection and add new intersection. - isects->pop(); - - Intersection isect; - isect.t = local_t; - isect.u = u; - isect.v = v; - isect.prim_id = prim_idx; - isects->push(isect); - - // Update furthest hit distance - t = isects->top().t; - - hit = true; - } + } + else if (local_t < isect_pq->top().t) + { + // delete furthest intersection and add new intersection. + isect_pq->pop(); + + H hit; + hit.t = local_t; + hit.u = u; + hit.v = v; + hit.prim_id = prim_idx; + isect_pq->push(hit); + + // Update furthest hit distance + t = isect_pq->top().t; + + hit = true; } } } @@ -1825,11 +2474,12 @@ bool BVHAccel::MultiHitTestLeafNode(const BVHNode &node, } #endif -template -bool BVHAccel::Traverse(const Ray &ray, - const BVHTraceOptions &options, - const I &intersector) const { +template +template +bool BVHAccel::Traverse(const Ray &ray, const I &intersector, H *isect, + const BVHTraceOptions &options) const { const int kMaxStackDepth = 512; + (void)kMaxStackDepth; T hit_t = ray.max_t; @@ -1843,15 +2493,17 @@ bool BVHAccel::Traverse(const Ray &ray, intersector.PrepareTraversal(ray, options); int dir_sign[3]; - dir_sign[0] = ray.dir[0] < 0.0f ? 1 : 0; - dir_sign[1] = ray.dir[1] < 0.0f ? 1 : 0; - dir_sign[2] = ray.dir[2] < 0.0f ? 1 : 0; + dir_sign[0] = ray.dir[0] < static_cast(0.0) ? 1 : 0; + dir_sign[1] = ray.dir[1] < static_cast(0.0) ? 1 : 0; + dir_sign[2] = ray.dir[2] < static_cast(0.0) ? 1 : 0; - // @fixme { Check edge case; i.e., 1/0 } real3 ray_inv_dir; - ray_inv_dir[0] = 1.0f / (ray.dir[0] + 1.0e-12f); - ray_inv_dir[1] = 1.0f / (ray.dir[1] + 1.0e-12f); - ray_inv_dir[2] = 1.0f / (ray.dir[2] + 1.0e-12f); + real3 ray_dir; + ray_dir[0] = ray.dir[0]; + ray_dir[1] = ray.dir[1]; + ray_dir[2] = ray.dir[2]; + + ray_inv_dir = vsafe_inverse(ray_dir); real3 ray_org; ray_org[0] = ray.org[0]; @@ -1870,38 +2522,84 @@ bool BVHAccel::Traverse(const Ray &ray, bool hit = IntersectRayAABB(&min_t, &max_t, ray.min_t, hit_t, node.bmin, node.bmax, ray_org, ray_inv_dir, dir_sign); - if (node.flag == 0) { // branch node - if (hit) { + if (hit) { + // Branch node + if (node.flag == 0) { int order_near = dir_sign[node.axis]; int order_far = 1 - order_near; // Traverse near first. node_stack[++node_stack_index] = node.data[order_far]; node_stack[++node_stack_index] = node.data[order_near]; - } - } else { // leaf node - if (hit) { - if (TestLeafNode(node, ray, intersector)) { - hit_t = intersector.GetT(); - } + } else if (TestLeafNode(node, ray, intersector)) { // Leaf node + hit_t = intersector.GetT(); } } } - assert(node_stack_index < kMaxStackDepth); + assert(node_stack_index < kNANORT_MAX_STACK_DEPTH); bool hit = (intersector.GetT() < ray.max_t); - intersector.PostTraversal(ray, hit); + intersector.PostTraversal(ray, hit, isect); return hit; } -#if 0 -template -bool BVHAccel::MultiHitTraverse(const Ray &ray, - const BVHTraceOptions &options, - int max_intersections, - StackVector *isects) const { +template +template +inline bool BVHAccel::TestLeafNodeIntersections( + const BVHNode &node, const Ray &ray, const int max_intersections, + const I &intersector, + std::priority_queue, std::vector >, + NodeHitComparator > *isect_pq) const { + bool hit = false; + + unsigned int num_primitives = node.data[0]; + unsigned int offset = node.data[1]; + + real3 ray_org; + ray_org[0] = ray.org[0]; + ray_org[1] = ray.org[1]; + ray_org[2] = ray.org[2]; + + real3 ray_dir; + ray_dir[0] = ray.dir[0]; + ray_dir[1] = ray.dir[1]; + ray_dir[2] = ray.dir[2]; + + intersector.PrepareTraversal(ray); + + for (unsigned int i = 0; i < num_primitives; i++) { + unsigned int prim_idx = indices_[i + offset]; + + T min_t, max_t; + + if (intersector.Intersect(&min_t, &max_t, prim_idx)) { + // Always add to isect lists. + NodeHit isect; + isect.t_min = min_t; + isect.t_max = max_t; + isect.node_id = prim_idx; + + if (isect_pq->size() < static_cast(max_intersections)) { + isect_pq->push(isect); + } else if (min_t < isect_pq->top().t_min) { + // delete the furthest intersection and add a new intersection. + isect_pq->pop(); + + isect_pq->push(isect); + } + } + } + + return hit; +} + +template +template +bool BVHAccel::ListNodeIntersections( + const Ray &ray, int max_intersections, const I &intersector, + StackVector, 128> *hits) const { const int kMaxStackDepth = 512; T hit_t = ray.max_t; @@ -1911,58 +2609,153 @@ bool BVHAccel::MultiHitTraverse(const Ray &ray, node_stack[0] = 0; // Stores furthest intersection at top - std::priority_queue, IsectComparator > isect_pq; - //// Stores furthest intersection at top - // template - // typedef std::priority_queue, - // IsectComparator > - // IsectVector; + std::priority_queue, std::vector >, + NodeHitComparator > + isect_pq; - (*isects)->clear(); - - p.PrepareTraversal(ray, options); + (*hits)->clear(); int dir_sign[3]; - dir_sign[0] = ray.dir[0] < 0.0f ? 1 : 0; - dir_sign[1] = ray.dir[1] < 0.0f ? 1 : 0; - dir_sign[2] = ray.dir[2] < 0.0f ? 1 : 0; + dir_sign[0] = ray.dir[0] < static_cast(0.0) ? 1 : 0; + dir_sign[1] = ray.dir[1] < static_cast(0.0) ? 1 : 0; + dir_sign[2] = ray.dir[2] < static_cast(0.0) ? 1 : 0; + + real3 ray_inv_dir; + real3 ray_dir; + + ray_dir[0] = ray.dir[0]; + ray_dir[1] = ray.dir[1]; + ray_dir[2] = ray.dir[2]; - // @fixme { Check edge case; i.e., 1/0 } - real3 ray_inv_dir; - ray_inv_dir[0] = 1.0f / ray.dir[0]; - ray_inv_dir[1] = 1.0f / ray.dir[1]; - ray_inv_dir[2] = 1.0f / ray.dir[2]; + ray_inv_dir = vsafe_inverse(ray_dir); - real3 ray_org; + real3 ray_org; ray_org[0] = ray.org[0]; ray_org[1] = ray.org[1]; ray_org[2] = ray.org[2]; T min_t, max_t; + while (node_stack_index >= 0) { unsigned int index = node_stack[node_stack_index]; - const BVHNode &node = nodes_[static_cast(index)]; + const BVHNode &node = nodes_[static_cast(index)]; node_stack_index--; bool hit = IntersectRayAABB(&min_t, &max_t, ray.min_t, hit_t, node.bmin, node.bmax, ray_org, ray_inv_dir, dir_sign); - if (node.flag == 0) { // branch node - if (hit) { + if (hit) { + // Branch node + if (node.flag == 0) { int order_near = dir_sign[node.axis]; int order_far = 1 - order_near; // Traverse near first. node_stack[++node_stack_index] = node.data[order_far]; node_stack[++node_stack_index] = node.data[order_near]; + } else { // Leaf node + TestLeafNodeIntersections(node, ray, max_intersections, intersector, + &isect_pq); } + } + } + + assert(node_stack_index < kMaxStackDepth); + (void)kMaxStackDepth; + + if (!isect_pq.empty()) { + // Store intesection in reverse order (make it frontmost order) + size_t n = isect_pq.size(); + (*hits)->resize(n); + + for (size_t i = 0; i < n; i++) { + const NodeHit &isect = isect_pq.top(); + (*hits)[n - i - 1] = isect; + isect_pq.pop(); + } - } else { // leaf node - if (hit) { - if (MultiHitTestLeafNode(&isect_pq, max_intersections, node, ray, p)) { + return true; + } + + return false; +} + +#if 0 // TODO(LTE): Implement +template template +bool BVHAccel::MultiHitTraverse(const Ray &ray, + int max_intersections, + const I &intersector, + StackVector *hits, + const BVHTraceOptions& options) const { + const int kMaxStackDepth = 512; + + T hit_t = ray.max_t; + + int node_stack_index = 0; + unsigned int node_stack[512]; + node_stack[0] = 0; + + // Stores furthest intersection at top + std::priority_queue, Comp> isect_pq; + + (*hits)->clear(); + + // Init isect info as no hit + intersector.Update(hit_t, static_cast(-1)); + + intersector.PrepareTraversal(ray, options); + + int dir_sign[3]; + dir_sign[0] = ray.dir[0] < static_cast(0.0) ? static_cast(1) : static_cast(0); + dir_sign[1] = ray.dir[1] < static_cast(0.0) ? static_cast(1) : static_cast(0); + dir_sign[2] = ray.dir[2] < static_cast(0.0) ? static_cast(1) : static_cast(0); + + real3 ray_inv_dir; + real3 ray_dir; + + ray_dir[0] = ray.dir[0]; + ray_dir[1] = ray.dir[1]; + ray_dir[2] = ray.dir[2]; + + ray_inv_dir = vsafe_inverse(ray_dir); + + real3 ray_org; + ray_org[0] = ray.org[0]; + ray_org[1] = ray.org[1]; + ray_org[2] = ray.org[2]; + + T min_t, max_t; + + while (node_stack_index >= 0) + { + unsigned int index = node_stack[node_stack_index]; + const BVHNode &node = nodes_[static_cast(index)]; + + node_stack_index--; + + bool hit = IntersectRayAABB(&min_t, &max_t, ray.min_t, hit_t, node.bmin, + node.bmax, ray_org, ray_inv_dir, dir_sign); + + // branch node + if(hit) + { + if (node.flag == 0) + { + int order_near = dir_sign[node.axis]; + int order_far = 1 - order_near; + + // Traverse near first. + node_stack[++node_stack_index] = node.data[order_far]; + node_stack[++node_stack_index] = node.data[order_near]; + } + else + { + if (MultiHitTestLeafNode(&isect_pq, max_intersections, node, ray, intersector)) + { // Only update `hit_t` when queue is full. - if (isect_pq.size() >= static_cast(max_intersections)) { + if (isect_pq.size() >= static_cast(max_intersections)) + { hit_t = isect_pq.top().t; } } @@ -1971,14 +2764,18 @@ bool BVHAccel::MultiHitTraverse(const Ray &ray, } assert(node_stack_index < kMaxStackDepth); + (void)kMaxStackDepth; - if (!isect_pq.empty()) { - // Store intesection in reverse order(make it frontmost order) + if (!isect_pq.empty()) + { + // Store intesection in reverse order (make it frontmost order) size_t n = isect_pq.size(); - (*isects)->resize(n); - for (size_t i = 0; i < n; i++) { - const Intersection &isect = isect_pq.top(); - (*isects)[n - i - 1] = isect; + (*hits)->resize(n); + + for (size_t i = 0; i < n; i++) + { + const H &isect = isect_pq.top(); + (*hits)[n - i - 1] = isect; isect_pq.pop(); } @@ -1989,6 +2786,10 @@ bool BVHAccel::MultiHitTraverse(const Ray &ray, } #endif +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + } // namespace nanort #endif // NANORT_H_ diff --git a/src/surface/simple_polygon_mesh.cpp b/src/surface/simple_polygon_mesh.cpp index 111b7eb9..4256fc88 100644 --- a/src/surface/simple_polygon_mesh.cpp +++ b/src/surface/simple_polygon_mesh.cpp @@ -1,7 +1,7 @@ #include "geometrycentral/surface/simple_polygon_mesh.h" #include "happly.h" -#include "nanort/nanort.h" +#include "nanort.h" #include #include @@ -656,9 +656,7 @@ void SimplePolygonMesh::consistentlyOrientFaces(bool outwardOrient) { int32_t N_RAYS_PER_FACE = 4; std::vector rawPositions; std::vector rawFaces; - nanort::BVHAccel, nanort::TriangleSAHPred, - nanort::TriangleIntersector> - accel; + nanort::BVHAccel accel; double lengthScale = -1; auto traceDist = [&](Vector3 root, Vector3 dir) { nanort::Ray ray; @@ -667,9 +665,10 @@ void SimplePolygonMesh::consistentlyOrientFaces(bool outwardOrient) { for (int i = 0; i < 3; i++) ray.org[i] = root[i]; for (int i = 0; i < 3; i++) ray.dir[i] = dir[i]; nanort::BVHTraceOptions trace_options; + nanort::TriangleIntersection isect; nanort::TriangleIntersector triangle_intersector(rawPositions.data(), rawFaces.data(), sizeof(double) * 3); - bool hit = accel.Traverse(ray, trace_options, triangle_intersector); - return triangle_intersector.intersection.t; + bool hit = accel.Traverse(ray, triangle_intersector, &isect, trace_options); + return isect.t; }; std::random_device rd; std::mt19937 gen(rd()); // we'll use this to generate rays on triangles @@ -700,7 +699,7 @@ void SimplePolygonMesh::consistentlyOrientFaces(bool outwardOrient) { nanort::TriangleMesh nanortMesh(rawPositions.data(), rawFaces.data(), sizeof(double) * 3); nanort::TriangleSAHPred nanortMeshPred(rawPositions.data(), rawFaces.data(), sizeof(double) * 3); nanort::BVHBuildOptions options; // Use default options - bool ret = accel.Build(nFaces(), options, nanortMesh, nanortMeshPred); + bool ret = accel.Build(nFaces(), nanortMesh, nanortMeshPred, options); if (!ret) { throw std::runtime_error("BVH construction failed"); } From 54ac6ad5ac08dd0ee0ec201a73038cf8351f6ef8 Mon Sep 17 00:00:00 2001 From: Nicholas Sharp Date: Sun, 26 Apr 2026 22:34:03 -0700 Subject: [PATCH 2/7] update nanoflann to latest --- deps/nanoflann/include/nanoflann.hpp | 964 +++++++++++++++------------ 1 file changed, 528 insertions(+), 436 deletions(-) diff --git a/deps/nanoflann/include/nanoflann.hpp b/deps/nanoflann/include/nanoflann.hpp index fffe9243..dc297bc8 100644 --- a/deps/nanoflann/include/nanoflann.hpp +++ b/deps/nanoflann/include/nanoflann.hpp @@ -37,6 +37,12 @@ * nanoflann does not require compiling or installing, just an * #include in your code. * + * Macros that are observed in this file: + * - NANOFLANN_NO_THREADS: If defined, single thread will be enforced. + * - NANOFLANN_FIRST_MATCH: If defined, in case of a tie in distances the item with the smallest + * index will be returned. + * - NANOFLANN_NODE_ALIGNMENT: The memory alignment, in bytes, for kd-tree nodes. Default: 16 + * * See: * - [Online README](https://github.com/jlblancoc/nanoflann) * - [C++ API documentation](https://jlblancoc.github.io/nanoflann/) @@ -56,16 +62,16 @@ #include #include // std::numeric_limits #include +#include #include #include #include /** Library version: 0xMmP (M=Major,m=minor,P=patch) */ -#define NANOFLANN_VERSION 0x171 +#define NANOFLANN_VERSION 0x190 // Avoid conflicting declaration of min/max macros in Windows headers -#if !defined(NOMINMAX) && \ - (defined(_WIN32) || defined(_WIN32_) || defined(WIN32) || defined(_WIN64)) +#if !defined(NOMINMAX) && (defined(_WIN32) || defined(_WIN32_) || defined(WIN32) || defined(_WIN64)) #define NOMINMAX #ifdef max #undef max @@ -77,6 +83,20 @@ #undef None #endif +// Handle restricted pointers +#if defined(__GNUC__) || defined(__clang__) +#define NANOFLANN_RESTRICT __restrict__ +#elif defined(_MSC_VER) +#define NANOFLANN_RESTRICT __restrict +#else +#define NANOFLANN_RESTRICT +#endif + +// Memory alignment of KD-tree nodes: +#ifndef NANOFLANN_NODE_ALIGNMENT +#define NANOFLANN_NODE_ALIGNMENT 16 +#endif + namespace nanoflann { /** @addtogroup nanoflann_grp nanoflann C++ library for KD-trees @@ -84,7 +104,7 @@ namespace nanoflann /** the PI constant (required to avoid MSVC missing symbols) */ template -T pi_const() +constexpr T pi_const() { return static_cast(3.14159265358979323846); } @@ -99,8 +119,7 @@ struct has_resize : std::false_type }; template -struct has_resize().resize(1), 0)> - : std::true_type +struct has_resize().resize(1), 0)> : std::true_type { }; @@ -110,8 +129,7 @@ struct has_assign : std::false_type }; template -struct has_assign().assign(1, 0), 0)> - : std::true_type +struct has_assign().assign(1, 0), 0)> : std::true_type { }; @@ -130,11 +148,10 @@ inline typename std::enable_if::value, void>::type resize( * std::array) It raises an exception if the expected size does not match */ template -inline typename std::enable_if::value, void>::type - resize(Container& c, const size_t nElements) +inline typename std::enable_if::value, void>::type resize( + Container& c, const size_t nElements) { - if (nElements != c.size()) - throw std::logic_error("Try to change the size of a std::array."); + if (nElements != c.size()) throw std::logic_error("Attempt to resize a fixed size container."); } /** @@ -151,8 +168,8 @@ inline typename std::enable_if::value, void>::type assign( * Free function to assign to a std::array */ template -inline typename std::enable_if::value, void>::type - assign(Container& c, const size_t nElements, const T& value) +inline typename std::enable_if::value, void>::type assign( + Container& c, const size_t nElements, const T& value) { for (size_t i = 0; i < nElements; i++) c[i] = value; } @@ -180,8 +197,7 @@ template struct ResultItem { ResultItem() = default; - ResultItem(const IndexType index, const DistanceType distance) - : first(index), second(distance) + ResultItem(const IndexType index, const DistanceType distance) : first(index), second(distance) { } @@ -193,9 +209,7 @@ struct ResultItem * @{ */ /** Result set for KNN searches (N-closest neighbors) */ -template < - typename _DistanceType, typename _IndexType = size_t, - typename _CountType = size_t> +template class KNNResultSet { public: @@ -222,9 +236,9 @@ class KNNResultSet count = 0; } - CountType size() const { return count; } - bool empty() const { return count == 0; } - bool full() const { return count == capacity; } + CountType size() const noexcept { return count; } + bool empty() const noexcept { return count == 0; } + bool full() const noexcept { return count == capacity; } /** * Called during search to add an element matching the criteria. @@ -239,8 +253,7 @@ class KNNResultSet /** If defined and two points have the same distance, the one with * the lowest-index will be returned first. */ #ifdef NANOFLANN_FIRST_MATCH - if ((dists[i - 1] > dist) || - ((dist == dists[i - 1]) && (indices[i - 1] > index))) + if ((dists[i - 1] > dist) || ((dist == dists[i - 1]) && (indices[i - 1] > index))) { #else if (dists[i - 1] > dist) @@ -268,11 +281,10 @@ class KNNResultSet //! Returns the worst distance among found solutions if the search result is //! full, or the maximum possible distance, if not full yet. - DistanceType worstDist() const + DistanceType worstDist() const noexcept { - return (count < capacity || !count) - ? std::numeric_limits::max() - : dists[count - 1]; + return (count < capacity || !count) ? std::numeric_limits::max() + : dists[count - 1]; } void sort() @@ -282,9 +294,7 @@ class KNNResultSet }; /** Result set for RKNN searches (N-closest neighbors with a maximum radius) */ -template < - typename _DistanceType, typename _IndexType = size_t, - typename _CountType = size_t> +template class RKNNResultSet { public: @@ -300,8 +310,7 @@ class RKNNResultSet DistanceType maximumSearchDistanceSquared; public: - explicit RKNNResultSet( - CountType capacity_, DistanceType maximumSearchDistanceSquared_) + explicit RKNNResultSet(CountType capacity_, DistanceType maximumSearchDistanceSquared_) : indices(nullptr), dists(nullptr), capacity(capacity_), @@ -318,9 +327,9 @@ class RKNNResultSet if (capacity) dists[capacity - 1] = maximumSearchDistanceSquared; } - CountType size() const { return count; } - bool empty() const { return count == 0; } - bool full() const { return count == capacity; } + CountType size() const noexcept { return count; } + bool empty() const noexcept { return count == 0; } + bool full() const noexcept { return count == capacity; } /** * Called during search to add an element matching the criteria. @@ -335,8 +344,7 @@ class RKNNResultSet /** If defined and two points have the same distance, the one with * the lowest-index will be returned first. */ #ifdef NANOFLANN_FIRST_MATCH - if ((dists[i - 1] > dist) || - ((dist == dists[i - 1]) && (indices[i - 1] > index))) + if ((dists[i - 1] > dist) || ((dist == dists[i - 1]) && (indices[i - 1] > index))) { #else if (dists[i - 1] > dist) @@ -364,10 +372,9 @@ class RKNNResultSet //! Returns the worst distance among found solutions if the search result is //! full, or the maximum possible distance, if not full yet. - DistanceType worstDist() const + DistanceType worstDist() const noexcept { - return (count < capacity || !count) ? maximumSearchDistanceSquared - : dists[count - 1]; + return (count < capacity || !count) ? maximumSearchDistanceSquared : dists[count - 1]; } void sort() @@ -392,8 +399,7 @@ class RadiusResultSet std::vector>& m_indices_dists; explicit RadiusResultSet( - DistanceType radius_, - std::vector>& indices_dists) + DistanceType radius_, std::vector>& indices_dists) : radius(radius_), m_indices_dists(indices_dists) { init(); @@ -402,10 +408,9 @@ class RadiusResultSet void init() { clear(); } void clear() { m_indices_dists.clear(); } - size_t size() const { return m_indices_dists.size(); } - size_t empty() const { return m_indices_dists.empty(); } - - bool full() const { return true; } + size_t size() const noexcept { return m_indices_dists.size(); } + bool empty() const noexcept { return m_indices_dists.empty(); } + bool full() const noexcept { return true; } /** * Called during search to add an element matching the criteria. @@ -418,7 +423,7 @@ class RadiusResultSet return true; } - DistanceType worstDist() const { return radius; } + DistanceType worstDist() const noexcept { return radius; } /** * Find the worst result (farthest neighbor) without copying or sorting @@ -430,16 +435,12 @@ class RadiusResultSet throw std::runtime_error( "Cannot invoke RadiusResultSet::worst_item() on " "an empty list of results."); - auto it = std::max_element( - m_indices_dists.begin(), m_indices_dists.end(), IndexDist_Sorter()); + auto it = + std::max_element(m_indices_dists.begin(), m_indices_dists.end(), IndexDist_Sorter()); return *it; } - void sort() - { - std::sort( - m_indices_dists.begin(), m_indices_dists.end(), IndexDist_Sorter()); - } + void sort() { std::sort(m_indices_dists.begin(), m_indices_dists.end(), IndexDist_Sorter()); } }; /** @} */ @@ -493,9 +494,7 @@ struct Metric * \tparam IndexType Type of the arguments with which the data can be * accessed (e.g. float, double, int64_t, T*) */ -template < - class T, class DataSource, typename _DistanceType = T, - typename IndexType = uint32_t> +template struct L1_Adaptor { using ElementType = T; @@ -505,41 +504,71 @@ struct L1_Adaptor L1_Adaptor(const DataSource& _data_source) : data_source(_data_source) {} - DistanceType evalMetric( - const T* a, const IndexType b_idx, size_t size, + inline DistanceType evalMetric( + const T* NANOFLANN_RESTRICT a, const IndexType b_idx, size_t size, DistanceType worst_dist = -1) const { - DistanceType result = DistanceType(); - const T* last = a + size; - const T* lastgroup = last - 3; - size_t d = 0; + DistanceType result = DistanceType(); + const size_t multof4 = (size >> 2) << 2; // largest multiple of 4, i.e. 1 << 2 + size_t d; /* Process 4 items with each loop for efficiency. */ - while (a < lastgroup) + if (worst_dist <= 0) + { + /* No checks with worst_dist. */ + for (d = 0; d < multof4; d += 4) + { + const DistanceType diff0 = + std::abs(a[d + 0] - data_source.kdtree_get_pt(b_idx, d + 0)); + const DistanceType diff1 = + std::abs(a[d + 1] - data_source.kdtree_get_pt(b_idx, d + 1)); + const DistanceType diff2 = + std::abs(a[d + 2] - data_source.kdtree_get_pt(b_idx, d + 2)); + const DistanceType diff3 = + std::abs(a[d + 3] - data_source.kdtree_get_pt(b_idx, d + 3)); + /* Parentheses break dependency chain: */ + result += (diff0 + diff1) + (diff2 + diff3); + } + } + else { - const DistanceType diff0 = - std::abs(a[0] - data_source.kdtree_get_pt(b_idx, d++)); - const DistanceType diff1 = - std::abs(a[1] - data_source.kdtree_get_pt(b_idx, d++)); - const DistanceType diff2 = - std::abs(a[2] - data_source.kdtree_get_pt(b_idx, d++)); - const DistanceType diff3 = - std::abs(a[3] - data_source.kdtree_get_pt(b_idx, d++)); - result += diff0 + diff1 + diff2 + diff3; - a += 4; - if ((worst_dist > 0) && (result > worst_dist)) { return result; } + /* Check with worst_dist. */ + for (d = 0; d < multof4; d += 4) + { + const DistanceType diff0 = + std::abs(a[d + 0] - data_source.kdtree_get_pt(b_idx, d + 0)); + const DistanceType diff1 = + std::abs(a[d + 1] - data_source.kdtree_get_pt(b_idx, d + 1)); + const DistanceType diff2 = + std::abs(a[d + 2] - data_source.kdtree_get_pt(b_idx, d + 2)); + const DistanceType diff3 = + std::abs(a[d + 3] - data_source.kdtree_get_pt(b_idx, d + 3)); + /* Parentheses break dependency chain: */ + result += (diff0 + diff1) + (diff2 + diff3); + if (result > worst_dist) + { + return result; + } + } } - /* Process last 0-3 components. Not needed for standard vector lengths. + /* Process last 0-3 components. Unrolled loop with fall-through switch. */ - while (a < last) + switch (size - multof4) { - result += std::abs(*a++ - data_source.kdtree_get_pt(b_idx, d++)); + case 3: + result += std::abs(a[d + 2] - data_source.kdtree_get_pt(b_idx, d + 2)); + case 2: + result += std::abs(a[d + 1] - data_source.kdtree_get_pt(b_idx, d + 1)); + case 1: + result += std::abs(a[d + 0] - data_source.kdtree_get_pt(b_idx, d + 0)); + case 0: + break; } return result; } template - DistanceType accum_dist(const U a, const V b, const size_t) const + inline DistanceType accum_dist(const U a, const V b, const size_t) const { return std::abs(a - b); } @@ -555,9 +584,7 @@ struct L1_Adaptor * \tparam IndexType Type of the arguments with which the data can be * accessed (e.g. float, double, int64_t, T*) */ -template < - class T, class DataSource, typename _DistanceType = T, - typename IndexType = uint32_t> +template struct L2_Adaptor { using ElementType = T; @@ -567,46 +594,70 @@ struct L2_Adaptor L2_Adaptor(const DataSource& _data_source) : data_source(_data_source) {} - DistanceType evalMetric( - const T* a, const IndexType b_idx, size_t size, + inline DistanceType evalMetric( + const T* NANOFLANN_RESTRICT a, const IndexType b_idx, size_t size, DistanceType worst_dist = -1) const { - DistanceType result = DistanceType(); - const T* last = a + size; - const T* lastgroup = last - 3; - size_t d = 0; + DistanceType result = DistanceType(); + const size_t multof4 = (size >> 2) << 2; // largest multiple of 4, i.e. 1 << 2 + size_t d; /* Process 4 items with each loop for efficiency. */ - while (a < lastgroup) + if (worst_dist <= 0) { - const DistanceType diff0 = - a[0] - data_source.kdtree_get_pt(b_idx, d++); - const DistanceType diff1 = - a[1] - data_source.kdtree_get_pt(b_idx, d++); - const DistanceType diff2 = - a[2] - data_source.kdtree_get_pt(b_idx, d++); - const DistanceType diff3 = - a[3] - data_source.kdtree_get_pt(b_idx, d++); - result += - diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3; - a += 4; - if ((worst_dist > 0) && (result > worst_dist)) { return result; } + /* No checks with worst_dist. */ + for (d = 0; d < multof4; d += 4) + { + const DistanceType diff0 = a[d + 0] - data_source.kdtree_get_pt(b_idx, d + 0); + const DistanceType diff1 = a[d + 1] - data_source.kdtree_get_pt(b_idx, d + 1); + const DistanceType diff2 = a[d + 2] - data_source.kdtree_get_pt(b_idx, d + 2); + const DistanceType diff3 = a[d + 3] - data_source.kdtree_get_pt(b_idx, d + 3); + /* Parentheses break dependency chain: */ + result += (diff0 * diff0 + diff1 * diff1) + (diff2 * diff2 + diff3 * diff3); + } } - /* Process last 0-3 components. Not needed for standard vector lengths. + else + { + /* Check with worst_dist. */ + for (d = 0; d < multof4; d += 4) + { + const DistanceType diff0 = a[d + 0] - data_source.kdtree_get_pt(b_idx, d + 0); + const DistanceType diff1 = a[d + 1] - data_source.kdtree_get_pt(b_idx, d + 1); + const DistanceType diff2 = a[d + 2] - data_source.kdtree_get_pt(b_idx, d + 2); + const DistanceType diff3 = a[d + 3] - data_source.kdtree_get_pt(b_idx, d + 3); + /* Parentheses break dependency chain: */ + result += (diff0 * diff0 + diff1 * diff1) + (diff2 * diff2 + diff3 * diff3); + if (result > worst_dist) + { + return result; + } + } + } + /* Process last 0-3 components. Unrolled loop with fall-through switch. */ - while (a < last) + DistanceType diff; + switch (size - multof4) { - const DistanceType diff0 = - *a++ - data_source.kdtree_get_pt(b_idx, d++); - result += diff0 * diff0; + case 3: + diff = a[d + 2] - data_source.kdtree_get_pt(b_idx, d + 2); + result += diff * diff; + case 2: + diff = a[d + 1] - data_source.kdtree_get_pt(b_idx, d + 1); + result += diff * diff; + case 1: + diff = a[d + 0] - data_source.kdtree_get_pt(b_idx, d + 0); + result += diff * diff; + case 0: + break; } return result; } template - DistanceType accum_dist(const U a, const V b, const size_t) const + inline DistanceType accum_dist(const U a, const V b, const size_t) const { - return (a - b) * (a - b); + auto diff = a - b; + return diff * diff; } }; @@ -620,9 +671,7 @@ struct L2_Adaptor * \tparam IndexType Type of the arguments with which the data can be * accessed (e.g. float, double, int64_t, T*) */ -template < - class T, class DataSource, typename _DistanceType = T, - typename IndexType = uint32_t> +template struct L2_Simple_Adaptor { using ElementType = T; @@ -630,28 +679,24 @@ struct L2_Simple_Adaptor const DataSource& data_source; - L2_Simple_Adaptor(const DataSource& _data_source) - : data_source(_data_source) - { - } + L2_Simple_Adaptor(const DataSource& _data_source) : data_source(_data_source) {} - DistanceType evalMetric( - const T* a, const IndexType b_idx, size_t size) const + inline DistanceType evalMetric(const T* a, const IndexType b_idx, size_t size) const { DistanceType result = DistanceType(); for (size_t i = 0; i < size; ++i) { - const DistanceType diff = - a[i] - data_source.kdtree_get_pt(b_idx, i); + const DistanceType diff = a[i] - data_source.kdtree_get_pt(b_idx, i); result += diff * diff; } return result; } template - DistanceType accum_dist(const U a, const V b, const size_t) const + inline DistanceType accum_dist(const U a, const V b, const size_t) const { - return (a - b) * (a - b); + auto diff = a - b; + return diff * diff; } }; @@ -665,9 +710,7 @@ struct L2_Simple_Adaptor * \tparam IndexType Type of the arguments with which the data can be * accessed (e.g. float, double, int64_t, T*) */ -template < - class T, class DataSource, typename _DistanceType = T, - typename IndexType = uint32_t> +template struct SO2_Adaptor { using ElementType = T; @@ -677,17 +720,15 @@ struct SO2_Adaptor SO2_Adaptor(const DataSource& _data_source) : data_source(_data_source) {} - DistanceType evalMetric( - const T* a, const IndexType b_idx, size_t size) const + inline DistanceType evalMetric(const T* a, const IndexType b_idx, size_t size) const { - return accum_dist( - a[size - 1], data_source.kdtree_get_pt(b_idx, size - 1), size - 1); + return accum_dist(a[size - 1], data_source.kdtree_get_pt(b_idx, size - 1), size - 1); } /** Note: this assumes that input angles are already in the range [-pi,pi] */ template - DistanceType accum_dist(const U a, const V b, const size_t) const + inline DistanceType accum_dist(const U a, const V b, const size_t) const { DistanceType result = DistanceType(); DistanceType PI = pi_const(); @@ -710,30 +751,23 @@ struct SO2_Adaptor * \tparam IndexType Type of the arguments with which the data can be * accessed (e.g. float, double, int64_t, T*) */ -template < - class T, class DataSource, typename _DistanceType = T, - typename IndexType = uint32_t> +template struct SO3_Adaptor { using ElementType = T; using DistanceType = _DistanceType; - L2_Simple_Adaptor - distance_L2_Simple; + L2_Simple_Adaptor distance_L2_Simple; - SO3_Adaptor(const DataSource& _data_source) - : distance_L2_Simple(_data_source) - { - } + SO3_Adaptor(const DataSource& _data_source) : distance_L2_Simple(_data_source) {} - DistanceType evalMetric( - const T* a, const IndexType b_idx, size_t size) const + inline DistanceType evalMetric(const T* a, const IndexType b_idx, size_t size) const { return distance_L2_Simple.evalMetric(a, b_idx, size); } template - DistanceType accum_dist(const U a, const V b, const size_t idx) const + inline DistanceType accum_dist(const U a, const V b, const size_t idx) const { return distance_L2_Simple.accum_dist(a, b, idx); } @@ -742,7 +776,7 @@ struct SO3_Adaptor /** Metaprogramming helper traits class for the L1 (Manhattan) metric */ struct metric_L1 : public Metric { - template + template struct traits { using distance_t = L1_Adaptor; @@ -752,7 +786,7 @@ struct metric_L1 : public Metric * distance metric */ struct metric_L2 : public Metric { - template + template struct traits { using distance_t = L2_Adaptor; @@ -762,7 +796,7 @@ struct metric_L2 : public Metric * **squared** distance metric */ struct metric_L2_Simple : public Metric { - template + template struct traits { using distance_t = L2_Simple_Adaptor; @@ -771,7 +805,7 @@ struct metric_L2_Simple : public Metric /** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ struct metric_SO2 : public Metric { - template + template struct traits { using distance_t = SO2_Adaptor; @@ -780,7 +814,7 @@ struct metric_SO2 : public Metric /** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ struct metric_SO3 : public Metric { - template + template struct traits { using distance_t = SO3_Adaptor; @@ -801,8 +835,7 @@ enum class KDTreeSingleIndexAdaptorFlags inline std::underlying_type::type operator&( KDTreeSingleIndexAdaptorFlags lhs, KDTreeSingleIndexAdaptorFlags rhs) { - using underlying = - typename std::underlying_type::type; + using underlying = typename std::underlying_type::type; return static_cast(lhs) & static_cast(rhs); } @@ -810,13 +843,10 @@ inline std::underlying_type::type operator&( struct KDTreeSingleIndexAdaptorParams { KDTreeSingleIndexAdaptorParams( - size_t _leaf_max_size = 10, - KDTreeSingleIndexAdaptorFlags _flags = - KDTreeSingleIndexAdaptorFlags::None, - unsigned int _n_thread_build = 1) - : leaf_max_size(_leaf_max_size), - flags(_flags), - n_thread_build(_n_thread_build) + size_t _leaf_max_size = 10, + KDTreeSingleIndexAdaptorFlags _flags = KDTreeSingleIndexAdaptorFlags::None, + unsigned int _n_thread_build = 1) + : leaf_max_size(_leaf_max_size), flags(_flags), n_thread_build(_n_thread_build) { } @@ -828,10 +858,7 @@ struct KDTreeSingleIndexAdaptorParams /** Search options for KDTreeSingleIndexAdaptor::findNeighbors() */ struct SearchParameters { - SearchParameters(float eps_ = 0, bool sorted_ = true) - : eps(eps_), sorted(sorted_) - { - } + SearchParameters(float eps_ = 0, bool sorted_ = true) : eps(eps_), sorted(sorted_) {} float eps; //!< search for eps-approximate neighbours (default: 0) bool sorted; //!< only for radius search, require neighbours sorted by @@ -864,14 +891,14 @@ class PooledAllocator /* We maintain memory alignment to word boundaries by requiring that all allocations be in multiples of the machine wordsize. */ /* Size of machine word in bytes. Must be power of 2. */ - /* Minimum number of bytes requested at a time from the system. Must be + /* Minimum number of bytes requested at a time from the system. Must be * multiple of WORDSIZE. */ using Size = size_t; Size remaining_ = 0; //!< Number of bytes left in current block of storage - void* base_ = nullptr; //!< Pointer to base of current block of storage - void* loc_ = nullptr; //!< Current location in block to next allocate + void* base_ = nullptr; //!< Pointer to base of current block of storage + void* loc_ = nullptr; //!< Current location in block to next allocate void internal_init() { @@ -928,14 +955,12 @@ class PooledAllocator wastedMemory += remaining_; /* Allocate new storage. */ - const Size blocksize = - size > BLOCKSIZE ? size + WORDSIZE : BLOCKSIZE + WORDSIZE; + const Size blocksize = size > BLOCKSIZE ? size + WORDSIZE : BLOCKSIZE + WORDSIZE; // use the standard C malloc to allocate memory void* m = ::malloc(blocksize); if (!m) { - fprintf(stderr, "Failed to allocate memory.\n"); throw std::bad_alloc(); } @@ -1033,10 +1058,19 @@ class KDTreeBaseClass using Size = typename decltype(vAcc_)::size_type; using Dimension = int32_t; - /*--------------------------- + /*------------------------------------------------------------------- * Internal Data Structures - * --------------------------*/ - struct Node + * + * "Node" below can be declared with alignas(N) to improve + * cache friendliness and SIMD load/store performance. + * + * The optimal N depends on the underlying hardware: + * + Intel x86-64: 16 for SSE, 32 for AVX/AVX2 and 64 for AVX-512 + * + NVIDIA Jetson: 16 for ARM + NEON and CUDA float4/ + * To avoid unnecessary padding, the smallest alignment + * compatible with a platform's vector width should be chosen. + * ------------------------------------------------------------------*/ + struct alignas(NANOFLANN_NODE_ALIGNMENT) Node { /** Union used because a node can be either a LEAF node or a non-leaf * node, so both data fields are never used simultaneously */ @@ -1102,29 +1136,31 @@ class KDTreeBaseClass Size size(const Derived& obj) const { return obj.size_; } /** Returns the length of each point in the dataset */ - Size veclen(const Derived& obj) { return DIM > 0 ? DIM : obj.dim; } + Size veclen(const Derived& obj) const { return DIM > 0 ? DIM : obj.dim_; } /// Helper accessor to the dataset points: - ElementType dataset_get( - const Derived& obj, IndexType element, Dimension component) const + ElementType dataset_get(const Derived& obj, IndexType element, Dimension component) const { return obj.dataset_.kdtree_get_pt(element, component); } /** - * Computes the inde memory usage + * Computes the index memory usage * Returns: memory used by the index */ - Size usedMemory(Derived& obj) + Size usedMemory(const Derived& obj) const { return obj.pool_.usedMemory + obj.pool_.wastedMemory + obj.dataset_.kdtree_get_point_count() * sizeof(IndexType); // pool memory and vind array memory } + /** + * Compute the minimum and maximum element values in the specified dimension + */ void computeMinMax( - const Derived& obj, Offset ind, Size count, Dimension element, - ElementType& min_elem, ElementType& max_elem) + const Derived& obj, Offset ind, Size count, Dimension element, ElementType& min_elem, + ElementType& max_elem) const { min_elem = dataset_get(obj, vAcc_[ind], element); max_elem = min_elem; @@ -1142,13 +1178,14 @@ class KDTreeBaseClass * * @param left index of the first vector * @param right index of the last vector + * @param bbox bounding box used as input for splitting and output for + * parent node */ - NodePtr divideTree( - Derived& obj, const Offset left, const Offset right, BoundingBox& bbox) + NodePtr divideTree(Derived& obj, const Offset left, const Offset right, BoundingBox& bbox) { assert(left < obj.dataset_.kdtree_get_point_count()); - NodePtr node = obj.pool_.template allocate(); // allocate memory + NodePtr node = obj.pool_.template allocate(); // allocate memory const auto dims = (DIM > 0 ? DIM : obj.dim_); /* If too few exemplars remain, then make this a leaf node. */ @@ -1176,6 +1213,7 @@ class KDTreeBaseClass } else { + /* Determine the index, dimension and value for split plane */ Offset idx; Dimension cutfeat; DistanceType cutval; @@ -1183,13 +1221,15 @@ class KDTreeBaseClass node->node_type.sub.divfeat = cutfeat; + /* Recurse on left */ BoundingBox left_bbox(bbox); left_bbox[cutfeat].high = cutval; - node->child1 = this->divideTree(obj, left, left + idx, left_bbox); + node->child1 = this->divideTree(obj, left, left + idx, left_bbox); + /* Recurse on right */ BoundingBox right_bbox(bbox); right_bbox[cutfeat].low = cutval; - node->child2 = this->divideTree(obj, left + idx, right, right_bbox); + node->child2 = this->divideTree(obj, left + idx, right, right_bbox); node->node_type.sub.divlow = left_bbox[cutfeat].high; node->node_type.sub.divhigh = right_bbox[cutfeat].low; @@ -1211,6 +1251,8 @@ class KDTreeBaseClass * * @param left index of the first vector * @param right index of the last vector + * @param bbox bounding box used as input for splitting and output for + * parent node * @param thread_count count of std::async threads * @param mutex mutex for mempool allocation */ @@ -1219,7 +1261,7 @@ class KDTreeBaseClass std::atomic& thread_count, std::mutex& mutex) { std::unique_lock lock(mutex); - NodePtr node = obj.pool_.template allocate(); // allocate memory + NodePtr node = obj.pool_.template allocate(); // allocate memory lock.unlock(); const auto dims = (DIM > 0 ? DIM : obj.dim_); @@ -1249,6 +1291,7 @@ class KDTreeBaseClass } else { + /* Determine the index, dimension and value for split plane */ Offset idx; Dimension cutfeat; DistanceType cutval; @@ -1258,32 +1301,42 @@ class KDTreeBaseClass std::future right_future; + /* Recurse on right concurrently, if possible */ + BoundingBox right_bbox(bbox); right_bbox[cutfeat].low = cutval; if (++thread_count < n_thread_build_) { - // Concurrent right sub-tree + /* Concurrent thread for right recursion */ + right_future = std::async( - std::launch::async, &KDTreeBaseClass::divideTreeConcurrent, - this, std::ref(obj), left + idx, right, - std::ref(right_bbox), std::ref(thread_count), + std::launch::async, &KDTreeBaseClass::divideTreeConcurrent, this, std::ref(obj), + left + idx, right, std::ref(right_bbox), std::ref(thread_count), std::ref(mutex)); } - else { --thread_count; } + else + { + --thread_count; + } + + /* Recurse on left in this thread */ BoundingBox left_bbox(bbox); left_bbox[cutfeat].high = cutval; - node->child1 = this->divideTreeConcurrent( - obj, left, left + idx, left_bbox, thread_count, mutex); + node->child1 = + this->divideTreeConcurrent(obj, left, left + idx, left_bbox, thread_count, mutex); if (right_future.valid()) { - // Block and wait for concurrent right sub-tree + /* Block and wait for concurrent right from above */ + node->child2 = right_future.get(); --thread_count; } else { + /* Otherwise, recurse on right in this thread */ + node->child2 = this->divideTreeConcurrent( obj, left + idx, right, right_bbox, thread_count, mutex); } @@ -1302,56 +1355,86 @@ class KDTreeBaseClass } void middleSplit_( - const Derived& obj, const Offset ind, const Size count, Offset& index, - Dimension& cutfeat, DistanceType& cutval, const BoundingBox& bbox) + const Derived& obj, const Offset ind, const Size count, Offset& index, Dimension& cutfeat, + DistanceType& cutval, const BoundingBox& bbox) { - const auto dims = (DIM > 0 ? DIM : obj.dim_); - const auto EPS = static_cast(0.00001); + const auto dims = (DIM > 0 ? DIM : obj.dim_); + const auto EPS = static_cast(0.00001); + + // Pre-compute max_span once ElementType max_span = bbox[0].high - bbox[0].low; for (Dimension i = 1; i < dims; ++i) { ElementType span = bbox[i].high - bbox[i].low; - if (span > max_span) { max_span = span; } + if (span > max_span) max_span = span; } - ElementType max_spread = -1; + + // Single-pass min/max computation for candidate dimensions cutfeat = 0; + ElementType max_spread = -1; ElementType min_elem = 0, max_elem = 0; + + // Only check dimensions within (1-EPS) of max_span + std::vector candidates; + candidates.reserve(dims); for (Dimension i = 0; i < dims; ++i) { - ElementType span = bbox[i].high - bbox[i].low; - if (span >= (1 - EPS) * max_span) + if (bbox[i].high - bbox[i].low >= (1 - EPS) * max_span) { - ElementType min_elem_, max_elem_; - computeMinMax(obj, ind, count, i, min_elem_, max_elem_); - ElementType spread = max_elem_ - min_elem_; - if (spread > max_spread) - { - cutfeat = i; - max_spread = spread; - min_elem = min_elem_; - max_elem = max_elem_; - } + candidates.push_back(i); } } - // split in the middle + + // Vectorized min/max for candidates + for (Dimension dim : candidates) + { + ElementType local_min = dataset_get(obj, vAcc_[ind], dim); + ElementType local_max = local_min; + + // Unrolled loop for better performance + constexpr size_t UNROLL = 4; + Offset k = 1; + for (; k + UNROLL <= count; k += UNROLL) + { + ElementType v0 = dataset_get(obj, vAcc_[ind + k], dim); + ElementType v1 = dataset_get(obj, vAcc_[ind + k + 1], dim); + ElementType v2 = dataset_get(obj, vAcc_[ind + k + 2], dim); + ElementType v3 = dataset_get(obj, vAcc_[ind + k + 3], dim); + + local_min = std::min({local_min, v0, v1, v2, v3}); + local_max = std::max({local_max, v0, v1, v2, v3}); + } + + // Handle remainder + for (; k < count; ++k) + { + ElementType val = dataset_get(obj, vAcc_[ind + k], dim); + local_min = std::min(local_min, val); + local_max = std::max(local_max, val); + } + + ElementType spread = local_max - local_min; + if (spread > max_spread) + { + cutfeat = dim; + max_spread = spread; + min_elem = local_min; + max_elem = local_max; + } + } + + // Median-of-three for better balance DistanceType split_val = (bbox[cutfeat].low + bbox[cutfeat].high) / 2; + if (split_val < min_elem) split_val = min_elem; + if (split_val > max_elem) split_val = max_elem; - if (split_val < min_elem) - cutval = min_elem; - else if (split_val > max_elem) - cutval = max_elem; - else - cutval = split_val; + cutval = split_val; + // Optimized partitioning Offset lim1, lim2; planeSplit(obj, ind, count, cutfeat, cutval, lim1, lim2); - if (lim1 > count / 2) - index = lim1; - else if (lim2 < count / 2) - index = lim2; - else - index = count / 2; + index = (lim1 > count / 2) ? lim1 : (lim2 < count / 2) ? lim2 : count / 2; } /** @@ -1359,57 +1442,46 @@ class KDTreeBaseClass * corresponding to the 'cutfeat' dimension at 'cutval' position. * * On return: - * dataset[ind[0..lim1-1]][cutfeat]cutval + * dataset[ind[0..lim1-1]][cutfeat] < cutval + * dataset[ind[lim1..lim2-1]][cutfeat] == cutval + * dataset[ind[lim2..count]][cutfeat] > cutval */ void planeSplit( - const Derived& obj, const Offset ind, const Size count, - const Dimension cutfeat, const DistanceType& cutval, Offset& lim1, - Offset& lim2) + const Derived& obj, const Offset ind, const Size count, const Dimension cutfeat, + const DistanceType& cutval, Offset& lim1, Offset& lim2) { - /* Move vector indices for left subtree to front of list. */ + // Dutch National Flag algorithm for three-way partitioning Offset left = 0; + Offset mid = 0; Offset right = count - 1; - for (;;) - { - while (left <= right && - dataset_get(obj, vAcc_[ind + left], cutfeat) < cutval) - ++left; - while (right && left <= right && - dataset_get(obj, vAcc_[ind + right], cutfeat) >= cutval) - --right; - if (left > right || !right) - break; // "!right" was added to support unsigned Index types - std::swap(vAcc_[ind + left], vAcc_[ind + right]); - ++left; - --right; - } - /* If either list is empty, it means that all remaining features - * are identical. Split in the middle to maintain a balanced tree. - */ - lim1 = left; - right = count - 1; - for (;;) + + while (mid <= right) { - while (left <= right && - dataset_get(obj, vAcc_[ind + left], cutfeat) <= cutval) - ++left; - while (right && left <= right && - dataset_get(obj, vAcc_[ind + right], cutfeat) > cutval) - --right; - if (left > right || !right) - break; // "!right" was added to support unsigned Index types - std::swap(vAcc_[ind + left], vAcc_[ind + right]); - ++left; - --right; + ElementType val = dataset_get(obj, vAcc_[ind + mid], cutfeat); + + if (val < cutval) + { + std::swap(vAcc_[ind + left], vAcc_[ind + mid]); + left++; + mid++; + } + else if (val > cutval) + { + std::swap(vAcc_[ind + mid], vAcc_[ind + right]); + right--; + } + else + { + mid++; + } } - lim2 = left; + + lim1 = left; + lim2 = mid; } DistanceType computeInitialDistances( - const Derived& obj, const ElementType* vec, - distance_vector_t& dists) const + const Derived& obj, const ElementType* vec, distance_vector_t& dists) const { assert(vec); DistanceType dist = DistanceType(); @@ -1418,34 +1490,43 @@ class KDTreeBaseClass { if (vec[i] < obj.root_bbox_[i].low) { - dists[i] = - obj.distance_.accum_dist(vec[i], obj.root_bbox_[i].low, i); + dists[i] = obj.distance_.accum_dist(vec[i], obj.root_bbox_[i].low, i); dist += dists[i]; } if (vec[i] > obj.root_bbox_[i].high) { - dists[i] = - obj.distance_.accum_dist(vec[i], obj.root_bbox_[i].high, i); + dists[i] = obj.distance_.accum_dist(vec[i], obj.root_bbox_[i].high, i); dist += dists[i]; } } return dist; } - static void save_tree( - const Derived& obj, std::ostream& stream, const NodeConstPtr tree) + static void save_tree(const Derived& obj, std::ostream& stream, const NodeConstPtr tree) { save_value(stream, *tree); - if (tree->child1 != nullptr) { save_tree(obj, stream, tree->child1); } - if (tree->child2 != nullptr) { save_tree(obj, stream, tree->child2); } + if (tree->child1 != nullptr) + { + save_tree(obj, stream, tree->child1); + } + if (tree->child2 != nullptr) + { + save_tree(obj, stream, tree->child2); + } } static void load_tree(Derived& obj, std::istream& stream, NodePtr& tree) { tree = obj.pool_.template allocate(); load_value(stream, *tree); - if (tree->child1 != nullptr) { load_tree(obj, stream, tree->child1); } - if (tree->child2 != nullptr) { load_tree(obj, stream, tree->child2); } + if (tree->child1 != nullptr) + { + load_tree(obj, stream, tree->child1); + } + if (tree->child2 != nullptr) + { + load_tree(obj, stream, tree->child2); + } } /** Stores the index in a binary file. @@ -1520,19 +1601,16 @@ class KDTreeBaseClass * Dimensionality of data points (e.g. 3 for 3D points) \tparam IndexType Will * be typically size_t or int */ -template < - typename Distance, class DatasetAdaptor, int32_t DIM = -1, - typename index_t = uint32_t> +template class KDTreeSingleIndexAdaptor : public KDTreeBaseClass< - KDTreeSingleIndexAdaptor, - Distance, DatasetAdaptor, DIM, index_t> + KDTreeSingleIndexAdaptor, Distance, + DatasetAdaptor, DIM, index_t> { public: /** Deleted copy constructor*/ explicit KDTreeSingleIndexAdaptor( - const KDTreeSingleIndexAdaptor< - Distance, DatasetAdaptor, DIM, index_t>&) = delete; + const KDTreeSingleIndexAdaptor&) = delete; /** The data source used by this index */ const DatasetAdaptor& dataset_; @@ -1542,9 +1620,8 @@ class KDTreeSingleIndexAdaptor Distance distance_; using Base = typename nanoflann::KDTreeBaseClass< - nanoflann::KDTreeSingleIndexAdaptor< - Distance, DatasetAdaptor, DIM, index_t>, - Distance, DatasetAdaptor, DIM, index_t>; + nanoflann::KDTreeSingleIndexAdaptor, Distance, + DatasetAdaptor, DIM, index_t>; using Offset = typename Base::Offset; using Size = typename Base::Size; @@ -1607,9 +1684,7 @@ class KDTreeSingleIndexAdaptor } private: - void init( - const Dimension dimensionality, - const KDTreeSingleIndexAdaptorParams& params) + void init(const Dimension dimensionality, const KDTreeSingleIndexAdaptorParams& params) { Base::size_ = dataset_.kdtree_get_point_count(); Base::size_at_index_build_ = Base::size_; @@ -1622,12 +1697,10 @@ class KDTreeSingleIndexAdaptor } else { - Base::n_thread_build_ = - std::max(std::thread::hardware_concurrency(), 1u); + Base::n_thread_build_ = std::max(std::thread::hardware_concurrency(), 1u); } - if (!(params.flags & - KDTreeSingleIndexAdaptorFlags::SkipInitialBuildIndex)) + if (!(params.flags & KDTreeSingleIndexAdaptorFlags::SkipInitialBuildIndex)) { // Build KD-tree: buildIndex(); @@ -1650,8 +1723,7 @@ class KDTreeSingleIndexAdaptor // construct the tree if (Base::n_thread_build_ == 1) { - Base::root_node_ = - this->divideTree(*this, 0, Base::size_, Base::root_bbox_); + Base::root_node_ = this->divideTree(*this, 0, Base::size_, Base::root_bbox_); } else { @@ -1687,8 +1759,7 @@ class KDTreeSingleIndexAdaptor */ template bool findNeighbors( - RESULTSET& result, const ElementType* vec, - const SearchParameters& searchParams = {}) const + RESULTSET& result, const ElementType* vec, const SearchParameters& searchParams = {}) const { assert(vec); if (this->size(*this) == 0) return false; @@ -1711,6 +1782,68 @@ class KDTreeSingleIndexAdaptor return result.full(); } + /** + * Find all points contained within the specified bounding box. Their + * indices are stored inside the result object. + * + * Params: + * result = the result object in which the indices of the points + * within the bounding box are stored + * bbox = the bounding box defining the search region + * + * \tparam RESULTSET Should be any ResultSet + * \return Number of points found within the bounding box. + * \sa findNeighbors, knnSearch, radiusSearch + * + * \note The search is inclusive - points on the boundary are included. + */ + template + Size findWithinBox(RESULTSET& result, const BoundingBox& bbox) const + { + if (this->size(*this) == 0) return 0; + if (!Base::root_node_) + throw std::runtime_error( + "[nanoflann] findWithinBox() called before building the " + "index."); + + std::stack stack; + stack.push(Base::root_node_); + + while (!stack.empty()) + { + const NodePtr node = stack.top(); + stack.pop(); + + // If this is a leaf node, then do check and return. + if (!node->child1) // (if one node is nullptr, both are) + { + for (Offset i = node->node_type.lr.left; i < node->node_type.lr.right; ++i) + { + if (contains(bbox, Base::vAcc_[i])) + { + if (!result.addPoint(0, Base::vAcc_[i])) + { + // the resultset doesn't want to receive any more + // points, we're done searching! + return result.size(); + } + } + } + } + else + { + const int idx = node->node_type.sub.divfeat; + const auto low_bound = node->node_type.sub.divlow; + const auto high_bound = node->node_type.sub.divhigh; + + if (bbox[idx].low <= low_bound) stack.push(node->child1); + if (bbox[idx].high >= high_bound) stack.push(node->child2); + } + } + + return result.size(); + } + /** * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. * Their indices and distances are stored in the provided pointers to @@ -1727,8 +1860,8 @@ class KDTreeSingleIndexAdaptor * number of elements in the tree is less than `num_closest`. */ Size knnSearch( - const ElementType* query_point, const Size num_closest, - IndexType* out_indices, DistanceType* out_distances) const + const ElementType* query_point, const Size num_closest, IndexType* out_indices, + DistanceType* out_distances) const { nanoflann::KNNResultSet resultSet(num_closest); resultSet.init(out_indices, out_distances); @@ -1758,12 +1891,10 @@ class KDTreeSingleIndexAdaptor Size radiusSearch( const ElementType* query_point, const DistanceType& radius, std::vector>& IndicesDists, - const SearchParameters& searchParams = {}) const + const SearchParameters& searchParams = {}) const { - RadiusResultSet resultSet( - radius, IndicesDists); - const Size nFound = - radiusSearchCustomCallback(query_point, resultSet, searchParams); + RadiusResultSet resultSet(radius, IndicesDists); + const Size nFound = radiusSearchCustomCallback(query_point, resultSet, searchParams); return nFound; } @@ -1798,12 +1929,10 @@ class KDTreeSingleIndexAdaptor * number of elements in the tree is less than `num_closest`. */ Size rknnSearch( - const ElementType* query_point, const Size num_closest, - IndexType* out_indices, DistanceType* out_distances, - const DistanceType& radius) const + const ElementType* query_point, const Size num_closest, IndexType* out_indices, + DistanceType* out_distances, const DistanceType& radius) const { - nanoflann::RKNNResultSet resultSet( - num_closest, radius); + nanoflann::RKNNResultSet resultSet(num_closest, radius); resultSet.init(out_indices, out_distances); findNeighbors(resultSet, query_point); return resultSet.size(); @@ -1812,15 +1941,14 @@ class KDTreeSingleIndexAdaptor /** @} */ public: - /** Make sure the auxiliary list \a vind has the same size than the current - * dataset, and re-generate if size has changed. */ + /** Make sure the auxiliary list \a vind has the same size as the + * current dataset, and re-generate if size has changed. */ void init_vind() { // Create a permutable array of indices to the input vectors. Base::size_ = dataset_.kdtree_get_point_count(); if (Base::vAcc_.size() != Base::size_) Base::vAcc_.resize(Base::size_); - for (IndexType i = 0; i < static_cast(Base::size_); i++) - Base::vAcc_[i] = i; + for (IndexType i = 0; i < static_cast(Base::size_); i++) Base::vAcc_[i] = i; } void computeBoundingBox(BoundingBox& bbox) @@ -1840,15 +1968,13 @@ class KDTreeSingleIndexAdaptor "no data points found."); for (Dimension i = 0; i < dims; ++i) { - bbox[i].low = bbox[i].high = - this->dataset_get(*this, Base::vAcc_[0], i); + bbox[i].low = bbox[i].high = this->dataset_get(*this, Base::vAcc_[0], i); } for (Offset k = 1; k < N; ++k) { for (Dimension i = 0; i < dims; ++i) { - const auto val = - this->dataset_get(*this, Base::vAcc_[k], i); + const auto val = this->dataset_get(*this, Base::vAcc_[k], i); if (val < bbox[i].low) bbox[i].low = val; if (val > bbox[i].high) bbox[i].high = val; } @@ -1856,6 +1982,17 @@ class KDTreeSingleIndexAdaptor } } + bool contains(const BoundingBox& bbox, IndexType idx) const + { + const auto dims = (DIM > 0 ? DIM : Base::dim_); + for (Dimension i = 0; i < dims; ++i) + { + const auto point = this->dataset_.kdtree_get_pt(idx, i); + if (point < bbox[i].low || point > bbox[i].high) return false; + } + return true; + } + /** * Performs an exact search in the tree starting from a node. * \tparam RESULTSET Should be any ResultSet @@ -1864,21 +2001,18 @@ class KDTreeSingleIndexAdaptor */ template bool searchLevel( - RESULTSET& result_set, const ElementType* vec, const NodePtr node, - DistanceType mindist, distance_vector_t& dists, - const float epsError) const + RESULTSET& result_set, const ElementType* vec, const NodePtr node, DistanceType mindist, + distance_vector_t& dists, const float epsError) const { - /* If this is a leaf node, then do check and return. */ - if ((node->child1 == nullptr) && (node->child2 == nullptr)) + // If this is a leaf node, then do check and return. + if (!node->child1) // (if one node is nullptr, both are) { - DistanceType worst_dist = result_set.worstDist(); - for (Offset i = node->node_type.lr.left; - i < node->node_type.lr.right; ++i) + for (Offset i = node->node_type.lr.left; i < node->node_type.lr.right; ++i) { const IndexType accessor = Base::vAcc_[i]; // reorder... : i; - DistanceType dist = distance_.evalMetric( - vec, accessor, (DIM > 0 ? DIM : Base::dim_)); - if (dist < worst_dist) + DistanceType dist = + distance_.evalMetric(vec, accessor, (DIM > 0 ? DIM : Base::dim_)); + if (dist < result_set.worstDist()) { if (!result_set.addPoint(dist, Base::vAcc_[i])) { @@ -1904,15 +2038,13 @@ class KDTreeSingleIndexAdaptor { bestChild = node->child1; otherChild = node->child2; - cut_dist = - distance_.accum_dist(val, node->node_type.sub.divhigh, idx); + cut_dist = distance_.accum_dist(val, node->node_type.sub.divhigh, idx); } else { bestChild = node->child2; otherChild = node->child1; - cut_dist = - distance_.accum_dist(val, node->node_type.sub.divlow, idx); + cut_dist = distance_.accum_dist(val, node->node_type.sub.divlow, idx); } /* Call recursively to search next level down. */ @@ -1928,8 +2060,7 @@ class KDTreeSingleIndexAdaptor dists[idx] = cut_dist; if (mindist * epsError <= result_set.worstDist()) { - if (!searchLevel( - result_set, vec, otherChild, mindist, dists, epsError)) + if (!searchLevel(result_set, vec, otherChild, mindist, dists, epsError)) { // the resultset doesn't want to receive any more points, we're // done searching! @@ -1946,10 +2077,7 @@ class KDTreeSingleIndexAdaptor * when loading the index object it must be constructed associated to the * same source of data points used while building it. See the example: * examples/saveload_example.cpp \sa loadIndex */ - void saveIndex(std::ostream& stream) const - { - Base::saveIndex(*this, stream); - } + void saveIndex(std::ostream& stream) const { Base::saveIndex(*this, stream); } /** Loads a previous index from a binary file. * IMPORTANT NOTE: The set of data points is NOT stored in the file, so @@ -1997,14 +2125,11 @@ class KDTreeSingleIndexAdaptor * \tparam IndexType Type of the arguments with which the data can be * accessed (e.g. float, double, int64_t, T*) */ -template < - typename Distance, class DatasetAdaptor, int32_t DIM = -1, - typename IndexType = uint32_t> +template class KDTreeSingleIndexDynamicAdaptor_ : public KDTreeBaseClass< - KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM, IndexType>, - Distance, DatasetAdaptor, DIM, IndexType> + KDTreeSingleIndexDynamicAdaptor_, Distance, + DatasetAdaptor, DIM, IndexType> { public: /** @@ -2019,8 +2144,7 @@ class KDTreeSingleIndexDynamicAdaptor_ Distance distance_; using Base = typename nanoflann::KDTreeBaseClass< - nanoflann::KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM, IndexType>, + nanoflann::KDTreeSingleIndexDynamicAdaptor_, Distance, DatasetAdaptor, DIM, IndexType>; using ElementType = typename Base::ElementType; @@ -2060,12 +2184,8 @@ class KDTreeSingleIndexDynamicAdaptor_ KDTreeSingleIndexDynamicAdaptor_( const Dimension dimensionality, const DatasetAdaptor& inputData, std::vector& treeIndex, - const KDTreeSingleIndexAdaptorParams& params = - KDTreeSingleIndexAdaptorParams()) - : dataset_(inputData), - index_params_(params), - treeIndex_(treeIndex), - distance_(inputData) + const KDTreeSingleIndexAdaptorParams& params = KDTreeSingleIndexAdaptorParams()) + : dataset_(inputData), index_params_(params), treeIndex_(treeIndex), distance_(inputData) { Base::size_ = 0; Base::size_at_index_build_ = 0; @@ -2079,18 +2199,15 @@ class KDTreeSingleIndexDynamicAdaptor_ } else { - Base::n_thread_build_ = - std::max(std::thread::hardware_concurrency(), 1u); + Base::n_thread_build_ = std::max(std::thread::hardware_concurrency(), 1u); } } /** Explicitly default the copy constructor */ - KDTreeSingleIndexDynamicAdaptor_( - const KDTreeSingleIndexDynamicAdaptor_& rhs) = default; + KDTreeSingleIndexDynamicAdaptor_(const KDTreeSingleIndexDynamicAdaptor_& rhs) = default; /** Assignment operator definiton */ - KDTreeSingleIndexDynamicAdaptor_ operator=( - const KDTreeSingleIndexDynamicAdaptor_& rhs) + KDTreeSingleIndexDynamicAdaptor_ operator=(const KDTreeSingleIndexDynamicAdaptor_& rhs) { KDTreeSingleIndexDynamicAdaptor_ tmp(rhs); std::swap(Base::vAcc_, tmp.Base::vAcc_); @@ -2118,8 +2235,7 @@ class KDTreeSingleIndexDynamicAdaptor_ // construct the tree if (Base::n_thread_build_ == 1) { - Base::root_node_ = - this->divideTree(*this, 0, Base::size_, Base::root_bbox_); + Base::root_node_ = this->divideTree(*this, 0, Base::size_, Base::root_bbox_); } else { @@ -2159,8 +2275,7 @@ class KDTreeSingleIndexDynamicAdaptor_ */ template bool findNeighbors( - RESULTSET& result, const ElementType* vec, - const SearchParameters& searchParams = {}) const + RESULTSET& result, const ElementType* vec, const SearchParameters& searchParams = {}) const { assert(vec); if (this->size(*this) == 0) return false; @@ -2175,6 +2290,9 @@ class KDTreeSingleIndexDynamicAdaptor_ static_cast(0)); DistanceType dist = this->computeInitialDistances(*this, vec, dists); searchLevel(result, vec, Base::root_node_, dist, dists, epsError); + + if (searchParams.sorted) result.sort(); + return result.full(); } @@ -2193,9 +2311,8 @@ class KDTreeSingleIndexDynamicAdaptor_ * number of elements in the tree is less than `num_closest`. */ Size knnSearch( - const ElementType* query_point, const Size num_closest, - IndexType* out_indices, DistanceType* out_distances, - const SearchParameters& searchParams = {}) const + const ElementType* query_point, const Size num_closest, IndexType* out_indices, + DistanceType* out_distances, const SearchParameters& searchParams = {}) const { nanoflann::KNNResultSet resultSet(num_closest); resultSet.init(out_indices, out_distances); @@ -2225,12 +2342,10 @@ class KDTreeSingleIndexDynamicAdaptor_ Size radiusSearch( const ElementType* query_point, const DistanceType& radius, std::vector>& IndicesDists, - const SearchParameters& searchParams = {}) const + const SearchParameters& searchParams = {}) const { - RadiusResultSet resultSet( - radius, IndicesDists); - const size_t nFound = - radiusSearchCustomCallback(query_point, resultSet, searchParams); + RadiusResultSet resultSet(radius, IndicesDists); + const size_t nFound = radiusSearchCustomCallback(query_point, resultSet, searchParams); return nFound; } @@ -2269,15 +2384,13 @@ class KDTreeSingleIndexDynamicAdaptor_ "no data points found."); for (Dimension i = 0; i < dims; ++i) { - bbox[i].low = bbox[i].high = - this->dataset_get(*this, Base::vAcc_[0], i); + bbox[i].low = bbox[i].high = this->dataset_get(*this, Base::vAcc_[0], i); } for (Offset k = 1; k < N; ++k) { for (Dimension i = 0; i < dims; ++i) { - const auto val = - this->dataset_get(*this, Base::vAcc_[k], i); + const auto val = this->dataset_get(*this, Base::vAcc_[k], i); if (val < bbox[i].low) bbox[i].low = val; if (val > bbox[i].high) bbox[i].high = val; } @@ -2291,27 +2404,22 @@ class KDTreeSingleIndexDynamicAdaptor_ */ template void searchLevel( - RESULTSET& result_set, const ElementType* vec, const NodePtr node, - DistanceType mindist, distance_vector_t& dists, - const float epsError) const + RESULTSET& result_set, const ElementType* vec, const NodePtr node, DistanceType mindist, + distance_vector_t& dists, const float epsError) const { - /* If this is a leaf node, then do check and return. */ - if ((node->child1 == nullptr) && (node->child2 == nullptr)) + // If this is a leaf node, then do check and return. + if (!node->child1) // (if one node is nullptr, both are) { - DistanceType worst_dist = result_set.worstDist(); - for (Offset i = node->node_type.lr.left; - i < node->node_type.lr.right; ++i) + for (Offset i = node->node_type.lr.left; i < node->node_type.lr.right; ++i) { const IndexType index = Base::vAcc_[i]; // reorder... : i; if (treeIndex_[index] == -1) continue; - DistanceType dist = distance_.evalMetric( - vec, index, (DIM > 0 ? DIM : Base::dim_)); - if (dist < worst_dist) + DistanceType dist = distance_.evalMetric(vec, index, (DIM > 0 ? DIM : Base::dim_)); + if (dist < result_set.worstDist()) { if (!result_set.addPoint( static_cast(dist), - static_cast( - Base::vAcc_[i]))) + static_cast(Base::vAcc_[i]))) { // the resultset doesn't want to receive any more // points, we're done searching! @@ -2335,15 +2443,13 @@ class KDTreeSingleIndexDynamicAdaptor_ { bestChild = node->child1; otherChild = node->child2; - cut_dist = - distance_.accum_dist(val, node->node_type.sub.divhigh, idx); + cut_dist = distance_.accum_dist(val, node->node_type.sub.divhigh, idx); } else { bestChild = node->child2; otherChild = node->child1; - cut_dist = - distance_.accum_dist(val, node->node_type.sub.divlow, idx); + cut_dist = distance_.accum_dist(val, node->node_type.sub.divlow, idx); } /* Call recursively to search next level down. */ @@ -2389,21 +2495,17 @@ class KDTreeSingleIndexDynamicAdaptor_ * Dimensionality of data points (e.g. 3 for 3D points) \tparam IndexType * Will be typically size_t or int */ -template < - typename Distance, class DatasetAdaptor, int32_t DIM = -1, - typename IndexType = uint32_t> +template class KDTreeSingleIndexDynamicAdaptor { public: using ElementType = typename Distance::ElementType; using DistanceType = typename Distance::DistanceType; - using Offset = typename KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM>::Offset; - using Size = typename KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM>::Size; - using Dimension = typename KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM>::Dimension; + using Offset = typename KDTreeSingleIndexDynamicAdaptor_::Offset; + using Size = typename KDTreeSingleIndexDynamicAdaptor_::Size; + using Dimension = + typename KDTreeSingleIndexDynamicAdaptor_::Dimension; protected: Size leaf_max_size_; @@ -2424,17 +2526,14 @@ class KDTreeSingleIndexDynamicAdaptor Dimension dim_; //!< Dimensionality of each data point - using index_container_t = KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM, IndexType>; + using index_container_t = + KDTreeSingleIndexDynamicAdaptor_; std::vector index_; public: /** Get a const ref to the internal list of indices; the number of indices * is adapted dynamically as the dataset grows in size. */ - const std::vector& getAllIndices() const - { - return index_; - } + const std::vector& getAllIndices() const { return index_; } private: /** finds position of least significant unset bit */ @@ -2452,11 +2551,10 @@ class KDTreeSingleIndexDynamicAdaptor /** Creates multiple empty trees to handle dynamic support */ void init() { - using my_kd_tree_t = KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM, IndexType>; + using my_kd_tree_t = + KDTreeSingleIndexDynamicAdaptor_; std::vector index( - treeCount_, - my_kd_tree_t(dim_ /*dim*/, dataset_, treeIndex_, index_params_)); + treeCount_, my_kd_tree_t(dim_ /*dim*/, dataset_, treeIndex_, index_params_)); index_ = index; } @@ -2480,9 +2578,8 @@ class KDTreeSingleIndexDynamicAdaptor */ explicit KDTreeSingleIndexDynamicAdaptor( const int dimensionality, const DatasetAdaptor& inputData, - const KDTreeSingleIndexAdaptorParams& params = - KDTreeSingleIndexAdaptorParams(), - const size_t maximumPointCount = 1000000000U) + const KDTreeSingleIndexAdaptorParams& params = KDTreeSingleIndexAdaptorParams(), + const size_t maximumPointCount = 1000000000U) : dataset_(inputData), index_params_(params), distance_(inputData) { treeCount_ = static_cast(std::log2(maximumPointCount)) + 1; @@ -2493,13 +2590,15 @@ class KDTreeSingleIndexDynamicAdaptor leaf_max_size_ = params.leaf_max_size; init(); const size_t num_initial_points = dataset_.kdtree_get_point_count(); - if (num_initial_points > 0) { addPoints(0, num_initial_points - 1); } + if (num_initial_points > 0) + { + addPoints(0, num_initial_points - 1); + } } /** Deleted copy constructor*/ explicit KDTreeSingleIndexDynamicAdaptor( - const KDTreeSingleIndexDynamicAdaptor< - Distance, DatasetAdaptor, DIM, IndexType>&) = delete; + const KDTreeSingleIndexDynamicAdaptor&) = delete; /** Add points to the set, Inserts all points from [start, end] */ void addPoints(IndexType start, IndexType end) @@ -2522,12 +2621,10 @@ class KDTreeSingleIndexDynamicAdaptor for (int i = 0; i < pos; i++) { - for (int j = 0; j < static_cast(index_[i].vAcc_.size()); - j++) + for (int j = 0; j < static_cast(index_[i].vAcc_.size()); j++) { index_[pos].vAcc_.push_back(index_[i].vAcc_[j]); - if (treeIndex_[index_[i].vAcc_[j]] != -1) - treeIndex_[index_[i].vAcc_[j]] = pos; + if (treeIndex_[index_[i].vAcc_[j]] != -1) treeIndex_[index_[i].vAcc_[j]] = pos; } index_[i].vAcc_.clear(); } @@ -2568,8 +2665,7 @@ class KDTreeSingleIndexDynamicAdaptor */ template bool findNeighbors( - RESULTSET& result, const ElementType* vec, - const SearchParameters& searchParams = {}) const + RESULTSET& result, const ElementType* vec, const SearchParameters& searchParams = {}) const { for (size_t i = 0; i < treeCount_; i++) { @@ -2609,17 +2705,13 @@ template < bool row_major = true> struct KDTreeEigenMatrixAdaptor { - using self_t = - KDTreeEigenMatrixAdaptor; + using self_t = KDTreeEigenMatrixAdaptor; using num_t = typename MatrixType::Scalar; using IndexType = typename MatrixType::Index; - using metric_t = typename Distance::template traits< - num_t, self_t, IndexType>::distance_t; + using metric_t = typename Distance::template traits::distance_t; using index_t = KDTreeSingleIndexAdaptor< - metric_t, self_t, - row_major ? MatrixType::ColsAtCompileTime - : MatrixType::RowsAtCompileTime, + metric_t, self_t, row_major ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, IndexType>; index_t* index_; //! The kd-tree index for the user to call its methods as @@ -2631,8 +2723,7 @@ struct KDTreeEigenMatrixAdaptor /// Constructor: takes a const ref to the matrix object with the data points explicit KDTreeEigenMatrixAdaptor( - const Dimension dimensionality, - const std::reference_wrapper& mat, + const Dimension dimensionality, const std::reference_wrapper& mat, const int leaf_max_size = 10, const unsigned int n_thread_build = 1) : m_data_matrix(mat) { @@ -2648,8 +2739,7 @@ struct KDTreeEigenMatrixAdaptor index_ = new index_t( dims, *this /* adaptor */, nanoflann::KDTreeSingleIndexAdaptorParams( - leaf_max_size, nanoflann::KDTreeSingleIndexAdaptorFlags::None, - n_thread_build)); + leaf_max_size, nanoflann::KDTreeSingleIndexAdaptorFlags::None, n_thread_build)); } public: @@ -2669,8 +2759,8 @@ struct KDTreeEigenMatrixAdaptor * distances. */ void query( - const num_t* query_point, const Size num_closest, - IndexType* out_indices, num_t* out_distances) const + const num_t* query_point, const Size num_closest, IndexType* out_indices, + num_t* out_distances) const { nanoflann::KNNResultSet resultSet(num_closest); resultSet.init(out_indices, out_distances); @@ -2680,11 +2770,11 @@ struct KDTreeEigenMatrixAdaptor /** @name Interface expected by KDTreeSingleIndexAdaptor * @{ */ - const self_t& derived() const { return *this; } - self_t& derived() { return *this; } + inline const self_t& derived() const noexcept { return *this; } + inline self_t& derived() noexcept { return *this; } // Must return the number of data points - Size kdtree_get_point_count() const + inline Size kdtree_get_point_count() const { if (row_major) return m_data_matrix.get().rows(); @@ -2693,7 +2783,7 @@ struct KDTreeEigenMatrixAdaptor } // Returns the dim'th component of the idx'th point in the class: - num_t kdtree_get_pt(const IndexType idx, size_t dim) const + inline num_t kdtree_get_pt(const IndexType idx, size_t dim) const { if (row_major) return m_data_matrix.get().coeff(idx, IndexType(dim)); @@ -2707,7 +2797,7 @@ struct KDTreeEigenMatrixAdaptor // in "bb" so it can be avoided to redo it again. Look at bb.size() to // find out the expected dimensionality (e.g. 2 or 3 for point clouds) template - bool kdtree_get_bbox(BBOX& /*bb*/) const + inline bool kdtree_get_bbox(BBOX& /*bb*/) const { return false; } @@ -2719,3 +2809,5 @@ struct KDTreeEigenMatrixAdaptor /** @} */ // end of grouping } // namespace nanoflann + +#undef NANOFLANN_RESTRICT From 042ffefdfa8193803bd7dffd85ce5f11cfba102e Mon Sep 17 00:00:00 2001 From: Nicholas Sharp Date: Sun, 26 Apr 2026 22:37:14 -0700 Subject: [PATCH 3/7] add knn tests --- test/CMakeLists.txt | 1 + test/src/utilities_test.cpp | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 test/src/utilities_test.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ddb63090..a55687fe 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -110,6 +110,7 @@ set(TEST_SRCS src/flip_geodesic_test.cpp src/stl_reader_test.cpp src/surface_misc_test.cpp + src/utilities_test.cpp ) add_executable(geometry-central-test "${TEST_SRCS}") diff --git a/test/src/utilities_test.cpp b/test/src/utilities_test.cpp new file mode 100644 index 00000000..05ca591f --- /dev/null +++ b/test/src/utilities_test.cpp @@ -0,0 +1,36 @@ +#include "geometrycentral/utilities/knn.h" +#include "geometrycentral/utilities/vector3.h" + +#include "gtest/gtest.h" + +using namespace geometrycentral; + +// ============================================================ +// =============== NearestNeighborFinder tests +// ============================================================ + +TEST(UtilitiesSuite, KNNReturnsClosestPoint) { + std::vector points = {{0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {10, 0, 0}}; + NearestNeighborFinder finder(points); + + std::vector result = finder.kNearest({1.1, 0, 0}, 1); + ASSERT_EQ(result.size(), 1); + EXPECT_EQ(result[0], 1); // closest to {1,0,0} +} + +TEST(UtilitiesSuite, KNNNeighborsExcludesSource) { + std::vector points = {{0, 0, 0}, {1, 0, 0}, {2, 0, 0}}; + NearestNeighborFinder finder(points); + + std::vector result = finder.kNearestNeighbors(1, 1); + ASSERT_EQ(result.size(), 1); + EXPECT_NE(result[0], 1); // source index should not appear +} + +TEST(UtilitiesSuite, KNNRadiusSearch) { + std::vector points = {{0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {10, 0, 0}}; + NearestNeighborFinder finder(points); + + std::vector result = finder.radiusSearch({0, 0, 0}, 1.5); + EXPECT_EQ(result.size(), 2); // {0,0,0} and {1,0,0} are within radius 1.5 +} From f6166dfc7fb6d31b65e18403b20e5750df381c46 Mon Sep 17 00:00:00 2001 From: Nicholas Sharp Date: Sun, 26 Apr 2026 22:41:07 -0700 Subject: [PATCH 4/7] add nearest neighbor docs --- docs/docs/utilities/nearest_neighbors.md | 42 ++++++++++++++++++++++++ docs/mkdocs.yml | 3 +- 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 docs/docs/utilities/nearest_neighbors.md diff --git a/docs/docs/utilities/nearest_neighbors.md b/docs/docs/utilities/nearest_neighbors.md new file mode 100644 index 00000000..03d8bbbe --- /dev/null +++ b/docs/docs/utilities/nearest_neighbors.md @@ -0,0 +1,42 @@ +A simple utility class for 3D nearest-neighbor queries, backed by a [nanoflann](https://github.com/jlblancoc/nanoflann) KD-tree. + +`#!cpp #include "geometrycentral/utilities/knn.h"` + +```cpp +#include "geometrycentral/utilities/knn.h" +using namespace geometrycentral; + +std::vector points = { {0,0,0}, {1,0,0}, {2,0,0}, {3,0,0} }; +NearestNeighborFinder finder(points); + +// 2 nearest points to a query location +std::vector nearest = finder.kNearest({1.1, 0, 0}, 2); + +// 2 nearest neighbors of points[1], excluding itself +std::vector neighbors = finder.kNearestNeighbors(1, 2); + +// all points within radius 1.5 of a query location +std::vector inBall = finder.radiusSearch({0, 0, 0}, 1.5); +``` + +All query results are indices into the input `points` vector. + +??? func "`#!cpp NearestNeighborFinder::NearestNeighborFinder(const std::vector& points)`" + + Construct a finder over the given point set and build the spatial index. All subsequent queries return indices into this vector. + +### Queries + +??? func "`#!cpp std::vector NearestNeighborFinder::kNearest(Vector3 query, size_t k)`" + + Returns the indices of the `k` closest points in the input set to `query`, in order of increasing distance. `query` need not be one of the input points. + +??? func "`#!cpp std::vector NearestNeighborFinder::kNearestNeighbors(size_t sourceInd, size_t k)`" + + Returns the indices of the `k` closest points to `points[sourceInd]`, excluding `sourceInd` itself. Useful for computing point-to-point neighbor relationships within the input set. + +??? func "`#!cpp std::vector NearestNeighborFinder::radiusSearch(Vector3 query, double rad)`" + + Returns the indices of all points within distance `rad` of `query`. + + Pass the actual desired radius, not the squared radius that some other libraries use. diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 29f17e70..af122c6a 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -107,8 +107,9 @@ nav: - 'Matrix Types' : 'numerical/matrix_types.md' - 'Linear Algebra Utilities' : 'numerical/linear_algebra_utilities.md' - 'Linear Solvers' : 'numerical/linear_solvers.md' - - Utilities: + - Utilities: - 'Miscellaneous' : 'utilities/miscellaneous.md' - 'Vector2' : 'utilities/vector2.md' - 'Vector3' : 'utilities/vector3.md' + - 'Nearest Neighbors' : 'utilities/nearest_neighbors.md' - 'Utilities for Eigen Interoperability' : 'utilities/eigenmap.md' From 206d29082d1427c45bf723b0a8cefdea98f57f46 Mon Sep 17 00:00:00 2001 From: Nicholas Sharp Date: Sun, 26 Apr 2026 23:24:02 -0700 Subject: [PATCH 5/7] add simple mesh ray tracer wrapping nanort --- .../docs/surface/utilities/mesh_ray_tracer.md | 66 +++++++++++ docs/mkdocs.yml | 3 +- .../geometrycentral/surface/mesh_ray_tracer.h | 45 ++++++++ src/CMakeLists.txt | 2 + src/surface/mesh_ray_tracer.cpp | 105 ++++++++++++++++++ src/surface/simple_polygon_mesh.cpp | 50 +-------- test/src/utilities_test.cpp | 55 +++++++++ 7 files changed, 281 insertions(+), 45 deletions(-) create mode 100644 docs/docs/surface/utilities/mesh_ray_tracer.md create mode 100644 include/geometrycentral/surface/mesh_ray_tracer.h create mode 100644 src/surface/mesh_ray_tracer.cpp diff --git a/docs/docs/surface/utilities/mesh_ray_tracer.md b/docs/docs/surface/utilities/mesh_ray_tracer.md new file mode 100644 index 00000000..8545ccd5 --- /dev/null +++ b/docs/docs/surface/utilities/mesh_ray_tracer.md @@ -0,0 +1,66 @@ +A BVH-accelerated ray-triangle intersection utility for triangle meshes, backed by [nanort](https://github.com/lighttransport/nanort). + +`#!cpp #include "geometrycentral/surface/mesh_ray_tracer.h"` + +```cpp +#include "geometrycentral/surface/mesh_ray_tracer.h" +using namespace geometrycentral::surface; + +// Build the acceleration structure once +MeshRayTracer tracer(geometry); + +// Trace a ray — origin and direction, direction need not be normalized +RayHitResult hit = tracer.trace({0.5, 0.5, 10.0}, {0.0, 0.0, -1.0}); + +if (hit.hit) { + // distance along ray + double t = hit.t; + + // which triangle was hit + size_t faceIdx = hit.faceIndex; + + // barycentric coords in that triangle + Vector3 bary = hit.faceCoords; +} +``` + +The mesh must be triangulated. The BVH is built once at construction and can be queried any number of times. + +### Result struct + +`RayHitResult` is returned by every `trace()` call. + +```cpp +struct RayHitResult { + bool hit; // true if the ray intersected the mesh + double t; // distance along the ray to the hit point + size_t faceIndex; // index of the hit face in the mesh + Vector3 faceCoords; // barycentric coords in the hit face, in the same order as the face's vertices are given +}; +``` + +### Construction + +??? func "`#!cpp MeshRayTracer::MeshRayTracer(EmbeddedGeometryInterface& geom)`" + + Build the BVH over the triangles of `geom`'s mesh. The mesh must be fully triangular. + +??? func "`#!cpp MeshRayTracer::MeshRayTracer(const SimplePolygonMesh& mesh)`" + + Build the BVH from a `SimplePolygonMesh`. All polygons must be triangles. + +### Queries + +??? func "`#!cpp RayHitResult MeshRayTracer::trace(Vector3 origin, Vector3 dir) const`" + + Trace a ray from `origin` in direction `dir` (need not be normalized). + +### Utilities + +??? func "`#!cpp SurfacePoint toSurfacePoint(const RayHitResult& hit, SurfaceMesh& mesh)`" + + Convert a `RayHitResult` to a `SurfacePoint` on `mesh`. + + Only meaningful when the tracer was built from that mesh (or its geometry). Can only be called if the result was a hit. + + The returned point is a face point with the hit's barycentric coordinates. diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index af122c6a..44fd8176 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -67,11 +67,12 @@ nav: - Geometry: - 'Overview' : 'surface/geometry/geometry.md' - 'Quantities' : 'surface/geometry/quantities.md' - - Utilities: + - Utilities: - 'Simple Polygon Mesh' : 'surface/utilities/simple_polygon_mesh.md' - 'I/O' : 'surface/utilities/io.md' - 'Surface Point' : 'surface/utilities/surface_point.md' - 'Barycentric Vector': 'surface/utilities/barycentric_vector.md' + - 'Mesh Ray Tracer' : 'surface/utilities/mesh_ray_tracer.md' - Algorithms: - 'Direction Fields' : 'surface/algorithms/direction_fields.md' - 'Embedding' : 'surface/algorithms/embedding.md' diff --git a/include/geometrycentral/surface/mesh_ray_tracer.h b/include/geometrycentral/surface/mesh_ray_tracer.h new file mode 100644 index 00000000..4acf2150 --- /dev/null +++ b/include/geometrycentral/surface/mesh_ray_tracer.h @@ -0,0 +1,45 @@ +#pragma once + +#include "geometrycentral/surface/embedded_geometry_interface.h" +#include "geometrycentral/surface/simple_polygon_mesh.h" +#include "geometrycentral/surface/surface_point.h" +#include "geometrycentral/utilities/vector3.h" + +#include +#include +#include + +namespace geometrycentral { +namespace surface { + +struct RayHitResult { + bool hit = false; + double t = std::numeric_limits::infinity(); // distance along the ray to the hit + size_t faceIndex = std::numeric_limits::max(); // index of the hit face + Vector3 faceCoords{0., 0., 0.}; // barycentric coords in the hit face, in the + // same order as the face's vertices are given +}; + +// Acceleration structure for ray-triangle intersection queries against a triangle mesh. +// Backed by a nanort BVH. The mesh must be triangulated. +class MeshRayTracer { +public: + MeshRayTracer(EmbeddedGeometryInterface& geom); + MeshRayTracer(const SimplePolygonMesh& mesh); + ~MeshRayTracer(); + + // Trace a ray from `origin` in direction `dir` (need not be normalized). + // Returns hit info; result.hit is false if no intersection is found. + RayHitResult trace(Vector3 origin, Vector3 dir) const; + +private: + class Impl; + std::unique_ptr impl; +}; + +// Convert a RayHitResult to a SurfacePoint on the given mesh. +// Only valid when the tracer was built from that mesh (or its geometry). +SurfacePoint toSurfacePoint(const RayHitResult& hit, SurfaceMesh& mesh); + +} // namespace surface +} // namespace geometrycentral diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 804e37aa..b0f0078a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -14,6 +14,7 @@ SET(SRCS surface/surface_mesh_factories.cpp surface/meshio.cpp surface/simple_polygon_mesh.cpp + surface/mesh_ray_tracer.cpp surface/rich_surface_mesh_data.cpp surface/base_geometry_interface.cpp @@ -133,6 +134,7 @@ SET(HEADERS ${INCLUDE_ROOT}/surface/signpost_intrinsic_triangulation.h ${INCLUDE_ROOT}/surface/signpost_intrinsic_triangulation.ipp ${INCLUDE_ROOT}/surface/simple_idt.h + ${INCLUDE_ROOT}/surface/mesh_ray_tracer.h ${INCLUDE_ROOT}/surface/simple_polygon_mesh.h ${INCLUDE_ROOT}/surface/stripe_patterns.h ${INCLUDE_ROOT}/surface/subdivide.h diff --git a/src/surface/mesh_ray_tracer.cpp b/src/surface/mesh_ray_tracer.cpp new file mode 100644 index 00000000..6e6a9be0 --- /dev/null +++ b/src/surface/mesh_ray_tracer.cpp @@ -0,0 +1,105 @@ +#include "geometrycentral/surface/mesh_ray_tracer.h" + +#include "nanort.h" + +namespace geometrycentral { +namespace surface { + +class MeshRayTracer::Impl { +public: + Impl(EmbeddedGeometryInterface& geom) { + geom.requireVertexPositions(); + + std::vector positions; + positions.reserve(geom.mesh.nVertices()); + for (Vertex v : geom.mesh.vertices()) { + positions.push_back(geom.vertexPositions[v]); + } + + std::vector> faces; + faces.reserve(geom.mesh.nFaces()); + for (Face f : geom.mesh.faces()) { + std::vector tri; + for (Vertex v : f.adjacentVertices()) tri.push_back(v.getIndex()); + if (tri.size() != 3) throw std::runtime_error("MeshRayTracer: mesh must be triangulated"); + faces.push_back(std::move(tri)); + } + + geom.unrequireVertexPositions(); + build(positions, faces); + } + + Impl(const SimplePolygonMesh& mesh) { build(mesh.vertexCoordinates, mesh.polygons); } + + void build(const std::vector& positions, const std::vector>& faces) { + double INF = std::numeric_limits::infinity(); + Vector3 bboxMin{INF, INF, INF}; + Vector3 bboxMax{-INF, -INF, -INF}; + + rawPositions.reserve(positions.size() * 3); + for (const Vector3& p : positions) { + rawPositions.push_back(p.x); + rawPositions.push_back(p.y); + rawPositions.push_back(p.z); + bboxMin = componentwiseMin(bboxMin, p); + bboxMax = componentwiseMax(bboxMax, p); + } + lengthScale = norm(bboxMax - bboxMin); + + rawFaces.reserve(faces.size() * 3); + for (const std::vector& f : faces) { + if (f.size() != 3) throw std::runtime_error("MeshRayTracer: mesh must be triangulated"); + for (size_t idx : f) rawFaces.push_back(static_cast(idx)); + } + + if (rawFaces.empty()) return; // empty mesh — all queries will miss + + nanort::TriangleMesh nanortMesh(rawPositions.data(), rawFaces.data(), sizeof(double) * 3); + nanort::TriangleSAHPred nanortPred(rawPositions.data(), rawFaces.data(), sizeof(double) * 3); + bool ok = accel.Build(static_cast(rawFaces.size() / 3), nanortMesh, nanortPred); + if (!ok) throw std::runtime_error("MeshRayTracer: BVH construction failed"); + } + + RayHitResult trace(Vector3 origin, Vector3 dir) const { + if (rawFaces.empty()) return {}; + + nanort::Ray ray; + ray.min_t = 1e-6 * lengthScale; + ray.max_t = 1e1 * lengthScale; + for (int i = 0; i < 3; i++) ray.org[i] = origin[i]; + for (int i = 0; i < 3; i++) ray.dir[i] = dir[i]; + + nanort::TriangleIntersection isect; + nanort::TriangleIntersector intersector(rawPositions.data(), rawFaces.data(), sizeof(double) * 3); + bool hit = accel.Traverse(ray, intersector, &isect); + + if (!hit) return {}; + + RayHitResult result; + result.hit = true; + result.t = isect.t; + result.faceIndex = static_cast(isect.prim_id); + result.faceCoords = {1.0 - isect.u - isect.v, isect.u, isect.v}; + return result; + } + + std::vector rawPositions; + std::vector rawFaces; + nanort::BVHAccel accel; + double lengthScale = 1.0; +}; + + +MeshRayTracer::MeshRayTracer(EmbeddedGeometryInterface& geom) : impl(new Impl(geom)) {} +MeshRayTracer::MeshRayTracer(const SimplePolygonMesh& mesh) : impl(new Impl(mesh)) {} +MeshRayTracer::~MeshRayTracer() = default; + +RayHitResult MeshRayTracer::trace(Vector3 origin, Vector3 dir) const { return impl->trace(origin, dir); } + +SurfacePoint toSurfacePoint(const RayHitResult& hit, SurfaceMesh& mesh) { + if (!hit.hit) throw std::runtime_error("toSurfacePoint: RayHitResult is not a hit"); + return SurfacePoint(mesh.face(hit.faceIndex), hit.faceCoords); +} + +} // namespace surface +} // namespace geometrycentral diff --git a/src/surface/simple_polygon_mesh.cpp b/src/surface/simple_polygon_mesh.cpp index 4256fc88..be88913e 100644 --- a/src/surface/simple_polygon_mesh.cpp +++ b/src/surface/simple_polygon_mesh.cpp @@ -1,7 +1,8 @@ #include "geometrycentral/surface/simple_polygon_mesh.h" +#include "geometrycentral/surface/mesh_ray_tracer.h" + #include "happly.h" -#include "nanort.h" #include #include @@ -654,55 +655,16 @@ void SimplePolygonMesh::consistentlyOrientFaces(bool outwardOrient) { // If we are going to want it, build a raytracing acceleration structure to do the outward orientation step. int32_t N_RAYS_PER_FACE = 4; - std::vector rawPositions; - std::vector rawFaces; - nanort::BVHAccel accel; - double lengthScale = -1; + std::unique_ptr tracer; auto traceDist = [&](Vector3 root, Vector3 dir) { - nanort::Ray ray; - ray.min_t = 1e-6 * lengthScale; - ray.max_t = 1e1 * lengthScale; - for (int i = 0; i < 3; i++) ray.org[i] = root[i]; - for (int i = 0; i < 3; i++) ray.dir[i] = dir[i]; - nanort::BVHTraceOptions trace_options; - nanort::TriangleIntersection isect; - nanort::TriangleIntersector triangle_intersector(rawPositions.data(), rawFaces.data(), sizeof(double) * 3); - bool hit = accel.Traverse(ray, triangle_intersector, &isect, trace_options); - return isect.t; + RayHitResult hit = tracer->trace(root, dir); + return hit.t; // infinity if no hit }; std::random_device rd; std::mt19937 gen(rd()); // we'll use this to generate rays on triangles std::uniform_real_distribution realDist(0., 1.); if (outwardOrient) { - - double INF = std::numeric_limits::infinity(); - Vector3 bboxMin{INF, INF, INF}; - Vector3 bboxMax{-INF, -INF, -INF}; - for (Vector3 v : vertexCoordinates) { - rawPositions.push_back(v.x); - rawPositions.push_back(v.y); - rawPositions.push_back(v.z); - bboxMin = componentwiseMin(bboxMin, v); - bboxMax = componentwiseMax(bboxMax, v); - } - lengthScale = norm(bboxMax - bboxMin); - for (const std::vector& poly : polygons) { - if (poly.size() != 3) { - throw std::runtime_error("consistentlyOrientFaces() with outwardOrient=true only supports triangular meshes"); - } - rawFaces.push_back(poly[0]); - rawFaces.push_back(poly[1]); - rawFaces.push_back(poly[2]); - } - - - nanort::TriangleMesh nanortMesh(rawPositions.data(), rawFaces.data(), sizeof(double) * 3); - nanort::TriangleSAHPred nanortMeshPred(rawPositions.data(), rawFaces.data(), sizeof(double) * 3); - nanort::BVHBuildOptions options; // Use default options - bool ret = accel.Build(nFaces(), nanortMesh, nanortMeshPred, options); - if (!ret) { - throw std::runtime_error("BVH construction failed"); - } + tracer.reset(new MeshRayTracer(*this)); } // pre-build a connectivity lookup map diff --git a/test/src/utilities_test.cpp b/test/src/utilities_test.cpp index 05ca591f..84dad09a 100644 --- a/test/src/utilities_test.cpp +++ b/test/src/utilities_test.cpp @@ -1,9 +1,13 @@ #include "geometrycentral/utilities/knn.h" #include "geometrycentral/utilities/vector3.h" +#include "geometrycentral/surface/mesh_ray_tracer.h" +#include "geometrycentral/surface/surface_mesh_factories.h" + #include "gtest/gtest.h" using namespace geometrycentral; +using namespace geometrycentral::surface; // ============================================================ // =============== NearestNeighborFinder tests @@ -34,3 +38,54 @@ TEST(UtilitiesSuite, KNNRadiusSearch) { std::vector result = finder.radiusSearch({0, 0, 0}, 1.5); EXPECT_EQ(result.size(), 2); // {0,0,0} and {1,0,0} are within radius 1.5 } + +// ============================================================ +// =============== MeshRayTracer tests +// ============================================================ + +// Single triangle in the z=0 plane: vertices (0,0,0), (2,0,0), (0,2,0) +namespace { +std::tuple, std::unique_ptr> buildRayTracerTestMesh() { + std::vector verts = {{0, 0, 0}, {2, 0, 0}, {0, 2, 0}}; + std::vector> faces = {{0, 1, 2}}; + return makeManifoldSurfaceMeshAndGeometry(faces, verts); +} +} // namespace + +TEST(UtilitiesSuite, RayTracerHit) { + std::unique_ptr meshPtr; + std::unique_ptr geomPtr; + std::tie(meshPtr, geomPtr) = buildRayTracerTestMesh(); + + MeshRayTracer tracer(*geomPtr); + RayHitResult result = tracer.trace({0.5, 0.5, -1}, {0, 0, 1}); + + ASSERT_TRUE(result.hit); + EXPECT_NEAR(result.t, 1.0, 1e-6); + EXPECT_EQ(result.faceIndex, 0); +} + +TEST(UtilitiesSuite, RayTracerMiss) { + std::unique_ptr meshPtr; + std::unique_ptr geomPtr; + std::tie(meshPtr, geomPtr) = buildRayTracerTestMesh(); + + MeshRayTracer tracer(*geomPtr); + RayHitResult result = tracer.trace({5, 5, -1}, {0, 0, 1}); + + EXPECT_FALSE(result.hit); +} + +TEST(UtilitiesSuite, RayTracerBarycentrics) { + std::unique_ptr meshPtr; + std::unique_ptr geomPtr; + std::tie(meshPtr, geomPtr) = buildRayTracerTestMesh(); + + MeshRayTracer tracer(*geomPtr); + // Ray aimed at vertex 0 (origin) should return faceCoords near (1,0,0) + RayHitResult result = tracer.trace({0.01, 0.01, -1}, {0, 0, 1}); + + ASSERT_TRUE(result.hit); + double sumBary = result.faceCoords.x + result.faceCoords.y + result.faceCoords.z; + EXPECT_NEAR(sumBary, 1.0, 1e-6); +} From cd21d760f217ddc32162ae39997e85f0f72e1825 Mon Sep 17 00:00:00 2001 From: Nicholas Sharp Date: Sun, 26 Apr 2026 23:33:06 -0700 Subject: [PATCH 6/7] remove nanoflann backward compatibility path (use the fixed standard path!) --- deps/CMakeLists.txt | 7 ------- 1 file changed, 7 deletions(-) diff --git a/deps/CMakeLists.txt b/deps/CMakeLists.txt index abeabba9..d1bbe25a 100644 --- a/deps/CMakeLists.txt +++ b/deps/CMakeLists.txt @@ -157,13 +157,6 @@ endif() if (NOT TARGET nanoflann) add_library(nanoflann INTERFACE) target_include_directories(nanoflann INTERFACE $) - - # For backward compatibility, we need to make a symlink in the build directory. - file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/nanoflann) - file(CREATE_LINK - ${CMAKE_CURRENT_SOURCE_DIR}/nanoflann/include/nanoflann.hpp - ${CMAKE_CURRENT_BINARY_DIR}/include/nanoflann/nanoflann.hpp SYMBOLIC) - target_include_directories(nanoflann INTERFACE $) endif() if (NOT TARGET happly) From a01127d18aadf8d29cbfa8722e37f622b6b278ae Mon Sep 17 00:00:00 2001 From: Nicholas Sharp Date: Sun, 26 Apr 2026 23:35:30 -0700 Subject: [PATCH 7/7] move nanort to a private dependency (it's no longer in public headers) --- CMakeLists.txt | 4 ---- src/CMakeLists.txt | 5 +++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b6ea85d3..e4147536 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,10 +81,6 @@ install( FILES "${CMAKE_CURRENT_SOURCE_DIR}/deps/happly/happly.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -install( - FILES "${CMAKE_CURRENT_SOURCE_DIR}/deps/nanort/include/nanort/nanort.h" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nanort") - configure_package_config_file( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/GeometryCentralConfig.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/GeometryCentralConfig.cmake" diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b0f0078a..51d06110 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -183,10 +183,11 @@ target_include_directories(geometry-central $ $ $ - $ $ PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/../deps/nanoflann/include") + "${CMAKE_CURRENT_SOURCE_DIR}/../deps/nanoflann/include" + "${CMAKE_CURRENT_SOURCE_DIR}/../deps/nanort/include" +) # Keep Eigen out of the exported build-tree link interface to avoid exporting the # local helper target created in deps/CMakeLists.txt. The installed package adds