From d426559752344901a354505f0b992a7241a262fe Mon Sep 17 00:00:00 2001 From: Laurent Bonnans Date: Sun, 28 Sep 2025 23:31:17 +0200 Subject: [PATCH] [libcxx] Unwrap iterators in __find_segment The segmented iterator optimized implementation of find now unwraps iterators when processing each segment. As a result, it is able to take better advantage of some find specializations: calling memchr/wmemchr for vector<vector<{char,int}>> ``` Benchmark Baseline Candidate Difference % Difference -------------------------------------------------------------- ---------- ----------- ------------ -------------- rng::find(join_view(deque>))_(process_all)/1024 71.13 61.19 -9.94 -13.97 rng::find(join_view(deque>))_(process_all)/32768 2359.19 2237.02 -122.17 -5.18 rng::find(join_view(deque>))_(process_all)/50 16.88 17.59 0.71 4.20 rng::find(join_view(deque>))_(process_all)/8 15.59 16.10 0.51 3.27 rng::find(join_view(deque>))_(process_all)/8192 647.01 532.75 -114.26 -17.66 rng::find(join_view(list>))_(process_all)/1024 689.76 680.74 -9.02 -1.31 rng::find(join_view(list>))_(process_all)/32768 22284.95 21500.26 -784.69 -3.52 rng::find(join_view(list>))_(process_all)/50 32.77 32.12 -0.65 -1.98 rng::find(join_view(list>))_(process_all)/8 6.11 5.92 -0.19 -3.11 rng::find(join_view(list>))_(process_all)/8192 5527.88 5373.43 -154.45 -2.79 rng::find(join_view(vector>))_(process_all)/1024 1305.59 1264.04 -41.55 -3.18 rng::find(join_view(vector>))_(process_all)/32768 42840.88 43322.64 481.76 1.12 rng::find(join_view(vector>))_(process_all)/50 57.52 62.35 4.82 8.38 rng::find(join_view(vector>))_(process_all)/8 6.06 5.98 -0.07 -1.18 rng::find(join_view(vector>))_(process_all)/8192 20700.53 21431.66 731.12 3.53 rng::find(join_view(vector>))_(process_all)/1024 310.64 18.34 -292.30 -94.09 rng::find(join_view(vector>))_(process_all)/32768 9424.96 531.99 -8892.97 -94.36 rng::find(join_view(vector>))_(process_all)/50 18.58 3.25 -15.32 -82.49 rng::find(join_view(vector>))_(process_all)/8 4.81 2.98 -1.84 -38.13 
rng::find(join_view(vector>))_(process_all)/8192 2437.50 126.88 -2310.62 -94.79 rng::find(join_view(vector>))_(process_all)/1024 297.10 41.70 -255.39 -85.96 rng::find(join_view(vector>))_(process_all)/32768 9662.42 1822.05 -7840.36 -81.14 rng::find(join_view(vector>))_(process_all)/50 22.29 5.10 -17.19 -77.11 rng::find(join_view(vector>))_(process_all)/8 3.73 3.13 -0.60 -16.05 rng::find(join_view(vector>))_(process_all)/8192 2399.68 356.10 -2043.58 -85.16 ``` --- libcxx/include/__algorithm/find.h | 3 +- .../algorithms/nonmodifying/find.bench.cpp | 53 +++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h index 5f32ae8fc9524..91c6a4e744a71 100644 --- a/libcxx/include/__algorithm/find.h +++ b/libcxx/include/__algorithm/find.h @@ -228,7 +228,8 @@ struct __find_segment { template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _InputIterator operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) const { - return std::__find(__first, __last, __value_, __proj); + return std::__rewrap_iter( + __first, std::__find(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __value_, __proj)); } }; diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp index afea31fb59e95..7780b5a92a6c4 100644 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -83,6 +84,20 @@ int main(int argc, char** argv) { bm.template operator()>("rng::find_if_not(list) (" + comment + ")", ranges_find_if_not); }; + auto register_nested_container_benchmarks = [&](auto bm, std::string comment) { + // ranges_find + bm.template operator()>>( + "rng::find(join_view(vector>)) (" + comment + ")", ranges_find); + bm.template operator()>>( + 
"rng::find(join_view(vector>)) (" + comment + ")", ranges_find); + bm.template operator()>>( + "rng::find(join_view(list>)) (" + comment + ")", ranges_find); + bm.template operator()>>( + "rng::find(join_view(vector>)) (" + comment + ")", ranges_find); + bm.template operator()>>( + "rng::find(join_view(deque>)) (" + comment + ")", ranges_find); + }; + // Benchmark {std,ranges}::{find,find_if,find_if_not}(normal container) where we // bail out after 25% of elements { @@ -142,6 +157,44 @@ int main(int argc, char** argv) { register_benchmarks(bm, "process all"); } + // Benchmark {std,ranges}::{find,find_if,find_if_not}(join(normal container)) where we process the whole sequence + { + auto bm = [](std::string name, auto find) { + benchmark::RegisterBenchmark( + name, + [find](auto& st) { + std::size_t const size = st.range(0); + std::size_t const seg_size = 256; + std::size_t const segments = (size + seg_size - 1) / seg_size; + using C1 = typename Container::value_type; + using ValueType = typename C1::value_type; + ValueType x = Generate::random(); + ValueType y = random_different_from({x}); + Container c(segments); + auto n = size; + for (auto it = c.begin(); it != c.end(); it++) { + it->resize(std::min(seg_size, n), x); + n -= it->size(); + } + + auto view = c | std::views::join; + + for ([[maybe_unused]] auto _ : st) { + benchmark::DoNotOptimize(c); + benchmark::DoNotOptimize(y); + auto result = find(view.begin(), view.end(), y); + benchmark::DoNotOptimize(result); + } + }) + ->Arg(8) + ->Arg(50) // non power-of-two + ->Arg(1024) + ->Arg(8192) + ->Arg(1 << 15); + }; + register_nested_container_benchmarks(bm, "process all"); + } + // Benchmark {std,ranges}::{find,find_if,find_if_not}(vector) where we process the whole sequence { auto bm = [](std::string name, auto find) {