Skip to content

Commit 8670b53

Browse files
committed
[libc++] Optimize ranges::find for vector<bool>
Benchmark results:
```
----------------------------------------------------------------
Benchmark                                    old             new
----------------------------------------------------------------
bm_vector_bool_ranges_find/1             5.64 ns         6.08 ns
bm_vector_bool_ranges_find/2             16.5 ns         6.03 ns
bm_vector_bool_ranges_find/3             20.3 ns         6.07 ns
bm_vector_bool_ranges_find/4             22.2 ns         6.08 ns
bm_vector_bool_ranges_find/5             23.5 ns         6.05 ns
bm_vector_bool_ranges_find/6             24.4 ns         6.10 ns
bm_vector_bool_ranges_find/7             26.7 ns         6.10 ns
bm_vector_bool_ranges_find/8             25.0 ns         6.08 ns
bm_vector_bool_ranges_find/16            27.9 ns         6.07 ns
bm_vector_bool_ranges_find/64            44.5 ns         5.35 ns
bm_vector_bool_ranges_find/512            243 ns         25.7 ns
bm_vector_bool_ranges_find/4096          1858 ns         35.6 ns
bm_vector_bool_ranges_find/32768        15461 ns         93.5 ns
bm_vector_bool_ranges_find/262144      126462 ns          571 ns
bm_vector_bool_ranges_find/1048576     497736 ns         2272 ns
```

Reviewed By: #libc, Mordante

Spies: var-const, Mordante, libcxx-commits

Differential Revision: https://reviews.llvm.org/D156039
1 parent 274c082 commit 8670b53

File tree

14 files changed

+151
-55
lines changed

14 files changed

+151
-55
lines changed

libcxx/benchmarks/algorithms/find.bench.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,32 @@ BENCHMARK(bm_ranges_find<char>)->DenseRange(1, 8)->Range(16, 1 << 20);
4646
BENCHMARK(bm_ranges_find<short>)->DenseRange(1, 8)->Range(16, 1 << 20);
4747
BENCHMARK(bm_ranges_find<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
4848

49+
// Benchmarks std::find(begin, end, true) on a std::vector<bool> holding
// state.range() bits. The vector is all-false except for one randomly chosen
// bit that is set only for the duration of a single benchmark iteration.
static void bm_vector_bool_find(benchmark::State& state) {
50+
std::vector<bool> vec1(state.range(), false);
51+
// The generator is seeded from std::random_device, so the chosen indices
// differ from run to run.
std::mt19937_64 rng(std::random_device{}());
52+
53+
for (auto _ : state) {
54+
// Pick the position of the single `true` bit for this iteration. The modulo
// needs a non-empty vector; the registration below only uses sizes >= 1.
auto idx = rng() % vec1.size();
55+
vec1[idx] = true;
56+
// Hand the vector and the search result to DoNotOptimize so that neither the
// write above nor the call being measured is treated as dead code.
benchmark::DoNotOptimize(vec1);
57+
benchmark::DoNotOptimize(std::find(vec1.begin(), vec1.end(), true));
58+
// Clear the bit again so the vector is all-false at the start of the next
// iteration.
vec1[idx] = false;
59+
}
60+
}
61+
// Registered for sizes 1..8 and then for the range 16 .. 1 << 20.
BENCHMARK(bm_vector_bool_find)->DenseRange(1, 8)->Range(16, 1 << 20);
62+
63+
// Benchmarks std::ranges::find(vec, true) on a std::vector<bool> holding
// state.range() bits. The vector is all-false except for one randomly chosen
// bit that is set only for the duration of a single benchmark iteration.
static void bm_vector_bool_ranges_find(benchmark::State& state) {
64+
std::vector<bool> vec1(state.range(), false);
65+
// The generator is seeded from std::random_device, so the chosen indices
// differ from run to run.
std::mt19937_64 rng(std::random_device{}());
66+
67+
for (auto _ : state) {
68+
// Pick the position of the single `true` bit for this iteration. The modulo
// needs a non-empty vector; the registration below only uses sizes >= 1.
auto idx = rng() % vec1.size();
69+
vec1[idx] = true;
70+
// Hand the vector and the search result to DoNotOptimize so that neither the
// write above nor the call being measured is treated as dead code.
benchmark::DoNotOptimize(vec1);
71+
benchmark::DoNotOptimize(std::ranges::find(vec1, true));
72+
// Clear the bit again so the vector is all-false at the start of the next
// iteration.
vec1[idx] = false;
73+
}
74+
}
75+
// Registered for sizes 1..8 and then for the range 16 .. 1 << 20.
BENCHMARK(bm_vector_bool_ranges_find)->DenseRange(1, 8)->Range(16, 1 << 20);
76+
4977
BENCHMARK_MAIN();

libcxx/include/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ set(files
249249
__bit/countr.h
250250
__bit/endian.h
251251
__bit/has_single_bit.h
252+
__bit/invert_if.h
252253
__bit/popcount.h
253254
__bit/rotate.h
254255
__bit_reference
@@ -415,6 +416,7 @@ set(files
415416
__functional/unary_negate.h
416417
__functional/weak_result_type.h
417418
__fwd/array.h
419+
__fwd/bit_reference.h
418420
__fwd/fstream.h
419421
__fwd/get.h
420422
__fwd/hash.h

libcxx/include/__algorithm/find.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,14 @@
1010
#ifndef _LIBCPP___ALGORITHM_FIND_H
1111
#define _LIBCPP___ALGORITHM_FIND_H
1212

13+
#include <__algorithm/min.h>
1314
#include <__algorithm/unwrap_iter.h>
15+
#include <__bit/countr.h>
16+
#include <__bit/invert_if.h>
1417
#include <__config>
1518
#include <__functional/identity.h>
1619
#include <__functional/invoke.h>
20+
#include <__fwd/bit_reference.h>
1721
#include <__string/constexpr_c_functions.h>
1822
#include <__type_traits/is_same.h>
1923

@@ -25,8 +29,12 @@
2529
# pragma GCC system_header
2630
#endif
2731

32+
_LIBCPP_PUSH_MACROS
33+
#include <__undef_macros>
34+
2835
_LIBCPP_BEGIN_NAMESPACE_STD
2936

37+
// generic implementation
3038
template <class _Iter, class _Sent, class _Tp, class _Proj>
3139
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter
3240
__find_impl(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) {
@@ -36,6 +44,7 @@ __find_impl(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) {
3644
return __first;
3745
}
3846

47+
// trivially equality comparable implementations
3948
template <class _Tp,
4049
class _Up,
4150
class _Proj,
@@ -64,6 +73,51 @@ __find_impl(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
6473
}
6574
#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
6675

76+
// __bit_iterator implementation
77+
// Searches the __n bits starting at __first for the first bit equal to
// _ToFind, examining one storage word at a time instead of one bit at a time.
//
// Returns an iterator to the first matching bit, or an iterator __n positions
// past the original __first when no bit in the range matches.
//
// Each word is passed through __invert_if<!_ToFind>, so when searching for
// `false` the word is complemented first. In both cases the bits being
// searched for show up as set bits, and the result of __libcpp_ctz on the
// masked word is used as the bit index of the match.
template <bool _ToFind, class _Cp, bool _IsConst>
78+
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, _IsConst>
79+
__find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) {
80+
using _It = __bit_iterator<_Cp, _IsConst>;
81+
using __storage_type = typename _It::__storage_type;
82+
83+
const int __bits_per_word = _It::__bits_per_word;
84+
// do first partial word
// Only needed when __first does not sit on a word boundary.
85+
if (__first.__ctz_ != 0) {
86+
// Number of bits from __first up to the end of its word.
__storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
87+
// Number of those bits that actually belong to the searched range.
// NOTE(review): std::min deduces a single type, so this relies on
// _Cp::size_type and __storage_type being the same type - confirm.
__storage_type __dn = std::min(__clz_f, __n);
88+
// Mask selecting bit positions [__ctz_, __ctz_ + __dn) of the word: the left
// shift drops the bits below __first, the right shift drops the bits past
// the end of the range.
// NOTE(review): if __storage_type is narrower than int, ~__storage_type(0)
// is promoted to int before shifting - confirm this is fine for every
// storage type in use.
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
89+
__storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
90+
if (__b)
91+
return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
92+
// The whole range was inside this word and nothing matched: return the end
// of the range.
if (__n == __dn)
93+
return __first + __n;
94+
__n -= __dn;
95+
++__first.__seg_;
96+
}
97+
// do middle whole words
// No mask is needed here because every bit of the word is inside the range.
// NOTE(review): this compares an unsigned __n with the `const int`
// __bits_per_word.
98+
for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) {
99+
__storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_);
100+
if (__b)
101+
return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
102+
}
103+
// do last partial word
// Fewer than __bits_per_word bits remain; only the lowest __n bits of this
// word are part of the range.
104+
if (__n > 0) {
105+
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
106+
__storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
107+
if (__b)
108+
return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
109+
}
110+
// Nothing matched: build the end-of-range iterator, i.e. bit __n of the
// current word (bit 0 when the range ended exactly on a word boundary).
return _It(__first.__seg_, static_cast<unsigned>(__n));
111+
}
112+
113+
// __find_impl overload for __bit_iterator ranges. It only participates in
// overload resolution when _Proj satisfies __is_identity, and the projection
// argument itself is never used.
//
// __value is converted to bool once, and the search is forwarded to the
// matching __find_bool<true> / __find_bool<false> specialization together with
// the length of the range (__last - __first).
template <class _Cp, bool _IsConst, class _Tp, class _Proj, __enable_if_t<__is_identity<_Proj>::value, int> = 0>
114+
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, _IsConst>
115+
__find_impl(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value, _Proj&) {
116+
if (static_cast<bool>(__value))
117+
return std::__find_bool<true>(__first, static_cast<typename _Cp::size_type>(__last - __first));
118+
return std::__find_bool<false>(__first, static_cast<typename _Cp::size_type>(__last - __first));
119+
}
120+
67121
template <class _InputIterator, class _Tp>
68122
_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
69123
find(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
@@ -74,4 +128,6 @@ find(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
74128

75129
_LIBCPP_END_NAMESPACE_STD
76130

131+
_LIBCPP_POP_MACROS
132+
77133
#endif // _LIBCPP___ALGORITHM_FIND_H

libcxx/include/__bit/invert_if.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef _LIBCPP___BIT_INVERT_IF_H
10+
#define _LIBCPP___BIT_INVERT_IF_H
11+
12+
#include <__concepts/arithmetic.h>
13+
#include <__config>
14+
15+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
16+
# pragma GCC system_header
17+
#endif
18+
19+
_LIBCPP_BEGIN_NAMESPACE_STD
20+
21+
// Returns the bitwise complement of __v when the template argument _Invert is
// true, and __v unchanged otherwise. The choice is fixed at compile time by
// _Invert; the result is returned as _Tp in both cases.
template <bool _Invert, class _Tp>
22+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __invert_if(_Tp __v) {
23+
if (_Invert)
24+
return ~__v;
25+
return __v;
26+
}
27+
28+
_LIBCPP_END_NAMESPACE_STD
29+
30+
#endif // _LIBCPP___BIT_INVERT_IF_H

libcxx/include/__bit_reference

Lines changed: 2 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@
1414
#include <__algorithm/fill_n.h>
1515
#include <__algorithm/min.h>
1616
#include <__bit/countr.h>
17+
#include <__bit/invert_if.h>
1718
#include <__bit/popcount.h>
1819
#include <__config>
20+
#include <__fwd/bit_reference.h>
1921
#include <__iterator/iterator_traits.h>
2022
#include <__memory/construct_at.h>
2123
#include <__memory/pointer_traits.h>
@@ -32,8 +34,6 @@ _LIBCPP_PUSH_MACROS
3234

3335
_LIBCPP_BEGIN_NAMESPACE_STD
3436

35-
template <class _Cp, bool _IsConst, typename _Cp::__storage_type = 0>
36-
class __bit_iterator;
3737
template <class _Cp>
3838
class __bit_const_reference;
3939

@@ -171,59 +171,6 @@ private:
171171
__bit_const_reference& operator=(const __bit_const_reference&) = delete;
172172
};
173173

174-
template <bool _Invert, class _Tp>
175-
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __invert_if(_Tp __v) {
176-
if (_Invert)
177-
return ~__v;
178-
return __v;
179-
}
180-
181-
// find
182-
183-
template <bool _ToFind, class _Cp, bool _IsConst>
184-
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, _IsConst>
185-
__find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) {
186-
using _It = __bit_iterator<_Cp, _IsConst>;
187-
using __storage_type = typename _It::__storage_type;
188-
189-
const int __bits_per_word = _It::__bits_per_word;
190-
// do first partial word
191-
if (__first.__ctz_ != 0) {
192-
__storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
193-
__storage_type __dn = std::min(__clz_f, __n);
194-
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
195-
__storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
196-
if (__b)
197-
return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
198-
if (__n == __dn)
199-
return __first + __n;
200-
__n -= __dn;
201-
++__first.__seg_;
202-
}
203-
// do middle whole words
204-
for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) {
205-
__storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_);
206-
if (__b)
207-
return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
208-
}
209-
// do last partial word
210-
if (__n > 0) {
211-
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
212-
__storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
213-
if (__b)
214-
return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
215-
}
216-
return _It(__first.__seg_, static_cast<unsigned>(__n));
217-
}
218-
219-
template <class _Cp, bool _IsConst, class _Tp>
220-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, _IsConst>
221-
find(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value) {
222-
if (static_cast<bool>(__value))
223-
return std::__find_bool<true>(__first, static_cast<typename _Cp::size_type>(__last - __first));
224-
return std::__find_bool<false>(__first, static_cast<typename _Cp::size_type>(__last - __first));
225-
}
226-
227174
// count
228175

229176
template <bool _ToCount, class _Cp, bool _IsConst>
libcxx/include/__fwd/bit_reference.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef _LIBCPP___FWD_BIT_REFERENCE_H
10+
#define _LIBCPP___FWD_BIT_REFERENCE_H
11+
12+
#include <__config>
13+
14+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
15+
# pragma GCC system_header
16+
#endif
17+
18+
_LIBCPP_BEGIN_NAMESPACE_STD
19+
20+
// Forward declaration of __bit_iterator. The third template parameter is a
// non-type parameter of type _Cp::__storage_type that defaults to 0.
// NOTE(review): presumably this lives in its own header so that code can name
// __bit_iterator without including <__bit_reference> - confirm.
template <class _Cp, bool _IsConst, typename _Cp::__storage_type = 0>
21+
class __bit_iterator;
22+
23+
_LIBCPP_END_NAMESPACE_STD
24+
25+
#endif // _LIBCPP___FWD_BIT_REFERENCE_H

libcxx/include/bitset

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ template <size_t N> struct hash<std::bitset<N>>;
113113
*/
114114

115115
#include <__algorithm/fill.h>
116+
#include <__algorithm/find.h>
116117
#include <__assert> // all public C++ headers provide the assertion handler
117118
#include <__bit_reference>
118119
#include <__config>

libcxx/test/libcxx/transitive_includes/cxx03.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ bitset cstddef
8989
bitset cstdint
9090
bitset cstdlib
9191
bitset cstring
92+
bitset cwchar
9293
bitset initializer_list
9394
bitset iosfwd
9495
bitset limits

libcxx/test/libcxx/transitive_includes/cxx11.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ bitset cstddef
8989
bitset cstdint
9090
bitset cstdlib
9191
bitset cstring
92+
bitset cwchar
9293
bitset initializer_list
9394
bitset iosfwd
9495
bitset limits

libcxx/test/libcxx/transitive_includes/cxx14.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ bitset cstddef
8989
bitset cstdint
9090
bitset cstdlib
9191
bitset cstring
92+
bitset cwchar
9293
bitset initializer_list
9394
bitset iosfwd
9495
bitset limits

0 commit comments

Comments
 (0)