3030#include " runtime/atomic.hpp"
3131#include " utilities/align.hpp"
3232#include " utilities/count_trailing_zeros.hpp"
33+ #include " utilities/powerOfTwo.hpp"
3334
3435inline void BitMap::set_bit (idx_t bit) {
3536 verify_index (bit);
@@ -165,64 +166,113 @@ inline void BitMap::par_clear_range(idx_t beg, idx_t end, RangeSizeHint hint) {
165166 }
166167}
167168
169+ // General notes regarding find_{first,last}_bit_impl.
170+ //
171+ // The first (last) word often contains an interesting bit, either due to
172+ // density or because of features of the calling algorithm. So it's important
173+ // to examine that word with a minimum of fuss, minimizing setup time for
174+ // additional words that will be wasted if that word is indeed
175+ // interesting.
176+ //
177+ // The first (last) bit is similarly often interesting. When it matters
178+ // (density or features of the calling algorithm make it likely that bit is
179+ // set), going straight to counting bits compares poorly to examining that bit
180+ // first; the counting operations can be relatively expensive, plus there is
181+ // the additional range check (unless aligned). But when that bit isn't set,
182+ // the cost of having tested for it is relatively small compared to the rest
183+ // of the search.
184+ //
185+ // The benefit from aligned_right being true is relatively small. It saves an
186+ // operation in the setup of the word search loop. It also eliminates the
187+ // range check on the final result. However, callers often have a comparison
188+ // with end, and inlining may allow the two comparisons to be combined. It is
189+ // important when !aligned_right that return paths either return end or a
190+ // value dominated by a comparison with end. aligned_right is still helpful
191+ // when the caller doesn't have a range check because features of the calling
192+ // algorithm guarantee an interesting bit will be present.
193+ //
194+ // The benefit from aligned_left is even smaller, as there is no savings in
195+ // the setup of the word search loop.
196+
168197template <BitMap::bm_word_t flip, bool aligned_right>
169198inline BitMap::idx_t BitMap::find_first_bit_impl (idx_t beg, idx_t end) const {
170199 STATIC_ASSERT (flip == find_ones_flip || flip == find_zeros_flip);
171200 verify_range (beg, end);
172201 assert (!aligned_right || is_aligned (end, BitsPerWord), " end not aligned" );
173202
174- // The first word often contains an interesting bit, either due to
175- // density or because of features of the calling algorithm. So it's
176- // important to examine that first word with a minimum of fuss,
177- // minimizing setup time for later words that will be wasted if the
178- // first word is indeed interesting.
179-
180- // The benefit from aligned_right being true is relatively small.
181- // It saves an operation in the setup for the word search loop.
182- // It also eliminates the range check on the final result.
183- // However, callers often have a comparison with end, and
184- // inlining often allows the two comparisons to be combined; it is
185- // important when !aligned_right that return paths either return
186- // end or a value dominated by a comparison with end.
187- // aligned_right is still helpful when the caller doesn't have a
188- // range check because features of the calling algorithm guarantee
189- // an interesting bit will be present.
190-
191203 if (beg < end) {
192204 // Get the word containing beg, and shift out low bits.
193205 idx_t word_index = to_words_align_down (beg);
194206 bm_word_t cword = flipped_word (word_index, flip) >> bit_in_word (beg);
195- if ((cword & 1 ) != 0 ) {
196- // The first bit is similarly often interesting. When it matters
197- // (density or features of the calling algorithm make it likely
198- // the first bit is set), going straight to the next clause compares
199- // poorly with doing this check first; count_trailing_zeros can be
200- // relatively expensive, plus there is the additional range check.
201- // But when the first bit isn't set, the cost of having tested for
202- // it is relatively small compared to the rest of the search.
207+ if ((cword & 1 ) != 0 ) { // Test the beg bit.
203208 return beg;
204- } else if (cword != 0 ) {
205- // Flipped and shifted first word is non-zero.
206- idx_t result = beg + count_trailing_zeros (cword);
207- if (aligned_right || (result < end)) return result;
208- // Result is beyond range bound; return end.
209- } else {
210- // Flipped and shifted first word is zero. Word search through
209+ }
210+ // Position of bit0 of cword in the bitmap. Initially for shifted first word.
211+ idx_t cword_pos = beg;
212+ if (cword == 0 ) { // Test other bits in the first word.
213+ // First word had no interesting bits. Word search through
211214 // aligned up end for a non-zero flipped word.
212215 idx_t word_limit = aligned_right
213216 ? to_words_align_down (end) // Minuscule savings when aligned.
214217 : to_words_align_up (end);
215218 while (++word_index < word_limit) {
216219 cword = flipped_word (word_index, flip);
217220 if (cword != 0 ) {
218- idx_t result = bit_index (word_index) + count_trailing_zeros (cword);
219- if (aligned_right || (result < end)) return result;
220- // Result is beyond range bound; return end.
221- assert ((word_index + 1 ) == word_limit, " invariant" );
221+ // Update for found non-zero word, and join common tail to compute
222+ // result from cword_pos and non-zero cword.
223+ cword_pos = bit_index (word_index);
222224 break ;
223225 }
224226 }
225- // No bits in range; return end.
227+ }
228+ // For all paths reaching here, (cword != 0) is already known, so we
229+ // expect the compiler to not generate any code for it. Either first word
230+ // was non-zero, or found a non-zero word in range, or fully scanned range
231+ // (so cword is zero).
232+ if (cword != 0 ) {
233+ idx_t result = cword_pos + count_trailing_zeros (cword);
234+ if (aligned_right || (result < end)) return result;
235+ // Result is beyond range bound; return end.
236+ }
237+ }
238+ return end;
239+ }
240+
// Backward search of the bit range [beg, end).
//
// Starts at bit (end - 1) and works toward beg, looking for the highest index
// whose bit is set in the "flipped" word, i.e. the word as returned by
// flipped_word(word_index, flip). flip is restricted at compile time to
// find_ones_flip or find_zeros_flip.
//
// Returns the index that was found, or end when the range is empty or no such
// bit lies inside it. Note that "not found" is reported as end, not beg.
//
// When aligned_left is true, beg is asserted to be aligned to BitsPerWord and
// the final (result >= beg) comparison is skipped. Otherwise a candidate below
// beg is discarded and end is returned.
//
// NOTE(review): log2i is declared outside this view; it is presumably the
// bit position of the most significant set bit of its argument -- confirm.
241+ template <BitMap::bm_word_t flip, bool aligned_left>
242+ inline BitMap::idx_t BitMap::find_last_bit_impl (idx_t beg, idx_t end) const {
243+ STATIC_ASSERT (flip == find_ones_flip || flip == find_zeros_flip);
244+ verify_range (beg, end);
245+ assert (!aligned_left || is_aligned (beg, BitsPerWord), " beg not aligned" );
246+
247+ if (beg < end) {
248+ // Get the last partial and flipped word in the range.
249+ idx_t last_bit_index = end - 1 ;
250+ idx_t word_index = to_words_align_down (last_bit_index);
251+ bm_word_t cword = flipped_word (word_index, flip);
252+ // Mask for extracting and testing bits of last word.
253+ bm_word_t last_bit_mask = bm_word_t (1 ) << bit_in_word (last_bit_index);
254+ if ((cword & last_bit_mask) != 0 ) { // Test last bit.
255+ return last_bit_index;
256+ }
257+ // Extract prior bits, clearing those above last_bit_index.
258+ cword &= (last_bit_mask - 1 );
259+ if (cword == 0 ) { // Test other bits in the last word.
260+ // Last word had no interesting bits. Word search through
261+ // aligned down beg for a non-zero flipped word.
262+ idx_t word_limit = to_words_align_down (beg);
// The decrement is part of the loop test, so inside the body word_index
// already names the next lower word. If the loop runs out without a
// break, cword is zero and word_index is not read again below.
263+ while (word_index-- > word_limit) {
264+ cword = flipped_word (word_index, flip);
265+ if (cword != 0 ) break ;
266+ }
267+ }
268+ // For all paths reaching here, (cword != 0) is already known, so we
269+ // expect the compiler to not generate any code for it. Either last word
270+ // was non-zero, or found a non-zero word in range, or fully scanned range
271+ // (so cword is zero).
272+ if (cword != 0 ) {
273+ idx_t result = bit_index (word_index) + log2i (cword);
274+ if (aligned_left || (result >= beg)) return result;
275+ // Result is below range bound; return end.
226276 }
227277 }
228278 return end;
@@ -243,6 +293,21 @@ BitMap::find_first_set_bit_aligned_right(idx_t beg, idx_t end) const {
243293 return find_first_bit_impl<find_ones_flip, true >(beg, end);
244294}
245295
296+ inline BitMap::idx_t
297+ BitMap::find_last_set_bit (idx_t beg, idx_t end) const {
298+ return find_last_bit_impl<find_ones_flip, false >(beg, end);
299+ }
300+
301+ inline BitMap::idx_t
302+ BitMap::find_last_clear_bit (idx_t beg, idx_t end) const {
303+ return find_last_bit_impl<find_zeros_flip, false >(beg, end);
304+ }
305+
306+ inline BitMap::idx_t
307+ BitMap::find_last_set_bit_aligned_left (idx_t beg, idx_t end) const {
308+ return find_last_bit_impl<find_ones_flip, true >(beg, end);
309+ }
310+
246311// IterateInvoker supports conditionally stopping iteration early. The
247312// invoker is called with the function to apply to each set index, along with
248313// the current index. If the function returns void then the invoker always
0 commit comments