From a063b997d905c64d18a9762a923b129846761d0f Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Thu, 26 Aug 2021 10:29:13 +0000 Subject: [PATCH] Optimized null count. --- benches/count_zeros.rs | 6 ++-- benches/unset_count.rs | 10 +++--- src/bitmap/immutable.rs | 8 ++--- src/bitmap/mutable.rs | 4 +-- src/bitmap/utils/mod.rs | 65 +++++++++++++++++++++++++++++------- tests/it/bitmap/utils/mod.rs | 44 ++++++++++++------------ 6 files changed, 89 insertions(+), 48 deletions(-) diff --git a/benches/count_zeros.rs b/benches/count_zeros.rs index f37d411bdf4..a435afd7826 100644 --- a/benches/count_zeros.rs +++ b/benches/count_zeros.rs @@ -1,4 +1,4 @@ -use arrow2::bitmap::utils::null_count; +use arrow2::bitmap::utils::count_zeros; use criterion::{criterion_group, criterion_main, Criterion}; @@ -11,11 +11,11 @@ fn add_benchmark(c: &mut Criterion) { .collect::>(); c.bench_function(&format!("count_zeros 2^{}", log2_size), |b| { - b.iter(|| null_count(&bytes, 0, bytes.len() * 8)) + b.iter(|| count_zeros(&bytes, 0, bytes.len() * 8)) }); c.bench_function(&format!("count_zeros offset 2^{}", log2_size), |b| { - b.iter(|| null_count(&bytes, 10, bytes.len() * 8 - 10)) + b.iter(|| count_zeros(&bytes, 10, bytes.len() * 8 - 10)) }); }) } diff --git a/benches/unset_count.rs b/benches/unset_count.rs index 6bc0e1907b4..a435afd7826 100644 --- a/benches/unset_count.rs +++ b/benches/unset_count.rs @@ -1,4 +1,4 @@ -use arrow2::bitmap::utils::null_count; +use arrow2::bitmap::utils::count_zeros; use criterion::{criterion_group, criterion_main, Criterion}; @@ -10,12 +10,12 @@ fn add_benchmark(c: &mut Criterion) { .map(|x| 0b01011011u8.rotate_left(x)) .collect::>(); - c.bench_function(&format!("unset_count 2^{}", log2_size), |b| { - b.iter(|| null_count(&bytes, 0, bytes.len() * 8)) + c.bench_function(&format!("count_zeros 2^{}", log2_size), |b| { + b.iter(|| count_zeros(&bytes, 0, bytes.len() * 8)) }); - c.bench_function(&format!("unset_count offset 2^{}", log2_size), |b| { - b.iter(|| null_count(&bytes, 10, bytes.len() * 8 - 10)) + c.bench_function(&format!("count_zeros offset 2^{}", log2_size), |b| { + b.iter(|| count_zeros(&bytes, 10, bytes.len() * 8 - 10)) }); }) } diff --git a/src/bitmap/immutable.rs b/src/bitmap/immutable.rs index 5011e4864a8..d15dcc80a77 100644 --- a/src/bitmap/immutable.rs +++ b/src/bitmap/immutable.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use crate::{buffer::bytes::Bytes, buffer::MutableBuffer, trusted_len::TrustedLen}; use super::{ - utils::{fmt, get_bit, get_bit_unchecked, null_count, BitChunk, BitChunks, BitmapIter}, + utils::{count_zeros, fmt, get_bit, get_bit_unchecked, BitChunk, BitChunks, BitmapIter}, MutableBitmap, }; @@ -68,7 +68,7 @@ impl Bitmap { #[inline] pub(crate) fn from_bytes(bytes: Bytes, length: usize) -> Self { assert!(length <= bytes.len() * 8); - let null_count = null_count(&bytes, 0, length); + let null_count = count_zeros(&bytes, 0, length); Self { length, offset: 0, @@ -97,7 +97,7 @@ impl Bitmap { /// Counts the nulls (unset bits) starting from `offset` bits and for `length` bits. #[inline] pub fn null_count_range(&self, offset: usize, length: usize) -> usize { - null_count(&self.bytes, self.offset + offset, length) + count_zeros(&self.bytes, self.offset + offset, length) } /// Returns the number of unset bits on this [`Bitmap`]. @@ -115,7 +115,7 @@ impl Bitmap { assert!(offset + length <= self.length); self.offset += offset; self.length = length; - self.null_count = null_count(&self.bytes, self.offset, self.length); + self.null_count = count_zeros(&self.bytes, self.offset, self.length); self } diff --git a/src/bitmap/mutable.rs b/src/bitmap/mutable.rs index 358d1786b27..389fde3457c 100644 --- a/src/bitmap/mutable.rs +++ b/src/bitmap/mutable.rs @@ -3,7 +3,7 @@ use std::iter::FromIterator; use crate::bitmap::utils::merge_reversed; use crate::{buffer::MutableBuffer, trusted_len::TrustedLen}; -use super::utils::{fmt, get_bit, null_count, set, set_bit, BitmapIter}; +use super::utils::{count_zeros, fmt, get_bit, set, set_bit, BitmapIter}; use super::Bitmap; /// A container to store booleans. [`MutableBitmap`] is semantically equivalent @@ -107,7 +107,7 @@ impl MutableBitmap { /// Returns the number of unset bits on this [`MutableBitmap`]. #[inline] pub fn null_count(&self) -> usize { - null_count(&self.buffer, 0, self.length) + count_zeros(&self.buffer, 0, self.length) } /// Returns the length of the [`MutableBitmap`]. diff --git a/src/bitmap/utils/mod.rs b/src/bitmap/utils/mod.rs index 5d866886eb0..212bb7ffb62 100644 --- a/src/bitmap/utils/mod.rs +++ b/src/bitmap/utils/mod.rs @@ -4,6 +4,8 @@ mod iterator; mod slice_iterator; mod zip_validity; +use std::convert::TryInto; + pub(crate) use chunk_iterator::merge_reversed; pub use chunk_iterator::{BitChunk, BitChunkIterExact, BitChunks, BitChunksExact}; pub use fmt::fmt; @@ -66,21 +68,60 @@ pub fn bytes_for(bits: usize) -> usize { bits.saturating_add(7) / 8 } -#[inline] -pub fn null_count(slice: &[u8], offset: usize, len: usize) -> usize { - //return BitmapIter::new(slice, offset, len).filter(|x| !*x).count(); +/// Returns the number of zero bits in the slice offsetted by `offset` and a length of `length`. +/// # Panics +/// This function panics iff `(offset + len).saturating_add(7) / 8 >= slice.len()` +/// because it corresponds to the situation where `len` is beyond bounds. +pub fn count_zeros(slice: &[u8], offset: usize, len: usize) -> usize { + if len == 0 { + return 0; + }; - // u64 results in optimal performance (verified via benches) - let mut chunks = chunk_iterator::BitChunks::::new(slice, offset, len); + let mut slice = &slice[offset / 8..(offset + len).saturating_add(7) / 8]; + let offset = offset % 8; - let mut count: usize = chunks.by_ref().map(|c| c.count_ones() as usize).sum(); + if (offset + len) / 8 == 0 { + // all within a single byte + let byte = (slice[0] >> offset) << (8 - len); + return len - byte.count_ones() as usize; + } - if chunks.remainder_len() > 0 { - // mask least significant bits up to len, as they are otherwise not required - // here we shift instead because it is a bit faster - let remainder = chunks.remainder() & !0u64 >> (64 - chunks.remainder_len()); - count += remainder.count_ones() as usize; + // slice: [a1,a2,a3,a4], [a5,a6,a7,a8] + // offset: 3 + // len: 4 + // [__,__,__,a4], [a5,a6,a7,__] + let mut set_count = 0; + if offset != 0 { + // count all ignoring the first `offset` bits + // i.e. [__,__,__,a4] + set_count += (slice[0] >> offset).count_ones() as usize; + slice = &slice[1..]; } + if (offset + len) % 8 != 0 { + let end_offset = (offset + len) % 8; // i.e. 3 + 4 = 7 + let last_index = slice.len() - 1; + // count all ignoring the last `offset` bits + // i.e. [a5,a6,a7,__] + set_count += (slice[last_index] << (8 - end_offset)).count_ones() as usize; + slice = &slice[..last_index]; + } + + // finally, count any and all bytes in the middle in groups of 8 + let mut chunks = slice.chunks_exact(8); + set_count += chunks + .by_ref() + .map(|chunk| { + let a = u64::from_ne_bytes(chunk.try_into().unwrap()); + a.count_ones() as usize + }) + .sum::(); + + // and any bytes that do not fit in the group + set_count += chunks + .remainder() + .iter() + .map(|byte| byte.count_ones() as usize) + .sum::(); - len - count + len - set_count } diff --git a/tests/it/bitmap/utils/mod.rs b/tests/it/bitmap/utils/mod.rs index cc30620dae4..f04d9493023 100644 --- a/tests/it/bitmap/utils/mod.rs +++ b/tests/it/bitmap/utils/mod.rs @@ -30,39 +30,39 @@ fn get_bit_basics() { } #[test] -fn null_count_basics() { +fn count_zeros_basics() { let input: &[u8] = &[ 0b01001001, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, 0b00100000, 0b01000000, 0b11111111, ]; - assert_eq!(null_count(input, 0, 8), 8 - 3); - assert_eq!(null_count(input, 1, 7), 7 - 2); - assert_eq!(null_count(input, 1, 8), 8 - 3); - assert_eq!(null_count(input, 2, 7), 7 - 3); - assert_eq!(null_count(input, 0, 32), 32 - 6); - assert_eq!(null_count(input, 9, 2), 2); + assert_eq!(count_zeros(input, 0, 8), 8 - 3); + assert_eq!(count_zeros(input, 1, 7), 7 - 2); + assert_eq!(count_zeros(input, 1, 8), 8 - 3); + assert_eq!(count_zeros(input, 2, 7), 7 - 3); + assert_eq!(count_zeros(input, 0, 32), 32 - 6); + assert_eq!(count_zeros(input, 9, 2), 2); let input: &[u8] = &[0b01000000, 0b01000001]; - assert_eq!(null_count(input, 8, 2), 1); - assert_eq!(null_count(input, 8, 3), 2); - assert_eq!(null_count(input, 8, 4), 3); - assert_eq!(null_count(input, 8, 5), 4); - assert_eq!(null_count(input, 8, 6), 5); - assert_eq!(null_count(input, 8, 7), 5); - assert_eq!(null_count(input, 8, 8), 6); + assert_eq!(count_zeros(input, 8, 2), 1); + assert_eq!(count_zeros(input, 8, 3), 2); + assert_eq!(count_zeros(input, 8, 4), 3); + assert_eq!(count_zeros(input, 8, 5), 4); + assert_eq!(count_zeros(input, 8, 6), 5); + assert_eq!(count_zeros(input, 8, 7), 5); + assert_eq!(count_zeros(input, 8, 8), 6); let input: &[u8] = &[0b01000000, 0b01010101]; - assert_eq!(null_count(input, 9, 2), 1); - assert_eq!(null_count(input, 10, 2), 1); - assert_eq!(null_count(input, 11, 2), 1); - assert_eq!(null_count(input, 12, 2), 1); - assert_eq!(null_count(input, 13, 2), 1); - assert_eq!(null_count(input, 14, 2), 1); + assert_eq!(count_zeros(input, 9, 2), 1); + assert_eq!(count_zeros(input, 10, 2), 1); + assert_eq!(count_zeros(input, 11, 2), 1); + assert_eq!(count_zeros(input, 12, 2), 1); + assert_eq!(count_zeros(input, 13, 2), 1); + assert_eq!(count_zeros(input, 14, 2), 1); } #[test] -fn null_count_1() { +fn count_zeros_1() { // offset = 10, len = 90 => remainder let input: &[u8] = &[73, 146, 36, 73, 146, 36, 73, 146, 36, 73, 146, 36, 9]; - assert_eq!(null_count(input, 10, 90), 60); + assert_eq!(count_zeros(input, 10, 90), 60); }