Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Renamed Bitmap::null_count to Bitmap::unset_bits #1087

Merged
merged 1 commit into from
Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions benches/bitmap_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use arrow2::bitmap::Bitmap;

fn bench_arrow2(lhs: &Bitmap, rhs: &Bitmap) {
let r = lhs | rhs;
assert!(r.null_count() > 0);
assert!(r.unset_bits() > 0);
}

fn add_benchmark(c: &mut Criterion) {
Expand All @@ -15,7 +15,7 @@ fn add_benchmark(c: &mut Criterion) {
c.bench_function(&format!("bitmap aligned not 2^{}", log2_size), |b| {
b.iter(|| {
let r = !&bitmap;
assert!(r.null_count() > 0);
assert!(r.unset_bits() > 0);
})
});

Expand All @@ -27,7 +27,7 @@ fn add_benchmark(c: &mut Criterion) {
|b| {
b.iter(|| {
let r = bitmap.clone().slice(offset, len);
assert!(r.null_count() > 0);
assert!(r.unset_bits() > 0);
})
},
);
Expand All @@ -40,7 +40,7 @@ fn add_benchmark(c: &mut Criterion) {
|b| {
b.iter(|| {
let r = bitmap.clone().slice(offset, len);
assert!(r.null_count() > 0);
assert!(r.unset_bits() > 0);
})
},
);
Expand All @@ -49,7 +49,7 @@ fn add_benchmark(c: &mut Criterion) {
c.bench_function(&format!("bitmap not 2^{}", log2_size), |b| {
b.iter(|| {
let r = !&bitmap1;
assert!(r.null_count() > 0);
assert!(r.unset_bits() > 0);
})
});

Expand Down
2 changes: 1 addition & 1 deletion benches/slices_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use criterion::{criterion_group, criterion_main, Criterion};
use arrow2::bitmap::{utils::SlicesIterator, Bitmap};

fn bench_slices(lhs: &Bitmap) {
let set_count = lhs.len() - lhs.null_count();
let set_count = lhs.len() - lhs.unset_bits();
let slices = SlicesIterator::new(lhs);

let count = slices.fold(0usize, |acc, v| acc + v.1);
Expand Down
4 changes: 2 additions & 2 deletions src/array/binary/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ impl<O: Offset> MutableBinaryArray<O> {
// soundness: assumed trusted len
let (mut validity, offsets, values) = try_trusted_len_unzip(iterator)?;

if validity.as_mut().unwrap().null_count() == 0 {
if validity.as_mut().unwrap().unset_bits() == 0 {
validity = None;
}

Expand Down Expand Up @@ -395,7 +395,7 @@ impl<O: Offset> MutableBinaryArray<O> {
iterator,
);

if self.validity.as_mut().unwrap().null_count() == 0 {
if self.validity.as_mut().unwrap().unset_bits() == 0 {
self.validity = None;
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/array/boolean/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ impl MutableBooleanArray {

extend_trusted_len_unzip(iterator, &mut validity, &mut self.values);

if validity.null_count() > 0 {
if validity.unset_bits() > 0 {
self.validity = Some(validity);
}
}
Expand Down Expand Up @@ -299,7 +299,7 @@ impl MutableBooleanArray {
{
let (validity, values) = try_trusted_len_unzip(iterator)?;

let validity = if validity.null_count() > 0 {
let validity = if validity.unset_bits() > 0 {
Some(validity)
} else {
None
Expand Down Expand Up @@ -344,7 +344,7 @@ where

extend_trusted_len_unzip(iterator, &mut validity, &mut values);

let validity = if validity.null_count() > 0 {
let validity = if validity.unset_bits() > 0 {
Some(validity)
} else {
None
Expand Down
2 changes: 1 addition & 1 deletion src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ pub trait Array: Send + Sync + dyn_clone::DynClone + 'static {
};
self.validity()
.as_ref()
.map(|x| x.null_count())
.map(|x| x.unset_bits())
.unwrap_or(0)
}

Expand Down
2 changes: 1 addition & 1 deletion src/array/physical_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ where

extend_from_trusted_len_iter(&mut offsets, &mut values, &mut validity, iterator);

let validity = if validity.null_count() > 0 {
let validity = if validity.unset_bits() > 0 {
Some(validity)
} else {
None
Expand Down
2 changes: 1 addition & 1 deletion src/array/primitive/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ impl<T: NativeType> From<MutablePrimitiveArray<T>> for PrimitiveArray<T> {
fn from(other: MutablePrimitiveArray<T>) -> Self {
let validity = other.validity.and_then(|x| {
let bitmap: Bitmap = x.into();
if bitmap.null_count() == 0 {
if bitmap.unset_bits() == 0 {
None
} else {
Some(bitmap)
Expand Down
2 changes: 1 addition & 1 deletion src/array/utf8/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ impl<O: Offset> From<MutableUtf8Array<O>> for Utf8Array<O> {
// `Utf8Array` can be safely created from `MutableUtf8Array` without checks.
let validity = other.validity.and_then(|x| {
let bitmap: Bitmap = x.into();
if bitmap.null_count() == 0 {
if bitmap.unset_bits() == 0 {
None
} else {
Some(bitmap)
Expand Down
8 changes: 4 additions & 4 deletions src/bitmap/assign_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,11 @@ where
#[inline]
/// Compute bitwise OR operation in-place
fn or_assign<T: BitChunk>(lhs: &mut MutableBitmap, rhs: &Bitmap) {
if rhs.null_count() == 0 {
if rhs.unset_bits() == 0 {
assert_eq!(lhs.len(), rhs.len());
lhs.clear();
lhs.extend_constant(rhs.len(), true);
} else if rhs.null_count() == rhs.len() {
} else if rhs.unset_bits() == rhs.len() {
// bitmap remains
} else {
binary_assign(lhs, rhs, |x: T, y| x | y)
Expand All @@ -138,10 +138,10 @@ impl<'a, 'b> std::ops::BitOr<&'a Bitmap> for MutableBitmap {
#[inline]
/// Compute bitwise `&` between `lhs` and `rhs`, assigning it to `lhs`
fn and_assign<T: BitChunk>(lhs: &mut MutableBitmap, rhs: &Bitmap) {
if rhs.null_count() == 0 {
if rhs.unset_bits() == 0 {
// bitmap remains
}
if rhs.null_count() == rhs.len() {
if rhs.unset_bits() == rhs.len() {
assert_eq!(lhs.len(), rhs.len());
lhs.clear();
lhs.extend_constant(rhs.len(), false);
Expand Down
8 changes: 4 additions & 4 deletions src/bitmap/bitmap_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ pub(crate) fn align(bitmap: &Bitmap, new_offset: usize) -> Bitmap {
#[inline]
/// Compute bitwise AND operation
pub fn and(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
if lhs.null_count() == lhs.len() || rhs.null_count() == rhs.len() {
if lhs.unset_bits() == lhs.len() || rhs.unset_bits() == rhs.len() {
assert_eq!(lhs.len(), rhs.len());
Bitmap::new_zeroed(lhs.len())
} else {
Expand All @@ -177,7 +177,7 @@ pub fn and(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
#[inline]
/// Compute bitwise OR operation
pub fn or(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
if lhs.null_count() == 0 || rhs.null_count() == 0 {
if lhs.unset_bits() == 0 || rhs.unset_bits() == 0 {
assert_eq!(lhs.len(), rhs.len());
let mut mutable = MutableBitmap::with_capacity(lhs.len());
mutable.extend_constant(lhs.len(), true);
Expand All @@ -190,8 +190,8 @@ pub fn or(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
#[inline]
/// Compute bitwise XOR operation
pub fn xor(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
let lhs_nulls = lhs.null_count();
let rhs_nulls = rhs.null_count();
let lhs_nulls = lhs.unset_bits();
let rhs_nulls = rhs.unset_bits();

// all false or all true
if lhs_nulls == rhs_nulls && rhs_nulls == rhs.len() || lhs_nulls == 0 && rhs_nulls == 0 {
Expand Down
27 changes: 19 additions & 8 deletions src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ pub struct Bitmap {
offset: usize,
length: usize,
// this is a cache: it is computed on initialization
null_count: usize,
unset_bits: usize,
}

impl std::fmt::Debug for Bitmap {
Expand Down Expand Up @@ -83,12 +83,12 @@ impl Bitmap {
bytes.len().saturating_mul(8)
)));
}
let null_count = count_zeros(&bytes, 0, length);
let unset_bits = count_zeros(&bytes, 0, length);
Ok(Self {
length,
offset: 0,
bytes: Arc::new(bytes.into()),
null_count,
unset_bits,
})
}

Expand Down Expand Up @@ -122,12 +122,12 @@ impl Bitmap {
#[inline]
pub(crate) fn from_bytes(bytes: Bytes<u8>, length: usize) -> Self {
assert!(length <= bytes.len() * 8);
let null_count = count_zeros(&bytes, 0, length);
let unset_bits = count_zeros(&bytes, 0, length);
Self {
length,
offset: 0,
bytes: Arc::new(bytes),
null_count,
unset_bits,
}
}

Expand All @@ -149,10 +149,21 @@ impl Bitmap {
)
}

/// Returns the number of unset bits on this [`Bitmap`].
///
/// Guaranteed to be `<= self.len()`.
/// # Implementation
/// This function is `O(1)` - the number of unset bits is computed when the bitmap is
/// created
pub const fn unset_bits(&self) -> usize {
self.unset_bits
}

/// Returns the number of unset bits on this [`Bitmap`].
#[inline]
#[deprecated(since = "0.13.0", note = "use `unset_bits` instead")]
pub fn null_count(&self) -> usize {
self.null_count
self.unset_bits
}

/// Slices `self`, offsetting by `offset` and truncating up to `length` bits.
Expand All @@ -174,13 +185,13 @@ impl Bitmap {
// count the smallest chunk
if length < self.length / 2 {
// count the null values in the slice
self.null_count = count_zeros(&self.bytes, self.offset + offset, length);
self.unset_bits = count_zeros(&self.bytes, self.offset + offset, length);
} else {
// subtract the null count of the chunks we slice off
let start_end = self.offset + offset + length;
let head_count = count_zeros(&self.bytes, self.offset, offset);
let tail_count = count_zeros(&self.bytes, start_end, self.length - length - offset);
self.null_count -= head_count + tail_count;
self.unset_bits -= head_count + tail_count;
}
self.offset += offset;
self.length = length;
Expand Down
15 changes: 12 additions & 3 deletions src/bitmap/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,20 @@ impl MutableBitmap {
}

/// Returns the number of unset bits on this [`MutableBitmap`].
#[inline]
pub fn null_count(&self) -> usize {
///
/// Guaranteed to be `<= self.len()`.
/// # Implementation
/// This function is `O(N)`
pub fn unset_bits(&self) -> usize {
count_zeros(&self.buffer, 0, self.length)
}

/// Returns the number of unset bits on this [`MutableBitmap`].
#[deprecated(since = "0.13.0", note = "use `unset_bits` instead")]
pub fn null_count(&self) -> usize {
self.unset_bits()
}

/// Returns the length of the [`MutableBitmap`].
#[inline]
pub fn len(&self) -> usize {
Expand Down Expand Up @@ -327,7 +336,7 @@ impl From<MutableBitmap> for Bitmap {
impl From<MutableBitmap> for Option<Bitmap> {
#[inline]
fn from(buffer: MutableBitmap) -> Self {
if buffer.null_count() > 0 {
if buffer.unset_bits() > 0 {
Some(Bitmap::try_new(buffer.buffer, buffer.length).unwrap())
} else {
None
Expand Down
2 changes: 1 addition & 1 deletion src/bitmap/utils/slice_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ impl<'a> SlicesIterator<'a> {

Self {
state,
count: values.len() - values.null_count(),
count: values.len() - values.unset_bits(),
max_len: values.len(),
values: iter,
mask: 1u8.rotate_left(offset as u32),
Expand Down
4 changes: 2 additions & 2 deletions src/compute/aggregate/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ pub fn min_boolean(array: &BooleanArray) -> Option<bool> {
if null_count == array.len() {
None
} else if null_count == 0 {
Some(array.values().null_count() == 0)
Some(array.values().unset_bits() == 0)
} else {
// Note the min bool is false (0), so short circuit as soon as we see it
array
Expand Down Expand Up @@ -291,7 +291,7 @@ pub fn max_boolean(array: &BooleanArray) -> Option<bool> {
if null_count == array.len() {
None
} else if null_count == 0 {
Some(array.values().null_count() < array.len())
Some(array.values().unset_bits() < array.len())
} else {
// Note the max bool is true (1), so short circuit as soon as we see it
array
Expand Down
4 changes: 2 additions & 2 deletions src/compute/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ pub fn any(array: &BooleanArray) -> bool {
array.into_iter().any(|v| v == Some(true))
} else {
let vals = array.values();
vals.null_count() != vals.len()
vals.unset_bits() != vals.len()
}
}

Expand All @@ -204,6 +204,6 @@ pub fn all(array: &BooleanArray) -> bool {
false
} else {
let vals = array.values();
vals.null_count() == 0
vals.unset_bits() == 0
}
}
6 changes: 3 additions & 3 deletions src/compute/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ fn null_filter_simd<T: NativeType + Simd>(
mask: &Bitmap,
) -> (Vec<T>, MutableBitmap) {
assert_eq!(values.len(), mask.len());
let filter_count = mask.len() - mask.null_count();
let filter_count = mask.len() - mask.unset_bits();

let (slice, offset, length) = mask.as_slice();
if offset == 0 {
Expand All @@ -150,7 +150,7 @@ fn null_filter_simd<T: NativeType + Simd>(

fn nonnull_filter_simd<T: NativeType + Simd>(values: &[T], mask: &Bitmap) -> Vec<T> {
assert_eq!(values.len(), mask.len());
let filter_count = mask.len() - mask.null_count();
let filter_count = mask.len() - mask.unset_bits();

let (slice, offset, length) = mask.as_slice();
if offset == 0 {
Expand Down Expand Up @@ -264,7 +264,7 @@ pub fn filter(array: &dyn Array, filter: &BooleanArray) -> Result<Box<dyn Array>
return crate::compute::filter::filter(array, &filter);
}

let false_count = filter.values().null_count();
let false_count = filter.values().unset_bits();
if false_count == filter.len() {
assert_eq!(array.len(), filter.len());
return Ok(array.slice(0, 0));
Expand Down
Loading