Skip to content

Commit

Permalink
perf: do not eagerly compute bitcount (#13562)
Browse files Browse the repository at this point in the history
  • Loading branch information
orlp committed Jan 10, 2024
1 parent 4b4eecc commit fef0273
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 80 deletions.
26 changes: 21 additions & 5 deletions crates/polars-arrow/src/array/static_array_collect.rs
Expand Up @@ -227,7 +227,12 @@ macro_rules! impl_collect_vec_validity {
let arrow_bitmap = if null_count > 0 {
unsafe {
// SAFETY: we made sure the null_count is correct.
Some(Bitmap::from_inner(Arc::new(bitmap.into()), 0, buf.len(), null_count).unwrap())
Some(Bitmap::from_inner_unchecked(
Arc::new(bitmap.into()),
0,
buf.len(),
Some(null_count),
))
}
} else {
None
Expand Down Expand Up @@ -283,7 +288,12 @@ macro_rules! impl_trusted_collect_vec_validity {
let arrow_bitmap = if null_count > 0 {
unsafe {
// SAFETY: we made sure the null_count is correct.
Some(Bitmap::from_inner(Arc::new(bitmap.into()), 0, buf.len(), null_count).unwrap())
Some(Bitmap::from_inner_unchecked(
Arc::new(bitmap.into()),
0,
buf.len(),
Some(null_count),
))
}
} else {
None
Expand Down Expand Up @@ -613,14 +623,20 @@ macro_rules! impl_collect_bool_validity {
}

let false_count = len - true_count;
let values =
unsafe { Bitmap::from_inner(Arc::new(buf.into()), 0, len, false_count).unwrap() };
let values = unsafe {
Bitmap::from_inner_unchecked(Arc::new(buf.into()), 0, len, Some(false_count))
};

let null_count = len - nonnull_count;
let validity_bitmap = if $with_valid && null_count > 0 {
unsafe {
// SAFETY: we made sure the null_count is correct.
Some(Bitmap::from_inner(Arc::new(validity.into()), 0, len, null_count).unwrap())
Some(Bitmap::from_inner_unchecked(
Arc::new(validity.into()),
0,
len,
Some(null_count),
))
}
} else {
None
Expand Down
141 changes: 74 additions & 67 deletions crates/polars-arrow/src/bitmap/immutable.rs
@@ -1,5 +1,6 @@
use std::iter::FromIterator;
use std::ops::Deref;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;

use either::Either;
Expand All @@ -10,6 +11,8 @@ use super::{chunk_iter_to_vec, IntoIter, MutableBitmap};
use crate::buffer::Bytes;
use crate::trusted_len::TrustedLen;

const UNKNOWN_BIT_COUNT: u64 = u64::MAX;

/// An immutable container semantically equivalent to `Arc<Vec<bool>>` but represented as `Arc<Vec<u8>>` where
/// each boolean is represented as a single bit.
///
Expand Down Expand Up @@ -42,14 +45,31 @@ use crate::trusted_len::TrustedLen;
/// // when sliced (or cloned), it is no longer possible to `into_mut`.
/// let same: Bitmap = sliced.into_mut().left().unwrap();
/// ```
#[derive(Clone)]
pub struct Bitmap {
bytes: Arc<Bytes<u8>>,
// both are measured in bits. They are used to bound the bitmap to a region of Bytes.
// Both offset and length are measured in bits. They are used to bound the
// bitmap to a region of Bytes.
offset: usize,
length: usize,
// this is a cache: it is computed on initialization
unset_bits: usize,

// A bit field that contains our cache for the number of unset bits.
// If it is u64::MAX, we have no known value at all.
// Other bit patterns where the top bit is set is reserved for future use.
// If the top bit is not set we have an exact count.
unset_bit_count_cache: AtomicU64,
}

impl Clone for Bitmap {
fn clone(&self) -> Self {
Self {
bytes: Arc::clone(&self.bytes),
offset: self.offset,
length: self.length,
unset_bit_count_cache: AtomicU64::new(
self.unset_bit_count_cache.load(Ordering::Relaxed),
),
}
}
}

impl std::fmt::Debug for Bitmap {
Expand Down Expand Up @@ -89,12 +109,11 @@ impl Bitmap {
/// Tries to create a new [`Bitmap`] backed by `bytes`, spanning the first
/// `length` bits.
///
/// Returns an error when `check` rejects the `(bytes, 0, length)`
/// combination — presumably when `length` overruns the buffer; confirm in
/// `check`. The unset-bit count is NOT computed here; it is cached lazily
/// on first use (see `unset_bits`).
#[inline]
pub fn try_new(bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
check(&bytes, 0, length)?;
Ok(Self {
length,
offset: 0,
bytes: Arc::new(bytes.into()),
// Marked unknown until a caller actually asks for the count.
unset_bit_count_cache: AtomicU64::new(UNKNOWN_BIT_COUNT),
})
}

Expand Down Expand Up @@ -143,18 +162,21 @@ impl Bitmap {
/// Returns the number of unset bits on this [`Bitmap`].
///
/// Guaranteed to be `<= self.len()`.
///
/// # Implementation
/// This function is `O(1)` - the number of unset bits is computed when the bitmap is
/// created
pub const fn unset_bits(&self) -> usize {
self.unset_bits
}

/// Returns the number of unset bits on this [`Bitmap`].
#[inline]
#[deprecated(since = "0.13.0", note = "use `unset_bits` instead")]
pub fn null_count(&self) -> usize {
self.unset_bits
///
/// This function counts the number of unset bits if it is not already
/// computed. Repeated calls use the cached bitcount.
pub fn unset_bits(&self) -> usize {
// Top bit set means "no exact count cached" (see UNKNOWN_BIT_COUNT).
let cache = self.unset_bit_count_cache.load(Ordering::Relaxed);
if cache >> 63 != 0 {
// Count now and cache the result. Two threads may race here, but the
// underlying bytes/offset/length are immutable through `&self`, so
// both compute and store the same value; a relaxed store is fine.
let zeros = count_zeros(&self.bytes, self.offset, self.length);
self.unset_bit_count_cache
.store(zeros as u64, Ordering::Relaxed);
zeros
} else {
// Cached exact count; top bit is clear, so the cast is lossless.
cache as usize
}
}

/// Slices `self`, offsetting by `offset` and truncating up to `length` bits.
Expand All @@ -178,24 +200,34 @@ impl Bitmap {
}

// Fast path: we have no nulls or are full-null.
if self.unset_bits == 0 || self.unset_bits == self.length {
let unset_bit_count_cache = self.unset_bit_count_cache.get_mut();
if *unset_bit_count_cache == 0 || *unset_bit_count_cache == self.length as u64 {
let new_count = if *unset_bit_count_cache > 0 {
length as u64
} else {
0
};
*unset_bit_count_cache = new_count;
self.offset += offset;
self.length = length;
self.unset_bits = if self.unset_bits > 0 { length } else { 0 };
return;
}

// If we keep the majority of the slice it's faster to count the parts
// we didn't keep rather than counting directly.
if length > self.length / 2 {
// Subtract the null count of the chunks we slice off.
let start_end = self.offset + offset + length;
let head_count = count_zeros(&self.bytes, self.offset, offset);
let tail_count = count_zeros(&self.bytes, start_end, self.length - length - offset);
self.unset_bits -= head_count + tail_count;
} else {
// Count the null values in the slice.
self.unset_bits = count_zeros(&self.bytes, self.offset + offset, length);
if *unset_bit_count_cache >> 63 == 0 {
// If we keep all but a small portion of the array it is worth
// doing an eager re-count since we can reuse the old count via the
// inclusion-exclusion principle.
let small_portion = (self.length / 5).max(32);
if length + small_portion >= self.length {
// Subtract the null count of the chunks we slice off.
let slice_end = self.offset + offset + length;
let head_count = count_zeros(&self.bytes, self.offset, offset);
let tail_count = count_zeros(&self.bytes, slice_end, self.length - length - offset);
let new_count = *unset_bit_count_cache - head_count as u64 - tail_count as u64;
*unset_bit_count_cache = new_count;
} else {
*unset_bit_count_cache = UNKNOWN_BIT_COUNT;
}
}

self.offset += offset;
Expand Down Expand Up @@ -306,7 +338,7 @@ impl Bitmap {
vec![0; length.saturating_add(7) / 8]
};
let unset_bits = if value { 0 } else { length };
unsafe { Bitmap::from_inner_unchecked(Arc::new(bytes.into()), 0, length, unset_bits) }
unsafe { Bitmap::from_inner_unchecked(Arc::new(bytes.into()), 0, length, Some(unset_bits)) }
}

/// Counts the nulls (unset bits) starting from `offset` bits and for `length` bits.
Expand Down Expand Up @@ -342,38 +374,6 @@ impl Bitmap {
}
}

/// Decomposes this [`Bitmap`] into its raw parts:
/// `(bytes, offset, length, unset-bit count)`, with offset and length
/// measured in bits.
#[must_use]
pub fn into_inner(self) -> (Arc<Bytes<u8>>, usize, usize, usize) {
    let Self { bytes, offset, length, unset_bits } = self;
    (bytes, offset, length, unset_bits)
}

/// Reassembles a [`Bitmap`] from the raw parts produced by
/// [`Bitmap::into_inner`], validating the bounds first.
///
/// # Safety
/// The invariants of this struct must be upheld by the caller; in
/// particular `unset_bits` must be the true number of zero bits in the
/// `[offset, offset + length)` region.
pub unsafe fn from_inner(
    bytes: Arc<Bytes<u8>>,
    offset: usize,
    length: usize,
    unset_bits: usize,
) -> PolarsResult<Self> {
    // Bounds are checked eagerly; only the unset-bit count is trusted.
    check(&bytes, offset, length).map(|_| Self {
        bytes,
        offset,
        length,
        unset_bits,
    })
}

/// Creates a `[Bitmap]` from its internal representation.
/// This is the inverted from `[Bitmap::into_inner]`
///
Expand All @@ -383,13 +383,20 @@ impl Bitmap {
bytes: Arc<Bytes<u8>>,
offset: usize,
length: usize,
unset_bits: usize,
unset_bits: Option<usize>,
) -> Self {
debug_assert!(check(&bytes[..], offset, length).is_ok());

let unset_bit_count_cache = if let Some(n) = unset_bits {
AtomicU64::new(n as u64)
} else {
AtomicU64::new(UNKNOWN_BIT_COUNT)
};
Self {
bytes,
offset,
length,
unset_bits,
unset_bit_count_cache,
}
}
}
Expand Down Expand Up @@ -456,7 +463,7 @@ impl Bitmap {
Self {
offset,
length,
unset_bits,
unset_bit_count_cache: AtomicU64::new(unset_bits as u64),
bytes: Arc::new(crate::buffer::to_bytes(value.buffer().clone())),
}
}
Expand All @@ -483,7 +490,7 @@ impl IntoIterator for Bitmap {
#[cfg(feature = "arrow_rs")]
impl From<Bitmap> for arrow_buffer::buffer::NullBuffer {
fn from(value: Bitmap) -> Self {
let null_count = value.unset_bits;
let null_count = value.unset_bits();
let buffer = crate::buffer::to_buffer(value.bytes);
let buffer = arrow_buffer::buffer::BooleanBuffer::new(buffer, value.offset, value.length);
// Safety: null count is accurate
Expand Down
5 changes: 2 additions & 3 deletions crates/polars-arrow/src/bitmap/mutable.rs
Expand Up @@ -348,14 +348,13 @@ impl From<MutableBitmap> for Option<Bitmap> {
fn from(buffer: MutableBitmap) -> Self {
let unset_bits = buffer.unset_bits();
if unset_bits > 0 {
// safety:
// invariants of the `MutableBitmap` equal that of `Bitmap`
// SAFETY: invariants of the `MutableBitmap` equal that of `Bitmap`.
let bitmap = unsafe {
Bitmap::from_inner_unchecked(
Arc::new(buffer.buffer.into()),
0,
buffer.length,
unset_bits,
Some(unset_bits),
)
};
Some(bitmap)
Expand Down
15 changes: 10 additions & 5 deletions crates/polars-arrow/src/ffi/array.rs
Expand Up @@ -5,7 +5,7 @@ use polars_error::{polars_bail, PolarsResult};

use super::ArrowArray;
use crate::array::*;
use crate::bitmap::utils::{bytes_for, count_zeros};
use crate::bitmap::utils::bytes_for;
use crate::bitmap::Bitmap;
use crate::buffer::{Buffer, Bytes, BytesAllocator};
use crate::datatypes::{ArrowDataType, PhysicalType};
Expand Down Expand Up @@ -278,12 +278,17 @@ unsafe fn create_bitmap(
let bytes_len = bytes_for(offset + len);
let bytes = Bytes::from_foreign(ptr, bytes_len, BytesAllocator::InternalArrowArray(owner));

let null_count: usize = if is_validity {
array.null_count()
let null_count = if is_validity {
Some(array.null_count())
} else {
count_zeros(bytes.as_ref(), offset, len)
None
};
Bitmap::from_inner(Arc::new(bytes), offset, len, null_count)
Ok(Bitmap::from_inner_unchecked(
Arc::new(bytes),
offset,
len,
null_count,
))
}

fn buffer_offset(array: &ArrowArray, data_type: &ArrowDataType, i: usize) -> usize {
Expand Down
20 changes: 20 additions & 0 deletions py-polars/tests/unit/operations/test_slice.py
@@ -1,3 +1,7 @@
from __future__ import annotations

import pytest

import polars as pl
from polars.testing import assert_frame_equal, assert_frame_not_equal

Expand Down Expand Up @@ -140,3 +144,19 @@ def test_hconcat_slice_pushdown() -> None:

df_out = out.collect()
assert_frame_equal(df_out, expected)


@pytest.mark.parametrize(
    "ref",
    [
        [0, None],  # Mixed.
        [None, None],  # Full-null.
        [0, 0],  # All-valid.
    ],
)
def test_slice_nullcount(ref: list[int | None]) -> None:
    """Null counts must remain correct after (possibly nested) slicing."""
    # Build an enlarged copy instead of `ref *= 128`: the parametrized list
    # object is shared by pytest across runs, and in-place mutation would
    # corrupt the argvalues if the test were ever re-executed (e.g. under a
    # rerun/flaky plugin).
    data = ref * 128  # Embiggen input past byte/word boundaries.
    s = pl.Series(data)
    assert s.null_count() == sum(x is None for x in data)
    assert s.slice(64).null_count() == sum(x is None for x in data[64:])
    assert s.slice(50, 60).slice(25).null_count() == sum(x is None for x in data[75:110])

0 comments on commit fef0273

Please sign in to comment.