refactor: add AlignedBitmapSlice (#15171)

orlp committed Mar 22, 2024
1 parent 6503abc commit d214f55
Showing 12 changed files with 258 additions and 248 deletions.
129 changes: 129 additions & 0 deletions crates/polars-arrow/src/bitmap/aligned.rs
@@ -0,0 +1,129 @@
use std::iter::Copied;
use std::slice::Iter;

use crate::bitmap::utils::BitChunk;

/// Loads a chunk of type T from `src` in little-endian order, zero-padding
/// when `src` contains fewer bytes than the chunk.
fn load_chunk_le<T: BitChunk>(src: &[u8]) -> T {
    if let Ok(chunk) = src.try_into() {
        return T::from_le_bytes(chunk);
    }

    let mut chunk = T::Bytes::default();
    let len = src.len().min(chunk.as_ref().len());
    chunk.as_mut()[..len].copy_from_slice(&src[..len]);
    T::from_le_bytes(chunk)
}

/// Represents a bitmap split into three portions: a prefix, an aligned bulk
/// section in the middle, and a suffix.
#[derive(Default, Clone, Debug)]
pub struct AlignedBitmapSlice<'a, T: BitChunk> {
    prefix: T,
    prefix_len: u32,
    bulk: &'a [T],
    suffix: T,
    suffix_len: u32,
}

impl<'a, T: BitChunk> AlignedBitmapSlice<'a, T> {
    /// The chunk of bits (if any) before the aligned bulk section.
    #[inline(always)]
    pub fn prefix(&self) -> T {
        self.prefix
    }

    /// An iterator over the chunks of the aligned bulk section.
    #[inline(always)]
    pub fn bulk_iter(&self) -> Copied<Iter<'a, T>> {
        self.bulk.iter().copied()
    }

    /// The aligned bulk section as a slice of chunks.
    #[inline(always)]
    pub fn bulk(&self) -> &'a [T] {
        self.bulk
    }

    /// The chunk of bits (if any) after the aligned bulk section.
    #[inline(always)]
    pub fn suffix(&self) -> T {
        self.suffix
    }

    /// The length (in bits) of the portion of the bitmap found in the prefix.
    #[inline(always)]
    pub fn prefix_bitlen(&self) -> usize {
        self.prefix_len as usize
    }

    /// The length (in bits) of the portion of the bitmap found in the bulk.
    #[inline(always)]
    pub fn bulk_bitlen(&self) -> usize {
        8 * std::mem::size_of::<T>() * self.bulk.len()
    }

    /// The length (in bits) of the portion of the bitmap found in the suffix.
    #[inline(always)]
    pub fn suffix_bitlen(&self) -> usize {
        self.suffix_len as usize
    }

    pub fn new(mut bytes: &'a [u8], mut offset: usize, len: usize) -> Self {
        if len == 0 {
            return Self::default();
        }

        assert!(bytes.len() * 8 >= offset + len);

        // Strip off the irrelevant bytes from the start.
        let start_byte_idx = offset / 8;
        bytes = &bytes[start_byte_idx..];
        offset %= 8;

        // Fast path: the entire bitmap fits in a single chunk.
        let chunk_len = std::mem::size_of::<T>();
        let chunk_len_bits = 8 * chunk_len;
        if offset + len <= chunk_len_bits {
            let mut prefix = load_chunk_le::<T>(bytes) >> offset;
            if len < chunk_len_bits {
                prefix &= (T::one() << len) - T::one();
            }
            return Self {
                prefix,
                prefix_len: len as u32,
                ..Self::default()
            };
        }

        // Find how many bytes into `bytes` the aligned section starts.
        let mut align_offset = bytes.as_ptr().align_offset(chunk_len);
        let mut align_offset_bits = 8 * align_offset;

        // The original pointer was already aligned, but our bit offset means
        // we can't start there; start one chunk later.
        if offset > align_offset_bits {
            align_offset_bits += chunk_len_bits;
            align_offset += chunk_len;
        }

        // From this, calculate the lengths of our sections (in bits).
        let prefix_len = (align_offset_bits - offset).min(len);
        let rest_len = len - prefix_len;
        let suffix_len = rest_len % chunk_len_bits;
        let bulk_len = rest_len - suffix_len;
        debug_assert!(prefix_len < chunk_len_bits);
        debug_assert!(bulk_len % chunk_len_bits == 0);
        debug_assert!(suffix_len < chunk_len_bits);

        // Now we just have to load the three sections.
        let (prefix_bytes, rest_bytes) = bytes.split_at(align_offset);
        let (bulk_bytes, suffix_bytes) = rest_bytes.split_at(bulk_len / 8);
        let mut prefix = load_chunk_le::<T>(prefix_bytes) >> offset;
        let mut suffix = load_chunk_le::<T>(suffix_bytes);
        prefix &= (T::one() << prefix_len) - T::one();
        suffix &= (T::one() << suffix_len) - T::one();
        Self {
            prefix,
            bulk: bytemuck::cast_slice(bulk_bytes),
            suffix,
            prefix_len: prefix_len as u32,
            suffix_len: suffix_len as u32,
        }
    }
}
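For context, a minimal usage sketch (illustrative only, not part of this commit; it assumes the `pub mod aligned` export added below in bitmap/mod.rs) that counts set bits by visiting the three sections, the same way the reworked count_zeros further down does:

// Illustrative sketch: count the set bits in a bit slice by visiting the
// prefix, the aligned bulk, and the suffix of an AlignedBitmapSlice.
use polars_arrow::bitmap::aligned::AlignedBitmapSlice;

fn count_ones(bytes: &[u8], offset: usize, len: usize) -> usize {
    let aligned = AlignedBitmapSlice::<u64>::new(bytes, offset, len);
    let in_prefix = aligned.prefix().count_ones() as usize;
    let in_bulk: usize = aligned.bulk_iter().map(|w| w.count_ones() as usize).sum();
    let in_suffix = aligned.suffix().count_ones() as usize;
    in_prefix + in_bulk + in_suffix
}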
10 changes: 8 additions & 2 deletions crates/polars-arrow/src/bitmap/immutable.rs
@@ -7,6 +7,7 @@ use polars_error::{polars_bail, PolarsResult};

use super::utils::{count_zeros, fmt, get_bit, get_bit_unchecked, BitChunk, BitChunks, BitmapIter};
use super::{chunk_iter_to_vec, IntoIter, MutableBitmap};
use crate::bitmap::aligned::AlignedBitmapSlice;
use crate::bitmap::iterator::{
    FastU32BitmapIter, FastU56BitmapIter, FastU64BitmapIter, TrueIdxIter,
};
@@ -33,7 +34,7 @@ const UNKNOWN_BIT_COUNT: u64 = u64::MAX;
/// // we can also get the slice:
/// assert_eq!(bitmap.as_slice(), ([0b00001101u8].as_ref(), 0, 5));
/// // debug helps :)
/// assert_eq!(format!("{:?}", bitmap), "[0b___01101]".to_string());
/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01101] }");
///
/// // it supports copy-on-write semantics (to a `MutableBitmap`)
/// let bitmap: MutableBitmap = bitmap.into_mut().right().unwrap();
@@ -44,7 +45,7 @@ const UNKNOWN_BIT_COUNT: u64 = u64::MAX;
/// let mut sliced = bitmap.clone();
/// sliced.slice(1, 4);
/// assert_eq!(sliced.as_slice(), ([0b00001101u8].as_ref(), 1, 4)); // 1 here is the offset:
/// assert_eq!(format!("{:?}", sliced), "[0b___0110_]".to_string());
/// assert_eq!(format!("{:?}", sliced), "Bitmap { len: 4, offset: 1, bytes: [0b___0110_] }");
/// // when sliced (or cloned), it is no longer possible to `into_mut`.
/// let same: Bitmap = sliced.into_mut().left().unwrap();
/// ```
@@ -167,6 +168,11 @@ impl Bitmap {
        TrueIdxIter::new(self.len(), Some(self))
    }

    /// Returns the bits of this [`Bitmap`] as an [`AlignedBitmapSlice`].
    pub fn aligned<T: BitChunk>(&self) -> AlignedBitmapSlice<'_, T> {
        AlignedBitmapSlice::new(&self.bytes, self.offset, self.length)
    }

    /// Returns the byte slice of this [`Bitmap`].
    ///
    /// The returned tuple contains:
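A quick sketch of the new accessor in use (illustrative only; assumes `Bitmap: FromIterator<bool>`, which polars-arrow provides):

use polars_arrow::bitmap::Bitmap;

fn main() {
    // Build a 100-bit bitmap and view it as an AlignedBitmapSlice of u64 chunks.
    let bitmap: Bitmap = (0..100).map(|i| i % 3 == 0).collect();
    let aligned = bitmap.aligned::<u64>();

    // The three sections always add up to the bitmap's length, regardless of
    // how the underlying allocation happens to be aligned.
    assert_eq!(
        aligned.prefix_bitlen() + aligned.bulk_bitlen() + aligned.suffix_bitlen(),
        bitmap.len()
    );
}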
2 changes: 2 additions & 0 deletions crates/polars-arrow/src/bitmap/mod.rs
@@ -11,6 +11,8 @@ pub use mutable::MutableBitmap;
mod bitmap_ops;
pub use bitmap_ops::*;

pub mod aligned;

mod assign_ops;
pub use assign_ops::*;

4 changes: 2 additions & 2 deletions crates/polars-arrow/src/bitmap/mutable.rs
@@ -33,11 +33,11 @@ use crate::trusted_len::TrustedLen;
/// // we can also get the slice:
/// assert_eq!(bitmap.as_slice(), [0b00001101u8].as_ref());
/// // debug helps :)
/// assert_eq!(format!("{:?}", bitmap), "[0b___01101]".to_string());
/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01101] }");
///
/// // It supports mutation in place
/// bitmap.set(0, false);
/// assert_eq!(format!("{:?}", bitmap), "[0b___01100]".to_string());
/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01100] }");
/// // and `O(1)` random access
/// assert_eq!(bitmap.get(0), false);
/// ```
10 changes: 5 additions & 5 deletions crates/polars-arrow/src/bitmap/utils/fmt.rs
@@ -11,10 +11,10 @@ pub fn fmt(
) -> std::fmt::Result {
    assert!(offset < 8);

    f.write_char('[')?;
    write!(f, "Bitmap {{ len: {length}, offset: {offset}, bytes: [")?;
    let mut remaining = length;
    if remaining == 0 {
        f.write_char(']')?;
        f.write_str("] }")?;
        return Ok(());
    }

@@ -39,7 +39,7 @@
    remaining -= until - offset;

    if remaining == 0 {
        f.write_char(']')?;
        f.write_str("] }")?;
        return Ok(());
    }

@@ -50,7 +50,7 @@
    }
    remaining -= number_of_bytes * 8;
    if remaining == 0 {
        f.write_char(']')?;
        f.write_str("] }")?;
        return Ok(());
    }

@@ -68,5 +68,5 @@
            f.write_char('0')?;
        }
    }
    f.write_char(']')
    f.write_str("] }")
}
112 changes: 19 additions & 93 deletions crates/polars-arrow/src/bitmap/utils/mod.rs
@@ -11,10 +11,12 @@ pub use chunk_iterator::{BitChunk, BitChunkIterExact, BitChunks, BitChunksExact}
pub use chunks_exact_mut::BitChunksExactMut;
pub use fmt::fmt;
pub use iterator::BitmapIter;
use polars_utils::slice::GetSaferUnchecked;
use polars_utils::slice::{load_padded_le_u64, GetSaferUnchecked};
pub use slice_iterator::SlicesIterator;
pub use zip_validity::{ZipValidity, ZipValidityIter};

use crate::bitmap::aligned::AlignedBitmapSlice;

/// Returns whether bit at position `i` in `byte` is set or not
#[inline]
pub fn is_set(byte: u8, i: usize) -> bool {
@@ -78,103 +80,27 @@ pub fn bytes_for(bits: usize) -> usize {

/// Returns the number of zero bits in the slice, starting at bit offset `offset` and spanning `len` bits.
/// # Panics
/// This function panics iff `(offset + len).saturating_add(7) / 8 >= slice.len()`
/// because it corresponds to the situation where `len` is beyond bounds.
pub fn count_zeros(mut slice: &[u8], mut offset: usize, len: usize) -> usize {
/// This function panics iff `offset + len > 8 * slice.len()`.
pub fn count_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
    if len == 0 {
        return 0;
    }

    // Reduce the slice only to relevant bytes.
    let first_byte_idx = offset / 8;
    let last_byte_idx = (offset + len - 1) / 8;
    slice = &slice[first_byte_idx..=last_byte_idx];
    offset %= 8;

    // Fast path for single u64.
    if slice.len() <= 8 {
        let mut tmp = [0u8; 8];
        tmp[..slice.len()].copy_from_slice(slice);
        let word = u64::from_ne_bytes(tmp) >> offset;
        let masked = word << (64 - len);
        return len - masked.count_ones() as usize;
    }

    let mut len_uncounted = len;
    let mut num_ones = 0;

    // Handle first partial byte.
    if offset != 0 {
        let partial_byte;
        (partial_byte, slice) = slice.split_first().unwrap();
        num_ones += (partial_byte >> offset).count_ones() as usize;
        len_uncounted -= 8 - offset;
    }

    // Handle last partial byte.
    let final_partial_len = len_uncounted % 8;
    if final_partial_len != 0 {
        let partial_byte;
        (partial_byte, slice) = slice.split_last().unwrap();
        let masked = partial_byte << (8 - final_partial_len);
        num_ones += masked.count_ones() as usize;
    }

    // SAFETY: transmuting u8 to u64 is fine.
    let (start, mid, end) = unsafe { slice.align_to::<u64>() };
    assert!(8 * slice.len() >= offset + len);

    // Handle unaligned ends.
    let mut tmp = [0u8; 8];
    tmp[..start.len()].copy_from_slice(start);
    num_ones += u64::from_ne_bytes(tmp).count_ones() as usize;
    tmp = [0u8; 8];
    tmp[..end.len()].copy_from_slice(end);
    num_ones += u64::from_ne_bytes(tmp).count_ones() as usize;

    // Handle the bulk.
    num_ones += mid
        .iter()
        .copied()
        .map(|w| w.count_ones() as usize)
        .sum::<usize>();

    len - num_ones
}

/// Takes the given slice of bytes plus a bit offset and bit length and returns
/// the slice so that it starts at a byte-aligned boundary.
///
/// Returns (in order):
/// - the bits of the first byte if it isn't a full byte
/// - the number of bits in the first partial byte
/// - the rest of the bits as a byteslice
/// - the number of bits in the byteslice
#[inline]
pub fn align_bitslice_start_u8(
    slice: &[u8],
    offset: usize,
    len: usize,
) -> (u8, usize, &[u8], usize) {
    if len == 0 {
        return (0, 0, &[], 0);
    // Fast-path: fits in a single u64 load.
    let first_byte_idx = offset / 8;
    let offset_in_byte = offset % 8;
    if offset_in_byte + len <= 64 {
        let mut word = load_padded_le_u64(&slice[first_byte_idx..]);
        word >>= offset_in_byte;
        word <<= 64 - len;
        return len - word.count_ones() as usize;
    }

    // Protects the below get_uncheckeds.
    assert!(slice.len() * 8 >= offset + len);

    let mut first_byte_idx = offset / 8;
    let partial_offset = offset % 8;
    let bits_in_partial_byte = (8 - partial_offset).min(len) % 8;
    let mut partial_byte = unsafe { *slice.get_unchecked(first_byte_idx) };
    partial_byte >>= partial_offset;
    partial_byte &= (1 << bits_in_partial_byte) - 1;
    first_byte_idx += (partial_offset > 0) as usize;

    let rest_slice = unsafe { slice.get_unchecked(first_byte_idx..) };
    (
        partial_byte,
        bits_in_partial_byte,
        rest_slice,
        len - bits_in_partial_byte,
    )
    let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
    let ones_in_prefix = aligned.prefix().count_ones() as usize;
    let ones_in_bulk: usize = aligned.bulk_iter().map(|w| w.count_ones() as usize).sum();
    let ones_in_suffix = aligned.suffix().count_ones() as usize;
    len - ones_in_prefix - ones_in_bulk - ones_in_suffix
}
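Note: the new fast path above depends on load_padded_le_u64 from polars_utils. A sketch of its assumed semantics, not the actual implementation: an 8-byte little-endian load that zero-pads when fewer than 8 bytes remain:

// Sketch of the behavior assumed of polars_utils::slice::load_padded_le_u64.
fn load_padded_le_u64_sketch(bytes: &[u8]) -> u64 {
    let mut buf = [0u8; 8];
    let n = bytes.len().min(8);
    buf[..n].copy_from_slice(&bytes[..n]);
    u64::from_le_bytes(buf)
}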
1 change: 1 addition & 0 deletions crates/polars-arrow/src/types/native.rs
@@ -33,6 +33,7 @@ pub trait NativeType:
    /// Type denoting its representation as bytes.
    /// This is `[u8; N]` where `N = size_of::<T>`.
    type Bytes: AsRef<[u8]>
        + AsMut<[u8]>
        + std::ops::Index<usize, Output = u8>
        + std::ops::IndexMut<usize, Output = u8>
        + for<'a> TryFrom<&'a [u8]>
