diff --git a/src/bitmap/mutable.rs b/src/bitmap/mutable.rs index 4cc76a897c6..b9056164bb8 100644 --- a/src/bitmap/mutable.rs +++ b/src/bitmap/mutable.rs @@ -2,18 +2,45 @@ use std::hint::unreachable_unchecked; use std::iter::FromIterator; use crate::bitmap::utils::{merge_reversed, set_bit_unchecked}; +use crate::error::Error; use crate::trusted_len::TrustedLen; use super::utils::{count_zeros, fmt, get_bit, set, set_bit, BitmapIter}; use super::Bitmap; -/// A container to store booleans. [`MutableBitmap`] is semantically equivalent -/// to [`Vec<bool>`], but each value is stored as a single bit, thereby achieving a compression of 8x. -/// This container is the counterpart of [`Vec<bool>`] for boolean values. -/// [`MutableBitmap`] can be converted to a [`Bitmap`] at `O(1)`. -/// The main difference against [`Vec<bool>`] is that a bitmap cannot be represented as `&[bool]`. +/// A container of booleans. [`MutableBitmap`] is semantically equivalent +/// to [`Vec<bool>`]. +/// +/// The two main differences against [`Vec<bool>`] stem from each element being stored as a single bit, +/// thereby: +/// * it uses 8x less memory +/// * it cannot be represented as `&[bool]` (i.e. no pointer arithmetic). +/// +/// A [`MutableBitmap`] can be converted to a [`Bitmap`] at `O(1)`.
+/// # Examples +/// ``` +/// use arrow2::bitmap::MutableBitmap; +/// +/// let bitmap = MutableBitmap::from([true, false, true]); +/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true]); +/// +/// // creation directly from bytes +/// let mut bitmap = MutableBitmap::try_new(vec![0b00001101], 5).unwrap(); +/// // note: the first bit is the right-most (least significant) bit of the first byte +/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true, true, false]); +/// // we can also get the slice: +/// assert_eq!(bitmap.as_slice(), [0b00001101u8].as_ref()); +/// // debug helps :) +/// assert_eq!(format!("{:?}", bitmap), "[0b___01101]".to_string()); +/// +/// // It supports mutation in place +/// bitmap.set(0, false); +/// assert_eq!(format!("{:?}", bitmap), "[0b___01100]".to_string()); +/// // and `O(1)` random access +/// assert_eq!(bitmap.get(0), false); +/// ``` /// # Implementation -/// This container is backed by [`Vec<u8>`]. +/// This container is internally a [`Vec<u8>`]. #[derive(Clone)] pub struct MutableBitmap { buffer: Vec, @@ -43,29 +70,31 @@ impl MutableBitmap { } } - /// Empties the [`MutableBitmap`]. + /// Initializes a new [`MutableBitmap`] from a [`Vec<u8>`] and a length. + /// # Errors + /// This function errors iff `length > bytes.len() * 8`. #[inline] - pub fn clear(&mut self) { - self.length = 0; - self.buffer.clear(); - } - - /// Initializes a zeroed [`MutableBitmap`]. - #[inline] - pub fn from_len_zeroed(length: usize) -> Self { - Self { - buffer: vec![0; length.saturating_add(7) / 8], - length, + pub fn try_new(bytes: Vec, length: usize) -> Result { + if length > bytes.len().saturating_mul(8) { + return Err(Error::InvalidArgumentError(format!( + "The length of the bitmap ({}) must be `<=` to the number of bytes times 8 ({})", + length, + bytes.len().saturating_mul(8) + ))); } + Ok(Self { + length, + buffer: bytes, + }) } - /// Initializes a [`MutableBitmap`] with all values set to valid/ true.
+ /// Initializes a [`MutableBitmap`] from a [`Vec<u8>`] and a length. + /// This function is `O(1)`. + /// # Panics + /// Panics iff the length is larger than the length of the buffer times 8. #[inline] - pub fn from_len_set(length: usize) -> Self { - Self { - buffer: vec![u8::MAX; length.saturating_add(7) / 8], - length, - } + pub fn from_vec(buffer: Vec, length: usize) -> Self { + Self::try_new(buffer, length).unwrap() } /// Initializes a pre-allocated [`MutableBitmap`] with capacity for `capacity` bits. @@ -77,13 +106,6 @@ impl MutableBitmap { } } - /// Reserves `additional` bits in the [`MutableBitmap`], potentially re-allocating its buffer. - #[inline(always)] - pub fn reserve(&mut self, additional: usize) { - self.buffer - .reserve((self.length + additional).saturating_add(7) / 8 - self.buffer.len()) - } - /// Pushes a new bit to the [`MutableBitmap`], re-sizing it if necessary. #[inline] pub fn push(&mut self, value: bool) { @@ -111,6 +133,75 @@ impl MutableBitmap { Some(value) } + /// Returns whether the position `index` is set. + /// # Panics + /// Panics iff `index >= self.len()`. + #[inline] + pub fn get(&self, index: usize) -> bool { + get_bit(&self.buffer, index) + } + + /// Sets the position `index` to `value`. + /// # Panics + /// Panics iff `index >= self.len()`. + #[inline] + pub fn set(&mut self, index: usize, value: bool) { + set_bit(self.buffer.as_mut_slice(), index, value) + } + + /// Constructs a new iterator over the values of [`MutableBitmap`]. + pub fn iter(&self) -> BitmapIter { + BitmapIter::new(&self.buffer, 0, self.length) + } + + /// Empties the [`MutableBitmap`]. + #[inline] + pub fn clear(&mut self) { + self.length = 0; + self.buffer.clear(); + } + + /// Extends [`MutableBitmap`] by `additional` values of constant `value`. + /// # Implementation + /// This function is an order of magnitude faster than pushing element by element.
+ #[inline] + pub fn extend_constant(&mut self, additional: usize, value: bool) { + if additional == 0 { + return; + } + + if value { + self.extend_set(additional) + } else { + self.extend_unset(additional) + } + } + + /// Initializes a zeroed [`MutableBitmap`]. + #[inline] + pub fn from_len_zeroed(length: usize) -> Self { + Self { + buffer: vec![0; length.saturating_add(7) / 8], + length, + } + } + + /// Initializes a [`MutableBitmap`] with all values set to valid/ true. + #[inline] + pub fn from_len_set(length: usize) -> Self { + Self { + buffer: vec![u8::MAX; length.saturating_add(7) / 8], + length, + } + } + + /// Reserves `additional` bits in the [`MutableBitmap`], potentially re-allocating its buffer. + #[inline(always)] + pub fn reserve(&mut self, additional: usize) { + self.buffer + .reserve((self.length + additional).saturating_add(7) / 8 - self.buffer.len()) + } + /// Returns the capacity of [`MutableBitmap`] in number of bits. #[inline] pub fn capacity(&self) -> usize { @@ -205,36 +296,6 @@ impl MutableBitmap { } } - /// Extends [`MutableBitmap`] by `additional` values of constant `value`. - #[inline] - pub fn extend_constant(&mut self, additional: usize, value: bool) { - if additional == 0 { - return; - } - - if value { - self.extend_set(additional) - } else { - self.extend_unset(additional) - } - } - - /// Returns whether the position `index` is set. - /// # Panics - /// Panics iff `index >= self.len()`. - #[inline] - pub fn get(&self, index: usize) -> bool { - get_bit(&self.buffer, index) - } - - /// Sets the position `index` to `value` - /// # Panics - /// Panics iff `index >= self.len()`. - #[inline] - pub fn set(&mut self, index: usize, value: bool) { - set_bit(self.buffer.as_mut_slice(), index, value) - } - /// Sets the position `index` to `value` /// # Safety /// Caller must ensure that `index < self.len()` @@ -249,18 +310,6 @@ impl MutableBitmap { } } -impl MutableBitmap { - /// Initializes a [`MutableBitmap`] from a [`Vec`] and a length. 
- /// This function is `O(1)`. - /// # Panic - /// Panics iff the length is larger than the length of the buffer times 8. - #[inline] - pub fn from_vec(buffer: Vec, length: usize) -> Self { - assert!(length <= buffer.len() * 8); - Self { buffer, length } - } -} - impl From for Bitmap { #[inline] fn from(buffer: MutableBitmap) -> Self { @@ -607,7 +656,7 @@ impl MutableBitmap { /// This is the fastest way to extend a [`MutableBitmap`]. /// # Implementation /// When both [`MutableBitmap`]'s length and `offset` are both multiples of 8, - /// this function performs a memcopy. Else, it extends [`MutableBitmap`] bit by bit. + /// this function performs a memcopy. Else, it first aligns bit by bit and then performs a memcopy. #[inline] pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) { assert!(offset + length <= slice.len() * 8); @@ -656,10 +705,3 @@ impl<'a> IntoIterator for &'a MutableBitmap { BitmapIter::<'a>::new(&self.buffer, 0, self.length) } } - -impl<'a> MutableBitmap { - /// constructs a new iterator over the values of [`MutableBitmap`]. - pub fn iter(&'a self) -> BitmapIter<'a> { - BitmapIter::<'a>::new(&self.buffer, 0, self.length) - } -}