Skip to content

Commit

Permalink
refactor(rust): use new MinMax kernels (#12961)
Browse files Browse the repository at this point in the history
  • Loading branch information
orlp committed Dec 9, 2023
1 parent 20cd827 commit 400adab
Show file tree
Hide file tree
Showing 38 changed files with 789 additions and 941 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions crates/polars-arrow/src/array/binary/mod.rs
Expand Up @@ -2,6 +2,7 @@ use either::Either;

use super::specification::try_check_offsets_bounds;
use super::{Array, GenericBinaryArray};
use crate::array::iterator::NonNullValuesIter;
use crate::bitmap::utils::{BitmapIter, ZipValidity};
use crate::bitmap::Bitmap;
use crate::buffer::Buffer;
Expand Down Expand Up @@ -138,6 +139,12 @@ impl<O: Offset> BinaryArray<O> {
BinaryValueIter::new(self)
}

/// Returns an iterator of the non-null values.
///
/// The iterator is built from this array together with `self.validity()`;
/// values are produced directly (not wrapped in `Option`).
#[inline]
pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, BinaryArray<O>> {
NonNullValuesIter::new(self, self.validity())
}

/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
Expand Down
15 changes: 15 additions & 0 deletions crates/polars-arrow/src/array/boolean/iterator.rs
@@ -1,5 +1,6 @@
use super::super::MutableArray;
use super::{BooleanArray, MutableBooleanArray};
use crate::array::ArrayAccessor;
use crate::bitmap::utils::{BitmapIter, ZipValidity};
use crate::bitmap::IntoIter;

Expand Down Expand Up @@ -53,3 +54,17 @@ impl<'a> MutableBooleanArray {
self.values().iter()
}
}

// Exposes `BooleanArray` through the generic `ArrayAccessor` interface, yielding `bool` items.
// NOTE(review): both methods forward to same-named methods on `BooleanArray`, presumably the
// inherent ones defined outside this view — confirm they cannot resolve back to this trait impl.
unsafe impl<'a> ArrayAccessor<'a> for BooleanArray {
type Item = bool;

// Caller contract (from the trait): `index` must be in-bounds in the array.
#[inline]
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
(*self).value_unchecked(index)
}

// Forwards to the array's own `len`.
#[inline]
fn len(&self) -> usize {
(*self).len()
}
}
7 changes: 7 additions & 0 deletions crates/polars-arrow/src/array/boolean/mod.rs
@@ -1,6 +1,7 @@
use either::Either;

use super::Array;
use crate::array::iterator::NonNullValuesIter;
use crate::bitmap::utils::{BitmapIter, ZipValidity};
use crate::bitmap::{Bitmap, MutableBitmap};
use crate::datatypes::{ArrowDataType, PhysicalType};
Expand Down Expand Up @@ -95,6 +96,12 @@ impl BooleanArray {
self.values().iter()
}

/// Returns an iterator of the non-null values.
///
/// The iterator is built from this array together with `self.validity()`;
/// values are produced directly (not wrapped in `Option`).
#[inline]
pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, BooleanArray> {
NonNullValuesIter::new(self, self.validity())
}

/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
Expand Down
40 changes: 38 additions & 2 deletions crates/polars-arrow/src/array/iterator.rs
@@ -1,17 +1,21 @@
use crate::bitmap::iterator::TrueIdxIter;
use crate::bitmap::Bitmap;
use crate::trusted_len::TrustedLen;

// Private module holding `Sealed`, the supertrait required by `ArrayAccessor`.
mod private {
// Marker trait with no methods; `ArrayAccessor` names it as a supertrait.
pub trait Sealed {}

// Blanket impls: every type implementing `ArrayAccessor` is also `Sealed`.
// NOTE(review): the two impls below differ only by the `?Sized` bound and look like the
// before/after lines of a diff — presumably only the `?Sized` one is meant to remain; confirm.
impl<'a, T: super::ArrayAccessor<'a>> Sealed for T {}
impl<'a, T: super::ArrayAccessor<'a> + ?Sized> Sealed for T {}
}

/// Sealed trait representing access to a value of an array.
///
/// # Safety
/// Implementers of this trait guarantee that
/// `value_unchecked` is safe when called up to `len`
pub unsafe trait ArrayAccessor<'a>: private::Sealed {
/// The type produced by [`value_unchecked`](Self::value_unchecked).
type Item: 'a;
/// Returns the value at `index`; the bounds check is the caller's responsibility.
///
/// # Safety
/// The index must be in-bounds in the array.
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item;
/// Returns the length that bounds the valid indices for `value_unchecked`.
fn len(&self) -> usize;
}
Expand Down Expand Up @@ -81,3 +85,35 @@ impl<'a, A: ArrayAccessor<'a>> DoubleEndedIterator for ArrayValuesIter<'a, A> {

// Marker impls declaring that `ArrayValuesIter` knows its length.
// NOTE(review): the `Iterator` impl for `ArrayValuesIter` is outside this view — presumably its
// `size_hint` is exact, which these impls rely on; confirm.
unsafe impl<'a, A: ArrayAccessor<'a>> TrustedLen for ArrayValuesIter<'a, A> {}
impl<'a, A: ArrayAccessor<'a>> ExactSizeIterator for ArrayValuesIter<'a, A> {}

/// Iterator that reads values from an accessor at the indices produced by a [`TrueIdxIter`].
pub struct NonNullValuesIter<'a, A: ?Sized> {
// Source the values are read from.
accessor: &'a A,
// Indices to read, in the order they are yielded.
idxs: TrueIdxIter<'a>,
}

impl<'a, A: ArrayAccessor<'a> + ?Sized> NonNullValuesIter<'a, A> {
    /// Creates an iterator over the values of `accessor` at the indices selected by `validity`.
    ///
    /// The index iterator is a [`TrueIdxIter`] built from `accessor.len()` and `validity`.
    pub fn new(accessor: &'a A, validity: Option<&'a Bitmap>) -> Self {
        let idxs = TrueIdxIter::new(accessor.len(), validity);
        Self { accessor, idxs }
    }
}

impl<'a, A: ArrayAccessor<'a> + ?Sized> Iterator for NonNullValuesIter<'a, A> {
    type Item = A::Item;

    /// Fetches the next index from the index iterator and reads the value stored there.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let idx = self.idxs.next()?;
        // SAFETY: `idxs` was built from `accessor.len()`, so the indices it yields are
        // expected to be in-bounds for `accessor`.
        Some(unsafe { self.accessor.value_unchecked(idx) })
    }

    /// Same bounds as the underlying index iterator.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.idxs.size_hint()
    }
}

// `size_hint` is forwarded from the inner `TrueIdxIter`, so this promise is only as good as
// that iterator's `size_hint`.
unsafe impl<'a, A: ArrayAccessor<'a> + ?Sized> TrustedLen for NonNullValuesIter<'a, A> {}
7 changes: 7 additions & 0 deletions crates/polars-arrow/src/array/list/iterator.rs
@@ -1,4 +1,5 @@
use super::ListArray;
use crate::array::iterator::NonNullValuesIter;
use crate::array::{Array, ArrayAccessor, ArrayValuesIter};
use crate::bitmap::utils::{BitmapIter, ZipValidity};
use crate::offset::Offset;
Expand Down Expand Up @@ -41,6 +42,12 @@ impl<'a, O: Offset> ListArray<O> {
pub fn values_iter(&'a self) -> ListValuesIter<'a, O> {
ListValuesIter::new(self)
}

/// Returns an iterator of the non-null values `Box<dyn Array>`.
///
/// The iterator is built from this array together with `self.validity()`.
#[inline]
pub fn non_null_values_iter(&'a self) -> NonNullValuesIter<'a, ListArray<O>> {
NonNullValuesIter::new(self, self.validity())
}
}

struct Iter<T, I: Iterator<Item = Option<T>>> {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-arrow/src/array/mod.rs
Expand Up @@ -678,7 +678,7 @@ mod ffi;
mod fmt;
#[doc(hidden)]
pub mod indexable;
mod iterator;
pub mod iterator;

pub mod growable;
mod values;
Expand Down
16 changes: 15 additions & 1 deletion crates/polars-arrow/src/array/primitive/iterator.rs
@@ -1,12 +1,26 @@
use polars_utils::iter::IntoIteratorCopied;

use super::{MutablePrimitiveArray, PrimitiveArray};
use crate::array::MutableArray;
use crate::array::{ArrayAccessor, MutableArray};
use crate::bitmap::utils::{BitmapIter, ZipValidity};
use crate::bitmap::IntoIter as BitmapIntoIter;
use crate::buffer::IntoIter;
use crate::types::NativeType;

// Lets a plain slice of native values act as an `ArrayAccessor`; items are returned by value.
unsafe impl<'a, T: NativeType> ArrayAccessor<'a> for [T] {
type Item = T;

// Reads the element through `get_unchecked` and dereferences it to return it by value.
// Caller contract (from the trait): `index` must be in-bounds.
#[inline]
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
*self.get_unchecked(index)
}

// Forwards to the slice's own `len`.
#[inline]
fn len(&self) -> usize {
(*self).len()
}
}

impl<T: NativeType> IntoIterator for PrimitiveArray<T> {
type Item = Option<T>;
type IntoIter = ZipValidity<T, IntoIter<T>, BitmapIntoIter>;
Expand Down
9 changes: 8 additions & 1 deletion crates/polars-arrow/src/array/primitive/mod.rs
Expand Up @@ -3,6 +3,7 @@ use std::ops::Range;
use either::Either;

use super::Array;
use crate::array::iterator::NonNullValuesIter;
use crate::bitmap::utils::{BitmapIter, ZipValidity};
use crate::bitmap::Bitmap;
use crate::buffer::Buffer;
Expand All @@ -15,7 +16,7 @@ mod data;
mod ffi;
pub(super) mod fmt;
mod from_natural;
mod iterator;
pub mod iterator;

mod mutable;
pub use mutable::*;
Expand Down Expand Up @@ -150,6 +151,12 @@ impl<T: NativeType> PrimitiveArray<T> {
self.values().iter()
}

/// Returns an iterator of the non-null values `T`.
///
/// The iterator reads from `self.values()` (accessed as a `[T]`) at the positions selected
/// by `self.validity()`.
#[inline]
pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, [T]> {
NonNullValuesIter::new(self.values(), self.validity())
}

/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
Expand Down
7 changes: 7 additions & 0 deletions crates/polars-arrow/src/array/utf8/mod.rs
Expand Up @@ -2,6 +2,7 @@ use either::Either;

use super::specification::try_check_utf8;
use super::{Array, GenericBinaryArray};
use crate::array::iterator::NonNullValuesIter;
use crate::array::BinaryArray;
use crate::bitmap::utils::{BitmapIter, ZipValidity};
use crate::bitmap::Bitmap;
Expand Down Expand Up @@ -133,6 +134,12 @@ impl<O: Offset> Utf8Array<O> {
Utf8ValuesIter::new(self)
}

/// Returns an iterator of the non-null values `&str`.
///
/// The iterator is built from this array together with `self.validity()`.
#[inline]
pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, Utf8Array<O>> {
NonNullValuesIter::new(self, self.validity())
}

/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
Expand Down
70 changes: 70 additions & 0 deletions crates/polars-arrow/src/bitmap/iterator.rs
@@ -1,6 +1,76 @@
use super::bitmask::BitMask;
use super::Bitmap;
use crate::trusted_len::TrustedLen;

/// Iterator over the indices of the set bits of an optional bitmap.
///
/// Without a bitmap, every index in `0..len` is yielded.
pub struct TrueIdxIter<'a> {
// Bit mask that is scanned for set bits (a default mask when no bitmap was given).
mask: BitMask<'a>,
// Indices in `i..first_unknown` are already known to be set and are yielded without
// consulting `mask`.
first_unknown: usize,
// Next index to examine.
i: usize,
// Total number of indices covered; scanning stops once `i` reaches it.
len: usize,
// Value reported by `size_hint`; decremented for every index yielded.
remaining: usize,
}

impl<'a> TrueIdxIter<'a> {
    /// Creates an iterator over the indices in `0..len` whose bit is set in `validity`.
    ///
    /// When `validity` is `None`, every index in `0..len` is yielded.
    ///
    /// # Panics
    /// Panics if `validity` is `Some` and its length differs from `len`.
    #[inline]
    pub fn new(len: usize, validity: Option<&'a Bitmap>) -> Self {
        if let Some(bitmap) = validity {
            assert!(len == bitmap.len());
            Self {
                mask: BitMask::from_bitmap(bitmap),
                // Nothing is known about the mask yet, so the fast path starts disabled.
                first_unknown: 0,
                i: 0,
                // Only set bits are yielded, so unset bits must not be counted: `size_hint`
                // reports `remaining` and the `TrustedLen` impl relies on it being exact.
                remaining: len - bitmap.unset_bits(),
                len,
            }
        } else {
            Self {
                mask: BitMask::default(),
                // No bitmap: every index is yielded through the fast path.
                first_unknown: len,
                i: 0,
                remaining: len,
                len,
            }
        }
    }
}

impl<'a> Iterator for TrueIdxIter<'a> {
type Item = usize;

// Yields the next index whose bit is set, or `None` once `i` has reached `len`.
#[inline]
fn next(&mut self) -> Option<Self::Item> {
// Fast path for many non-nulls in a row.
if self.i < self.first_unknown {
let ret = self.i;
self.i += 1;
self.remaining -= 1;
return Some(ret);
}

// Slow path: scan the mask one 32-bit window at a time for the next set bit.
while self.i < self.len {
// NOTE(review): presumably the 32 mask bits starting at bit `self.i`, lowest bit first —
// confirm against `BitMask::get_u32`.
let mask = self.mask.get_u32(self.i);
// Count of unset bits before the first set bit in this window (32 when none is set).
let num_null = mask.trailing_zeros();
self.i += num_null as usize;
if num_null < 32 {
// The run of set bits starting at `self.i` is now known; record where it ends so the
// fast path can serve the rest of the run.
self.first_unknown = self.i + (mask >> num_null).trailing_ones() as usize;
let ret = self.i;
self.i += 1;
self.remaining -= 1;
return Some(ret);
}
}

None
}

// Reports `remaining` as both the lower and the upper bound.
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.remaining, Some(self.remaining))
}
}

// NOTE(review): this promise depends on `size_hint` being exact, i.e. on `remaining` always
// equalling the number of indices still to be yielded — confirm that the constructor
// initializes `remaining` accordingly.
unsafe impl<'a> TrustedLen for TrueIdxIter<'a> {}

/// This crates' equivalent of [`std::vec::IntoIter`] for [`Bitmap`].
#[derive(Debug, Clone)]
pub struct IntoIter {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-arrow/src/bitmap/mod.rs
Expand Up @@ -2,7 +2,7 @@
mod immutable;
pub use immutable::*;

mod iterator;
pub mod iterator;
pub use iterator::IntoIter;

mod mutable;
Expand Down

0 comments on commit 400adab

Please sign in to comment.