Skip to content

Commit

Permalink
add reverse collect for numeric ChunkedArray
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 23, 2021
1 parent 269247f commit a21d2a2
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 4 deletions.
2 changes: 2 additions & 0 deletions polars/polars-arrow/src/trusted_len/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
mod boolean;
mod rev;

use crate::utils::{FromTrustedLenIterator, TrustMyLength};
use arrow::bitmap::utils::{BitmapIter, ZipValidity};
use arrow::buffer::MutableBuffer;
use arrow::types::NativeType;
pub use rev::FromIteratorReversed;
use std::slice::Iter;

/// An iterator of known, fixed size.
Expand Down
5 changes: 5 additions & 0 deletions polars/polars-arrow/src/trusted_len/rev.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
use crate::trusted_len::TrustedLen;

/// Construction of a collection from a [`TrustedLen`] iterator whose items are
/// stored in reverse order: the first item yielded becomes the last element.
pub trait FromIteratorReversed<T>: Sized {
    /// Consumes `iter` and builds `Self` with the items laid out back to front.
    fn from_trusted_len_iter_rev<I>(iter: I) -> Self
    where
        I: TrustedLen<Item = T>;
}
9 changes: 8 additions & 1 deletion polars/polars-arrow/src/utils.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::trusted_len::{PushUnchecked, TrustedLen};
use crate::trusted_len::{FromIteratorReversed, PushUnchecked, TrustedLen};
use arrow::bitmap::Bitmap;
use std::ops::BitAnd;

Expand Down Expand Up @@ -78,6 +78,13 @@ pub trait CustomIterTools: Iterator {
{
FromTrustedLenIterator::from_iter_trusted_length(self)
}

/// Collects this iterator into `T` with the items in reverse order.
///
/// Thin convenience wrapper that hands the iterator to
/// `FromIteratorReversed::from_trusted_len_iter_rev`.
fn collect_reversed<T>(self) -> T
where
    Self: Sized + TrustedLen,
    T: FromIteratorReversed<Self::Item>,
{
    <T as FromIteratorReversed<Self::Item>>::from_trusted_len_iter_rev(self)
}
}

pub trait CustomIterToolsSized: Iterator + Sized {}
Expand Down
97 changes: 94 additions & 3 deletions polars/polars-core/src/chunked_array/trusted_len.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
use crate::chunked_array::upstream_traits::PolarsAsRef;
use crate::prelude::*;
use crate::utils::{CustomIterTools, FromTrustedLenIterator, NoNull};
use arrow::buffer::Buffer;
use arrow::bitmap::MutableBitmap;
use arrow::buffer::{Buffer, MutableBuffer};
use polars_arrow::bit_util::unset_bit_raw;
use polars_arrow::trusted_len::FromIteratorReversed;
use std::borrow::Borrow;

impl<T> FromTrustedLenIterator<Option<T::Native>> for ChunkedArray<T>
where
T: PolarsPrimitiveType,
T: PolarsNumericType,
{
fn from_iter_trusted_length<I: IntoIterator<Item = Option<T::Native>>>(iter: I) -> Self {
let iter = iter.into_iter();
Expand All @@ -21,7 +24,7 @@ where
// NoNull is only a wrapper needed for specialization
impl<T> FromTrustedLenIterator<T::Native> for NoNull<ChunkedArray<T>>
where
T: PolarsPrimitiveType,
T: PolarsNumericType,
{
// We use AlignedVec because it is way faster than Arrows builder. We can do this because we
// know we don't have null values.
Expand All @@ -33,6 +36,72 @@ where
NoNull::new(ChunkedArray::new_from_chunks("", vec![Arc::new(arr)]))
}
}

impl<T> FromIteratorReversed<Option<T::Native>> for ChunkedArray<T>
where
    T: PolarsNumericType,
{
    /// Builds a single-chunk array from `iter`, filling the value buffer from
    /// its last slot towards its first.
    ///
    /// `None` items are stored as `T::Native::default()` and get their validity
    /// bit cleared; every other bit stays set.
    fn from_trusted_len_iter_rev<I: TrustedLen<Item = Option<T::Native>>>(iter: I) -> Self {
        let (lower, upper) = iter.size_hint();
        debug_assert_eq!(lower, upper.unwrap());
        let len = lower;

        let mut values: MutableBuffer<T::Native> = AlignedVec::with_capacity(len);
        // Every slot starts out valid; bits are cleared as nulls are encountered.
        let mut validity = MutableBitmap::with_capacity(len);
        validity.extend_constant(len, true);
        // NOTE(review): this pointer is obtained from a shared slice and later
        // written through -- confirm this is sound for `MutableBitmap`.
        let validity_bytes = validity.as_slice().as_ptr() as *mut u8;

        // SAFETY: relies on `iter` yielding exactly `len` items (only checked in
        // debug builds above). Under that assumption every slot in `0..len` of the
        // `len`-capacity buffer is written exactly once before `set_len`.
        unsafe {
            let base = values.as_mut_ptr();
            // Index of the slot that was written last; starts one past the end.
            let mut idx = len;

            iter.for_each(|maybe_value| {
                idx -= 1;
                let slot = base.add(idx);
                if let Some(value) = maybe_value {
                    std::ptr::write(slot, value);
                } else {
                    // Keep the slot initialised and flag it as null.
                    std::ptr::write(slot, T::Native::default());
                    unset_bit_raw(validity_bytes, idx);
                }
            });
            values.set_len(len);
        }

        let dtype = T::get_dtype().to_arrow();
        let array = PrimitiveArray::from_data(dtype, values.into(), Some(validity.into()));
        ChunkedArray::new_from_chunks("", vec![Arc::new(array)])
    }
}

impl<T> FromIteratorReversed<T::Native> for NoNull<ChunkedArray<T>>
where
    T: PolarsNumericType,
{
    /// Builds a single-chunk array without a validity bitmap from `iter`,
    /// filling the value buffer from its last slot towards its first.
    fn from_trusted_len_iter_rev<I: TrustedLen<Item = T::Native>>(iter: I) -> Self {
        let (lower, upper) = iter.size_hint();
        debug_assert_eq!(lower, upper.unwrap());
        let len = lower;

        let mut values: MutableBuffer<T::Native> = AlignedVec::with_capacity(len);
        // SAFETY: relies on `iter` yielding exactly `len` items (only checked in
        // debug builds above). Under that assumption the cursor walks from one
        // past the end down to the start, writing each slot exactly once before
        // `set_len`.
        unsafe {
            let end = values.as_mut_ptr().add(len);
            // Thread the write cursor through the fold; each step moves it one
            // slot towards the front and stores the item there.
            let _front = iter.fold(end, |cursor, value| {
                let slot = cursor.sub(1);
                std::ptr::write(slot, value);
                slot
            });
            values.set_len(len);
        }

        let dtype = T::get_dtype().to_arrow();
        let array = PrimitiveArray::from_data(dtype, values.into(), None);
        let chunked = ChunkedArray::new_from_chunks("", vec![Arc::new(array)]);
        NoNull::new(chunked)
    }
}

impl<Ptr> FromTrustedLenIterator<Ptr> for ListChunked
where
Ptr: Borrow<Series>,
Expand Down Expand Up @@ -110,3 +179,25 @@ impl<T: PolarsObject> FromTrustedLenIterator<Option<T>> for ObjectChunked<T> {
iter.collect()
}
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::utils::CustomIterTools;

    // Checks both reversed-collect targets: the no-null wrapper and the
    // nullable array.
    #[test]
    fn test_reverse_collect() {
        // Without nulls: the values come out back to front.
        let reversed: NoNull<Int32Chunked> = (0..5).collect_reversed();
        let chunk = reversed.downcast_iter().next().unwrap();
        assert_eq!(chunk.values().as_slice(), &[4, 3, 2, 1, 0]);

        // With nulls: odd inputs become `None` and keep their reversed position.
        let nullable: Int32Chunked = (0..5)
            .map(|v| if v % 2 == 0 { Some(v) } else { None })
            .collect_reversed();
        assert_eq!(
            Vec::from(&nullable),
            &[Some(4), None, Some(2), None, Some(0)]
        );
    }
}

0 comments on commit a21d2a2

Please sign in to comment.