Skip to content

Commit

Permalink
specifically start using 'collect_trusted', apply done
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jul 1, 2021
1 parent df85610 commit 7675e2b
Show file tree
Hide file tree
Showing 9 changed files with 237 additions and 27 deletions.
1 change: 1 addition & 0 deletions polars/polars-arrow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ pub mod buffer;
pub mod error;
pub mod kernels;
pub mod prelude;
pub mod trusted_len;
pub mod utils;
50 changes: 50 additions & 0 deletions polars/polars-arrow/src/trusted_len.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
use crate::utils::TrustMyLength;
use std::slice::Iter;

/// An iterator of known, fixed size.
///
/// A stand-in for Rust's unstable [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
/// It is re-defined here and implemented for a selection of iterators until
/// `std::iter::TrustedLen` is stabilized.
///
/// *Implementation from Jorge Leitao on Arrow2.*
///
/// # Safety
///
/// The trait declares no methods; implementing it is purely a promise about the
/// length the iterator reports.
/// NOTE(review): presumably the contract is the same as the linked std trait
/// (an exact `size_hint`) — confirm against the code that consumes this trait.
pub unsafe trait TrustedLen: Iterator {}

// Slice iterator (`std::slice::Iter`, imported above as `Iter`), for any element type `T`.
// NOTE(review): presumably mirrors the std `TrustedLen` impl for the same type — confirm.
unsafe impl<T> TrustedLen for Iter<'_, T> {}

// `Map` is `TrustedLen` whenever the iterator it wraps (`I`) is; the mapping closure `T`
// only needs to be callable on `I::Item`.
unsafe impl<B, I: TrustedLen, T: FnMut(I::Item) -> B> TrustedLen for std::iter::Map<I, T> {}

// `Copied` is `TrustedLen` when the wrapped iterator is `TrustedLen` and yields
// references to a `Copy` type.
unsafe impl<'a, I, T: 'a> TrustedLen for std::iter::Copied<I>
where
I: TrustedLen<Item = &'a T>,
T: Copy,
{
}

// `Enumerate` is `TrustedLen` whenever the wrapped iterator is.
unsafe impl<I> TrustedLen for std::iter::Enumerate<I> where I: TrustedLen {}

// `Zip` is `TrustedLen` only when both zipped iterators are.
unsafe impl<A, B> TrustedLen for std::iter::Zip<A, B>
where
A: TrustedLen,
B: TrustedLen,
{
}

// Slice `Windows` iterator, for any element type `T`.
// NOTE(review): presumably mirrors the std `TrustedLen` impl for the same type — confirm.
unsafe impl<T> TrustedLen for std::slice::Windows<'_, T> {}

// `Chain` is `TrustedLen` when both halves are `TrustedLen` and yield the same item type.
// NOTE(review): the combined length of two iterators can exceed `usize::MAX`; confirm that
// consumers of this trait cope with whatever `size_hint` reports in that case.
unsafe impl<A, B> TrustedLen for std::iter::Chain<A, B>
where
A: TrustedLen,
B: TrustedLen<Item = A::Item>,
{
}

// `Once`, implemented unconditionally for any `T`.
unsafe impl<T> TrustedLen for std::iter::Once<T> {}

// Owning `Vec` iterator, implemented unconditionally for any `T`.
unsafe impl<T> TrustedLen for std::vec::IntoIter<T> {}

// NOTE(review): `Repeat` and `RepeatWith` never terminate, so they have no finite length.
// Presumably they are only meant to be consumed through a bounding adapter such as `Take`
// (implemented just below) — confirm that no consumer assumes a finite upper bound.
unsafe impl<A: Clone> TrustedLen for std::iter::Repeat<A> {}
unsafe impl<A, F: FnMut() -> A> TrustedLen for std::iter::RepeatWith<F> {}
// `Take` is `TrustedLen` whenever the iterator it wraps is.
unsafe impl<A: TrustedLen> TrustedLen for std::iter::Take<A> {}

// `Rev` is `TrustedLen` when the wrapped iterator is both `TrustedLen` and double-ended.
unsafe impl<I: TrustedLen + DoubleEndedIterator> TrustedLen for std::iter::Rev<I> {}

// Implemented for `TrustMyLength` around *any* iterator: `I` is only required to be an
// `Iterator`, not `TrustedLen`.
// NOTE(review): the guarantee therefore rests entirely on whoever constructs the
// `TrustMyLength` supplying a correct length — verify the call sites.
unsafe impl<I: Iterator<Item = J>, J> TrustedLen for TrustMyLength<I, J> {}
14 changes: 14 additions & 0 deletions polars/polars-arrow/src/utils.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::trusted_len::TrustedLen;
use arrow::bitmap::Bitmap;
use std::ops::BitAnd;

Expand Down Expand Up @@ -70,6 +71,19 @@ pub trait CustomIterTools: Iterator {
{
TrustMyLength::new(self, length)
}

/// Collects this iterator into `T` by delegating to
/// `FromTrustedLenIterator::from_iter_trusted_length`.
///
/// Only callable when `Self` is `Sized` and implements `TrustedLen`; `T` must
/// implement `FromTrustedLenIterator` for this iterator's item type.
fn collect_trusted<T: FromTrustedLenIterator<Self::Item>>(self) -> T
where
Self: Sized + TrustedLen,
{
FromTrustedLenIterator::from_iter_trusted_length(self)
}
}

/// Marker trait for sized iterators; it declares no methods.
// NOTE(review): no implementations or uses are visible in this hunk — confirm it is needed.
pub trait CustomIterToolsSized: Iterator + Sized {}

// Blanket implementation: every `Iterator`, sized or not, gets the `CustomIterTools` methods.
impl<T: ?Sized> CustomIterTools for T where T: Iterator {}

/// Types that can be built from an iterator implementing `TrustedLen`.
///
/// `A` is the item type the source iterator yields.
pub trait FromTrustedLenIterator<A>: Sized {
/// Builds `Self` from `iter`, which yields items of type `A` and implements `TrustedLen`.
fn from_iter_trusted_length<T: IntoIterator<Item = A> + TrustedLen>(iter: T) -> Self;
}
3 changes: 2 additions & 1 deletion polars/polars-core/src/chunked_array/builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,8 @@ where
T: PolarsPrimitiveType,
{
fn new_from_slice(name: &str, v: &[T::Native]) -> Self {
Self::new_from_iter(name, v.iter().copied())
let arr = PrimitiveArray::<T::Native>::from_slice(v).to(T::get_dtype().to_arrow());
ChunkedArray::new_from_chunks(name, vec![Arc::new(arr)])
}

fn new_from_opt_slice(name: &str, opt_v: &[Option<T::Native>]) -> Self {
Expand Down
50 changes: 39 additions & 11 deletions polars/polars-core/src/chunked_array/iterator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,17 @@ pub mod par;

/// A `PolarsIterator` is an iterator over a `ChunkedArray` which contains polars types. A `PolarsIterator`
/// must implement `ExactSizeIterator`, `DoubleEndedIterator`, `Send`, `Sync` and `TrustedLen`.
pub trait PolarsIterator: ExactSizeIterator + DoubleEndedIterator + Send + Sync {}
pub trait PolarsIterator:
ExactSizeIterator + DoubleEndedIterator + Send + Sync + TrustedLen
{
}
// `PolarsIterator` lists `TrustedLen` among its supertraits, so the boxed trait object
// wraps an iterator that already carries the guarantee.
// NOTE(review): presumably `Box` forwards `size_hint` to the inner iterator unchanged — confirm.
unsafe impl<'a, I> TrustedLen for Box<dyn PolarsIterator<Item = I> + 'a> {}

/// Implement PolarsIterator for every iterator that implements the needed traits.
impl<T: ?Sized> PolarsIterator for T where T: ExactSizeIterator + DoubleEndedIterator + Send + Sync {}
impl<T: ?Sized> PolarsIterator for T where
T: ExactSizeIterator + DoubleEndedIterator + Send + Sync + TrustedLen
{
}

impl<'a, T> IntoIterator for &'a ChunkedArray<T>
where
Expand Down Expand Up @@ -110,8 +117,13 @@ impl BooleanChunked {
#[allow(clippy::wrong_self_convention)]
pub fn into_no_null_iter(
&self,
) -> impl Iterator<Item = bool> + '_ + Send + Sync + ExactSizeIterator + DoubleEndedIterator
{
) -> impl Iterator<Item = bool>
+ '_
+ Send
+ Sync
+ ExactSizeIterator
+ DoubleEndedIterator
+ TrustedLen {
self.downcast_iter()
.map(|bool_arr| BoolIterNoNull::new(bool_arr))
.flatten()
Expand Down Expand Up @@ -183,8 +195,13 @@ impl Utf8Chunked {
#[allow(clippy::wrong_self_convention)]
pub fn into_no_null_iter<'a>(
&'a self,
) -> impl Iterator<Item = &'a str> + '_ + Send + Sync + ExactSizeIterator + DoubleEndedIterator
{
) -> impl Iterator<Item = &'a str>
+ '_
+ Send
+ Sync
+ ExactSizeIterator
+ DoubleEndedIterator
+ TrustedLen {
self.downcast_iter()
.map(|arr| Utf8IterNoNull::new(arr))
.flatten()
Expand Down Expand Up @@ -264,8 +281,13 @@ impl ListChunked {
#[allow(clippy::wrong_self_convention)]
pub fn into_no_null_iter(
&self,
) -> impl Iterator<Item = Series> + '_ + Send + Sync + ExactSizeIterator + DoubleEndedIterator
{
) -> impl Iterator<Item = Series>
+ '_
+ Send
+ Sync
+ ExactSizeIterator
+ DoubleEndedIterator
+ TrustedLen {
self.downcast_iter()
.map(|arr| ListIterNoNull::new(arr))
.flatten()
Expand All @@ -290,7 +312,8 @@ impl<T: PolarsObject> ObjectChunked<T> {
#[allow(clippy::wrong_self_convention)]
pub fn into_no_null_iter(
&self,
) -> impl Iterator<Item = &T> + '_ + Send + Sync + ExactSizeIterator + DoubleEndedIterator {
) -> impl Iterator<Item = &T> + '_ + Send + Sync + ExactSizeIterator + DoubleEndedIterator + TrustedLen
{
self.downcast_iter()
.map(|arr| arr.values().iter())
.flatten()
Expand Down Expand Up @@ -343,8 +366,13 @@ impl CategoricalChunked {
#[allow(clippy::wrong_self_convention)]
pub fn into_no_null_iter(
&self,
) -> impl Iterator<Item = u32> + '_ + Send + Sync + ExactSizeIterator + DoubleEndedIterator
{
) -> impl Iterator<Item = u32>
+ '_
+ Send
+ Sync
+ ExactSizeIterator
+ DoubleEndedIterator
+ TrustedLen {
self.deref().into_no_null_iter()
}
}
Expand Down
10 changes: 8 additions & 2 deletions polars/polars-core/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ pub mod strings;
#[cfg(feature = "temporal")]
#[cfg_attr(docsrs, doc(cfg(feature = "temporal")))]
pub mod temporal;
mod trusted_len;
pub mod upstream_traits;

use arrow::array::Array;
Expand Down Expand Up @@ -645,8 +646,13 @@ where
#[allow(clippy::wrong_self_convention)]
pub fn into_no_null_iter(
&self,
) -> impl Iterator<Item = T::Native> + '_ + Send + Sync + ExactSizeIterator + DoubleEndedIterator
{
) -> impl Iterator<Item = T::Native>
+ '_
+ Send
+ Sync
+ ExactSizeIterator
+ DoubleEndedIterator
+ TrustedLen {
// .copied was significantly slower in benchmark, next call did not inline?
#[allow(clippy::map_clone)]
self.data_views()
Expand Down
33 changes: 20 additions & 13 deletions polars/polars-core/src/chunked_array/ops/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,30 @@ use std::convert::TryFrom;
macro_rules! apply {
($self:expr, $f:expr) => {{
if $self.null_count() == 0 {
$self.into_no_null_iter().map($f).collect()
$self.into_no_null_iter().map($f).collect_trusted()
} else {
$self.into_iter().map(|opt_v| opt_v.map($f)).collect()
$self
.into_iter()
.map(|opt_v| opt_v.map($f))
.collect_trusted()
}
}};
}

macro_rules! apply_enumerate {
($self:expr, $f:expr) => {{
if $self.null_count() == 0 {
$self.into_no_null_iter().enumerate().map($f).collect()
$self
.into_no_null_iter()
.enumerate()
.map($f)
.collect_trusted()
} else {
$self
.into_iter()
.enumerate()
.map(|(idx, opt_v)| opt_v.map(|v| $f((idx, v))))
.collect()
.collect_trusted()
}
}};
}
Expand Down Expand Up @@ -95,7 +102,7 @@ where
.flatten()
.trust_my_length(self.len())
.map(|v| f(v.copied()))
.collect()
.collect_trusted()
}

fn apply_with_idx<F>(&'a self, f: F) -> Self
Expand All @@ -111,7 +118,7 @@ where
.trust_my_length(self.len())
.enumerate()
.map(|(idx, opt_v)| opt_v.map(|v| f((idx, *v))))
.collect()
.collect_trusted()
}
}

Expand All @@ -124,7 +131,7 @@ where
.trust_my_length(self.len())
.enumerate()
.map(|(idx, v)| f((idx, v.copied())))
.collect()
.collect_trusted()
}
fn apply_to_slice<F, V>(&'a self, f: F, slice: &mut [V])
where
Expand Down Expand Up @@ -181,7 +188,7 @@ impl<'a> ChunkApply<'a, bool, bool> for BooleanChunked {
where
F: Fn(Option<bool>) -> Option<bool> + Copy,
{
self.into_iter().map(f).collect()
self.into_iter().map(f).collect_trusted()
}

fn apply_with_idx<F>(&'a self, f: F) -> Self
Expand All @@ -195,7 +202,7 @@ impl<'a> ChunkApply<'a, bool, bool> for BooleanChunked {
where
F: Fn((usize, Option<bool>)) -> Option<bool> + Copy,
{
self.into_iter().enumerate().map(f).collect()
self.into_iter().enumerate().map(f).collect_trusted()
}

fn apply_to_slice<F, T>(&'a self, f: F, slice: &mut [T])
Expand Down Expand Up @@ -263,7 +270,7 @@ impl<'a> ChunkApply<'a, &'a str, Cow<'a, str>> for Utf8Chunked {
where
F: Fn(Option<&'a str>) -> Option<Cow<'a, str>> + Copy,
{
self.into_iter().map(f).collect()
self.into_iter().map(f).collect_trusted()
}

fn apply_with_idx<F>(&'a self, f: F) -> Self
Expand All @@ -277,7 +284,7 @@ impl<'a> ChunkApply<'a, &'a str, Cow<'a, str>> for Utf8Chunked {
where
F: Fn((usize, Option<&'a str>)) -> Option<Cow<'a, str>> + Copy,
{
self.into_iter().enumerate().map(f).collect()
self.into_iter().enumerate().map(f).collect_trusted()
}

fn apply_to_slice<F, T>(&'a self, f: F, slice: &mut [T])
Expand Down Expand Up @@ -422,7 +429,7 @@ impl<'a> ChunkApply<'a, Series, Series> for ListChunked {
where
F: Fn(Option<Series>) -> Option<Series> + Copy,
{
self.into_iter().map(f).collect()
self.into_iter().map(f).collect_trusted()
}

/// Apply a closure elementwise. The closure gets the index of the element as first argument.
Expand All @@ -438,7 +445,7 @@ impl<'a> ChunkApply<'a, Series, Series> for ListChunked {
where
F: Fn((usize, Option<Series>)) -> Option<Series> + Copy,
{
self.into_iter().enumerate().map(f).collect()
self.into_iter().enumerate().map(f).collect_trusted()
}

fn apply_to_slice<F, T>(&'a self, f: F, slice: &mut [T])
Expand Down

0 comments on commit 7675e2b

Please sign in to comment.