Remove redundant collect from downcast_chunks
ritchie46 committed Apr 10, 2021
1 parent cc50b1d commit 3e6b79e
Showing 14 changed files with 187 additions and 181 deletions.
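The change in one place: the old `Downcast` trait built a `Vec<&Array>` with an intermediate `collect`, and every caller immediately called `.iter()` on that Vec; the new inherent `downcast_chunks` methods hand back the chunk iterator directly. A minimal sketch of the before/after shape, using a toy chunk type rather than the real Arrow arrays:

// Simplified stand-in for ChunkedArray<T>; the real type wraps Arc<dyn arrow::Array> chunks.
struct ToyChunkedArray {
    chunks: Vec<Vec<i32>>,
}

impl ToyChunkedArray {
    // Old shape: materialize a Vec of references that every caller immediately re-iterates.
    fn downcast_chunks_old(&self) -> Vec<&Vec<i32>> {
        self.chunks.iter().collect()
    }

    // New shape: return the iterator itself; DoubleEndedIterator is kept because the
    // chunked-array iterators also walk chunks from the back.
    fn downcast_chunks(&self) -> impl Iterator<Item = &Vec<i32>> + DoubleEndedIterator {
        self.chunks.iter()
    }
}

fn main() {
    let ca = ToyChunkedArray { chunks: vec![vec![1, 2], vec![3]] };

    // Before: extra allocation plus an extra .iter() at every call site.
    let total_old: i32 = ca.downcast_chunks_old().iter().map(|c| c.iter().sum::<i32>()).sum();

    // After: the chunks are traversed lazily, with no intermediate Vec.
    let total_new: i32 = ca.downcast_chunks().map(|c| c.iter().sum::<i32>()).sum();

    assert_eq!(total_old, total_new);
}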
1 change: 0 additions & 1 deletion polars/polars-core/src/chunked_array/arithmetic.rs
@@ -73,7 +73,6 @@ where
     let (lhs, rhs) = align_chunks_binary(lhs, rhs);
     let chunks = lhs
         .downcast_chunks()
-        .iter()
         .zip(rhs.downcast_chunks())
         .map(|(lhs, rhs)| Arc::new(kernel(lhs, rhs).expect("output")) as ArrayRef)
         .collect();
3 changes: 0 additions & 3 deletions polars/polars-core/src/chunked_array/comparison.rs
@@ -22,7 +22,6 @@ where
     ) -> Result<BooleanChunked> {
         let chunks = self
             .downcast_chunks()
-            .iter()
             .zip(rhs.downcast_chunks())
             .map(|(left, right)| {
                 let arr_res = operator(left, right);
@@ -572,7 +571,6 @@ impl BooleanChunked {
     ) -> Result<BooleanChunked> {
         let chunks = self
             .downcast_chunks()
-            .iter()
             .zip(rhs.downcast_chunks())
             .map(|(left, right)| {
                 let arr_res = operator(left, right);
@@ -646,7 +644,6 @@ impl Not for &BooleanChunked {
     fn not(self) -> Self::Output {
         let chunks = self
             .downcast_chunks()
-            .iter()
             .map(|a| {
                 let arr = compute::not(a).expect("should not fail");
                 Arc::new(arr) as ArrayRef
41 changes: 20 additions & 21 deletions polars/polars-core/src/chunked_array/iterator/mod.rs
@@ -1,7 +1,6 @@
 use crate::datatypes::CategoricalChunked;
 use crate::prelude::{
-    BooleanChunked, ChunkedArray, Downcast, ListChunked, PolarsNumericType, Series, UnsafeValue,
-    Utf8Chunked,
+    BooleanChunked, ChunkedArray, ListChunked, PolarsNumericType, Series, UnsafeValue, Utf8Chunked,
 };
 use arrow::array::{
     Array, ArrayData, ArrayRef, BooleanArray, LargeListArray, LargeStringArray, PrimitiveArray,
@@ -88,7 +87,7 @@ where
     T: PolarsNumericType,
 {
     fn new(ca: &'a ChunkedArray<T>) -> Self {
-        let chunk = ca.downcast_chunks()[0];
+        let chunk = ca.downcast_chunks().next().unwrap();
         let slice = chunk.values();
         let iter = slice.iter().copied();
 
@@ -138,8 +137,8 @@ where
     T: PolarsNumericType,
 {
     fn new(ca: &'a ChunkedArray<T>) -> Self {
-        let chunks = ca.downcast_chunks();
-        let arr = chunks[0];
+        let mut chunks = ca.downcast_chunks();
+        let arr = chunks.next().unwrap();
         let idx_left = 0;
         let idx_right = arr.len();
 
@@ -250,7 +249,8 @@ where
         }
     }
     fn new(ca: &'a ChunkedArray<T>) -> Self {
-        let chunks = ca.downcast_chunks();
+        // TODO: traverse iterator without collect
+        let chunks: Vec<_> = ca.downcast_chunks().collect();
         let current_iter_left = chunks[0].values().iter().copied();
 
         let idx_left = 0;
@@ -292,7 +292,7 @@ where
             // iterators have met in the middle or at the end
             if self.idx_left == self.idx_right {
                 return None;
-            // one chunk is finished but there are still more chunks
+            // one chunk is finished but there are still more chunks
             } else {
                 self.chunk_idx_left += 1;
                 self.set_current_iter_left();
@@ -327,7 +327,7 @@ where
             // iterators have met in the middle or at the beginning
             if self.idx_left == self.idx_right {
                 return None;
-            // one chunk is finished but there are still more chunks
+            // one chunk is finished but there are still more chunks
             } else {
                 self.chunk_idx_right -= 1;
                 self.set_current_iter_right();
@@ -406,7 +406,8 @@ where
     }
 
     fn new(ca: &'a ChunkedArray<T>) -> Self {
-        let chunks = ca.downcast_chunks();
+        // TODO: traverse without collect
+        let chunks: Vec<_> = ca.downcast_chunks().collect();
         let arr_left = chunks[0];
         let current_iter_left = arr_left.values().iter().copied();
         let current_data_left = arr_left.data();
@@ -456,7 +457,7 @@ where
             // iterators have met in the middle or at the end
             if self.idx_left == self.idx_right {
                 return None;
-            // one chunk is finished but there are still more chunks
+            // one chunk is finished but there are still more chunks
             } else {
                 self.chunk_idx_left += 1;
                 // reset the index
@@ -501,7 +502,7 @@ where
             // iterators have met in the middle or at the beginning
             if self.idx_left == self.idx_right {
                 return None;
-            // one chunk is finished but there are still more chunks
+            // one chunk is finished but there are still more chunks
             } else {
                 self.chunk_idx_right -= 1;
                 self.set_current_iter_right();
@@ -536,8 +537,7 @@ where
     type IntoIter = Box<dyn PolarsIterator<Item = Self::Item> + 'a>;
 
     fn into_iter(self) -> Self::IntoIter {
-        let chunks = self.downcast_chunks();
-        match chunks.len() {
+        match self.chunks.len() {
             1 => {
                 if self.null_count() == 0 {
                     Box::new(SomeIterator(NumIterSingleChunk::new(self)))
@@ -623,8 +623,7 @@ macro_rules! impl_single_chunk_iterator {
 
         impl<'a> $iterator_name<'a> {
             fn new(ca: &'a $ca_type) -> Self {
-                let chunks = ca.downcast_chunks();
-                let current_array = chunks[0];
+                let current_array = ca.downcast_chunks().next().unwrap();
                 let idx_left = 0;
                 let idx_right = current_array.len();
 
@@ -752,8 +751,7 @@ macro_rules! impl_single_chunk_null_check_iterator {
 
         impl<'a> $iterator_name<'a> {
             fn new(ca: &'a $ca_type) -> Self {
-                let chunks = ca.downcast_chunks();
-                let current_array = chunks[0];
+                let current_array = ca.downcast_chunks().next().unwrap();
                 let current_data = current_array.data();
                 let idx_left = 0;
                 let idx_right = current_array.len();
@@ -893,7 +891,8 @@ macro_rules! impl_many_chunk_iterator {
 
         impl<'a> $iterator_name<'a> {
             fn new(ca: &'a $ca_type) -> Self {
-                let chunks = ca.downcast_chunks();
+                // TODO: fix without collect
+                let chunks: Vec<_> = ca.downcast_chunks().collect();
                 let current_array_left = chunks[0];
                 let idx_left = 0;
                 let chunk_idx_left = 0;
@@ -1087,7 +1086,8 @@ macro_rules! impl_many_chunk_null_check_iterator {
 
         impl<'a> $iterator_name<'a> {
             fn new(ca: &'a $ca_type) -> Self {
-                let chunks = ca.downcast_chunks();
+                // TODO: fix without collect
+                let chunks: Vec<_> = ca.downcast_chunks().collect();
                 let current_array_left = chunks[0];
                 let current_data_left = current_array_left.data();
                 let idx_left = 0;
@@ -1281,8 +1281,7 @@ macro_rules! impl_into_polars_iterator {
             /// Decides which iterator fits best the current chunked array. The decision are based
             /// on the number of chunks and the existence of null values.
            fn into_iter(self) -> Self::IntoIter {
-                let chunks = self.downcast_chunks();
-                match chunks.len() {
+                match self.chunks.len() {
                     1 => {
                         if self.null_count() == 0 {
                             Box::new(SomeIterator($single_chunk_ident::new(self)))
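The iterator changes above follow two patterns: single-chunk iterators replace `chunks[0]` with `.next().unwrap()`, while the many-chunk iterators still `collect()` into a `Vec` because they index chunks by position (hence the TODO comments). The reverse iterators also consume chunks from the back, which is why `downcast_chunks` keeps the `DoubleEndedIterator` bound. A small sketch of those access patterns, with plain vectors standing in for the Arrow chunks:

fn main() {
    // Stand-in for the per-chunk arrays; the real code yields &PrimitiveArray<T> and friends.
    let chunks = vec![vec![1, 2, 3], vec![4, 5], vec![6]];

    // Single-chunk iterators only need the first chunk: .next().unwrap() replaces chunks[0].
    let first = chunks.iter().next().unwrap();
    assert_eq!(first, &vec![1, 2, 3]);

    // The double-ended iterators pull chunks from the front *and* the back, which is why
    // downcast_chunks advertises DoubleEndedIterator.
    let mut it = chunks.iter();
    assert_eq!(it.next(), Some(&vec![1, 2, 3])); // left side
    assert_eq!(it.next_back(), Some(&vec![6]));  // right side
    assert_eq!(it.next_back(), Some(&vec![4, 5]));
    assert_eq!(it.next(), None);

    // Many-chunk iterators still need indexed access to an arbitrary chunk, so for now they
    // collect the iterator into a Vec (the TODO comments in the diff above).
    let collected: Vec<_> = chunks.iter().collect();
    assert_eq!(collected[1], &vec![4, 5]);
}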
92 changes: 38 additions & 54 deletions polars/polars-core/src/chunked_array/mod.rs
@@ -701,7 +701,11 @@ where
     /// Contiguous slice
     pub fn cont_slice(&self) -> Result<&[T::Native]> {
         if self.chunks.len() == 1 && self.chunks[0].null_count() == 0 {
-            Ok(self.downcast_chunks()[0].values())
+            Ok(self
+                .downcast_chunks()
+                .next()
+                .map(|arr| arr.values())
+                .unwrap())
         } else {
             Err(PolarsError::NoSlice)
         }
@@ -711,10 +715,7 @@
     /// NOTE: null values should be taken into account by the user of these slices as they are handled
     /// separately
     pub fn data_views(&self) -> Vec<&[T::Native]> {
-        self.downcast_chunks()
-            .iter()
-            .map(|arr| arr.values())
-            .collect()
+        self.downcast_chunks().map(|arr| arr.values()).collect()
     }
 
     /// If [cont_slice](#method.cont_slice) is successful a closure is mapped over the elements.
@@ -824,74 +825,57 @@ impl<T> Clone for ChunkedArray<T> {
     }
 }
 
-pub trait Downcast<T> {
-    fn downcast_chunks(&self) -> Vec<&T>;
-}
-
-impl<T> Downcast<PrimitiveArray<T>> for ChunkedArray<T>
+impl<T> ChunkedArray<T>
 where
     T: PolarsPrimitiveType,
 {
-    fn downcast_chunks(&self) -> Vec<&PrimitiveArray<T>> {
-        self.chunks
-            .iter()
-            .map(|arr| {
-                let arr = &**arr;
-                unsafe { &*(arr as *const dyn Array as *const PrimitiveArray<T>) }
-            })
-            .collect::<Vec<_>>()
+    pub fn downcast_chunks(
+        &self,
+    ) -> impl Iterator<Item = &PrimitiveArray<T>> + DoubleEndedIterator {
+        self.chunks.iter().map(|arr| {
+            let arr = &**arr;
+            unsafe { &*(arr as *const dyn Array as *const PrimitiveArray<T>) }
+        })
     }
 }
 
-impl Downcast<BooleanArray> for BooleanChunked {
-    fn downcast_chunks(&self) -> Vec<&BooleanArray> {
-        self.chunks
-            .iter()
-            .map(|arr| {
-                let arr = &**arr;
-                unsafe { &*(arr as *const dyn Array as *const BooleanArray) }
-            })
-            .collect::<Vec<_>>()
+impl BooleanChunked {
+    pub fn downcast_chunks(&self) -> impl Iterator<Item = &BooleanArray> + DoubleEndedIterator {
+        self.chunks.iter().map(|arr| {
+            let arr = &**arr;
+            unsafe { &*(arr as *const dyn Array as *const BooleanArray) }
+        })
     }
 }
 
-impl Downcast<LargeStringArray> for Utf8Chunked {
-    fn downcast_chunks(&self) -> Vec<&LargeStringArray> {
-        self.chunks
-            .iter()
-            .map(|arr| {
-                let arr = &**arr;
-                unsafe { &*(arr as *const dyn Array as *const LargeStringArray) }
-            })
-            .collect::<Vec<_>>()
+impl Utf8Chunked {
+    pub fn downcast_chunks(&self) -> impl Iterator<Item = &LargeStringArray> + DoubleEndedIterator {
+        self.chunks.iter().map(|arr| {
+            let arr = &**arr;
+            unsafe { &*(arr as *const dyn Array as *const LargeStringArray) }
+        })
     }
 }
 
-impl Downcast<LargeListArray> for ListChunked {
-    fn downcast_chunks(&self) -> Vec<&LargeListArray> {
-        self.chunks
-            .iter()
-            .map(|arr| {
-                let arr = &**arr;
-                unsafe { &*(arr as *const dyn Array as *const LargeListArray) }
-            })
-            .collect::<Vec<_>>()
+impl ListChunked {
+    pub fn downcast_chunks(&self) -> impl Iterator<Item = &LargeListArray> + DoubleEndedIterator {
+        self.chunks.iter().map(|arr| {
+            let arr = &**arr;
+            unsafe { &*(arr as *const dyn Array as *const LargeListArray) }
+        })
     }
 }
 
 #[cfg(feature = "object")]
-impl<T> Downcast<ObjectArray<T>> for ObjectChunked<T>
+impl<T> ObjectChunked<T>
 where
     T: 'static + std::fmt::Debug + Clone + Send + Sync + Default,
 {
-    fn downcast_chunks(&self) -> Vec<&ObjectArray<T>> {
-        self.chunks
-            .iter()
-            .map(|arr| {
-                let arr = &**arr;
-                unsafe { &*(arr as *const dyn Array as *const ObjectArray<T>) }
-            })
-            .collect::<Vec<_>>()
+    pub fn downcast_chunks(&self) -> impl Iterator<Item = &ObjectArray<T>> + DoubleEndedIterator {
+        self.chunks.iter().map(|arr| {
+            let arr = &**arr;
+            unsafe { &*(arr as *const dyn Array as *const ObjectArray<T>) }
+        })
     }
 }
 
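Each `downcast_chunks` still turns `&dyn Array` into the concrete array type with an unchecked pointer cast, which relies on the invariant that a chunked array's chunks match its dtype. A sketch of the same iterator-returning shape using a runtime-checked downcast via `std::any::Any` in place of the raw pointer cast (toy types, not the Arrow ones):

use std::any::Any;
use std::sync::Arc;

// Stand-ins for arrow's Array trait object and one concrete array type.
trait Array {
    fn as_any(&self) -> &dyn Any;
}

struct Int32Array(Vec<i32>);

impl Array for Int32Array {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

struct Chunked {
    chunks: Vec<Arc<dyn Array>>,
}

impl Chunked {
    // Same shape as the new downcast_chunks: map over the chunks and downcast each one,
    // but with a checked downcast instead of the unchecked pointer cast in the diff.
    fn downcast_chunks(&self) -> impl Iterator<Item = &Int32Array> + DoubleEndedIterator {
        self.chunks.iter().map(|arr| {
            arr.as_any()
                .downcast_ref::<Int32Array>()
                .expect("chunk dtype must match the ChunkedArray dtype")
        })
    }
}

fn main() {
    let ca = Chunked {
        chunks: vec![Arc::new(Int32Array(vec![1, 2])), Arc::new(Int32Array(vec![3]))],
    };
    let total: i32 = ca.downcast_chunks().map(|a| a.0.iter().sum::<i32>()).sum();
    assert_eq!(total, 6);
}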
9 changes: 3 additions & 6 deletions polars/polars-core/src/chunked_array/ops/aggregate.rs
@@ -84,8 +84,7 @@ where
 {
     fn sum(&self) -> Option<T::Native> {
         self.downcast_chunks()
-            .iter()
-            .map(|&a| compute::sum(a))
+            .map(|a| compute::sum(a))
             .fold(None, |acc, v| match v {
                 Some(v) => match acc {
                     None => Some(v),
@@ -101,8 +100,7 @@
             DataType::Float64 => agg_float_with_nans!(self, min, f64),
             _ => self
                 .downcast_chunks()
-                .iter()
-                .filter_map(|&a| compute::min(a))
+                .filter_map(|a| compute::min(a))
                 .fold_first_(|acc, v| if acc < v { acc } else { v }),
         }
     }
@@ -113,8 +111,7 @@
             DataType::Float64 => agg_float_with_nans!(self, max, f64),
             _ => self
                 .downcast_chunks()
-                .iter()
-                .filter_map(|&a| compute::max(a))
+                .filter_map(|a| compute::max(a))
                 .fold_first_(|acc, v| if acc > v { acc } else { v }),
         }
     }
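With `downcast_chunks` yielding array references directly, the aggregation closures take `|a|` instead of destructuring `|&a|`, and the per-chunk results from the compute kernels are folded as before. A sketch of that per-chunk fold, with plain slices standing in for Arrow arrays and a toy kernel in place of `compute::sum`:

// Per-chunk "kernel": returns None for an empty chunk, similar in spirit to how
// arrow's compute::sum returns None when nothing can be summed.
fn chunk_sum(chunk: &[i32]) -> Option<i32> {
    if chunk.is_empty() {
        None
    } else {
        Some(chunk.iter().sum())
    }
}

fn main() {
    let chunks: Vec<Vec<i32>> = vec![vec![1, 2, 3], vec![], vec![10]];

    // Same shape as the sum implementation in the diff: map the kernel over the chunk
    // iterator and fold the Option results, staying None only if every chunk produced None.
    let total = chunks
        .iter()
        .map(|c| chunk_sum(c))
        .fold(None, |acc, v| match (acc, v) {
            (None, v) => v,
            (acc, None) => acc,
            (Some(a), Some(b)) => Some(a + b),
        });

    assert_eq!(total, Some(16));
}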
22 changes: 11 additions & 11 deletions polars/polars-core/src/chunked_array/ops/chunkops.rs
@@ -108,19 +108,19 @@ where
         // todo! use iterators once implemented
         // no_null path
         if self.null_count() == 0 {
-            for idx in 0..self.len() {
-                let (chunk_idx, idx) = self.index_to_chunked_index(idx);
-                let arr = unsafe { &**chunks.get_unchecked(chunk_idx) };
-                builder.append_value(arr.value(idx).clone())
+            for arr in chunks {
+                for idx in 0..arr.len() {
+                    builder.append_value(arr.value(idx).clone())
+                }
             }
         } else {
-            for idx in 0..self.len() {
-                let (chunk_idx, idx) = self.index_to_chunked_index(idx);
-                let arr = unsafe { &**chunks.get_unchecked(chunk_idx) };
-                if arr.is_valid(idx) {
-                    builder.append_value(arr.value(idx).clone())
-                } else {
-                    builder.append_null()
+            for arr in chunks {
+                for idx in 0..arr.len() {
+                    if arr.is_valid(idx) {
+                        builder.append_value(arr.value(idx).clone())
+                    } else {
+                        builder.append_null()
+                    }
                 }
             }
         }
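The copy loop above now walks each chunk with its own local indices instead of translating a global index through `index_to_chunked_index` for every element. A sketch contrasting the two access patterns, with `Option<i32>` standing in for a nullable value and a hypothetical `index_to_chunked_index` closure written out for comparison:

fn main() {
    // Option<i32> stands in for a nullable Arrow value.
    let chunks: Vec<Vec<Option<i32>>> = vec![vec![Some(1), None], vec![Some(3)]];

    // Old pattern: one global index, translated to (chunk_idx, local_idx) for every element.
    let index_to_chunked_index = |mut idx: usize| -> (usize, usize) {
        for (chunk_idx, chunk) in chunks.iter().enumerate() {
            if idx < chunk.len() {
                return (chunk_idx, idx);
            }
            idx -= chunk.len();
        }
        unreachable!("index out of bounds")
    };
    let total_len: usize = chunks.iter().map(|c| c.len()).sum();
    let mut old_builder = Vec::new();
    for idx in 0..total_len {
        let (chunk_idx, local_idx) = index_to_chunked_index(idx);
        old_builder.push(chunks[chunk_idx][local_idx]);
    }

    // New pattern from the diff: iterate chunks, then local indices; no per-element translation.
    let mut new_builder = Vec::new();
    for arr in &chunks {
        for idx in 0..arr.len() {
            new_builder.push(arr[idx]);
        }
    }

    assert_eq!(old_builder, new_builder);
}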
10 changes: 8 additions & 2 deletions polars/polars-core/src/chunked_array/ops/explode.rs
@@ -33,7 +33,10 @@ impl ChunkExplode for ListChunked {
         // of the list. And we also return a slice of the offsets. This slice can be used to find the old
         // list layout or indexes to expand the DataFrame in the same manner as the 'explode' operation
         let ca = self.rechunk();
-        let listarr: &LargeListArray = ca.downcast_chunks()[0];
+        let listarr: &LargeListArray = ca
+            .downcast_chunks()
+            .next()
+            .ok_or_else(|| PolarsError::NoData("cannot explode empty list".into()))?;
         let list_data = listarr.data();
         let values = listarr.values();
         let offset_ptr = list_data.buffers()[0].as_ptr() as *const i64;
@@ -51,7 +54,10 @@
         // of the list. And we also return a slice of the offsets. This slice can be used to find the old
         // list layout or indexes to expand the DataFrame in the same manner as the 'explode' operation
         let ca = self.rechunk();
-        let stringarr: &LargeStringArray = ca.downcast_chunks()[0];
+        let stringarr: &LargeStringArray = ca
+            .downcast_chunks()
+            .next()
+            .ok_or_else(|| PolarsError::NoData("cannot explode empty str".into()))?;
         let list_data = stringarr.data();
         let str_values_buf = stringarr.value_data();
 
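Taking the first chunk via `.next()` also turns the old panicking `[0]` index into a recoverable error through `ok_or_else`. A tiny sketch of that Option-to-Result conversion, with a toy error type in place of `PolarsError::NoData`:

#[derive(Debug)]
struct NoData(String);

fn first_chunk(chunks: &[Vec<i32>]) -> Result<&Vec<i32>, NoData> {
    // Same pattern as the explode change: Option -> Result instead of a panicking [0] index.
    chunks
        .iter()
        .next()
        .ok_or_else(|| NoData("cannot explode empty list".into()))
}

fn main() {
    let some_chunks = vec![vec![1, 2], vec![3]];
    assert!(first_chunk(&some_chunks).is_ok());

    let no_chunks: Vec<Vec<i32>> = vec![];
    assert!(first_chunk(&no_chunks).is_err());
}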
