Skip to content

Commit

Permalink
improve expressions and ListChunked::from_iter perf (#2962)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Mar 24, 2022
1 parent be59e8b commit a28dfa7
Show file tree
Hide file tree
Showing 15 changed files with 167 additions and 2,953 deletions.
23 changes: 20 additions & 3 deletions polars/polars-arrow/src/array/list.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use arrow::array::{Array, ListArray};
use arrow::array::{Array, ArrayRef, ListArray};
use arrow::bitmap::MutableBitmap;
use arrow::compute::concatenate;
use arrow::datatypes::DataType;
use arrow::error::Result;

pub struct AnonymousBuilder<'a> {
Expand All @@ -26,6 +27,10 @@ impl<'a> AnonymousBuilder<'a> {
*self.offsets.last().unwrap()
}

/// Returns `true` when the builder currently holds no value arrays,
/// i.e. `self.arrays` is empty.
pub fn is_empty(&self) -> bool {
self.arrays.is_empty()
}

pub fn push(&mut self, arr: &'a dyn Array) {
self.size += arr.len() as i64;
self.offsets.push(self.size);
Expand All @@ -35,6 +40,18 @@ impl<'a> AnonymousBuilder<'a> {
validity.push(true)
}
}

/// Appends every array in `arrs` as ONE list element.
///
/// All arrays are stored in order, but only a single offset is recorded
/// after the last of them, so together they form one entry of the list.
/// When a validity bitmap is being tracked, the entry is marked as valid.
pub fn push_multiple(&mut self, arrs: &'a [ArrayRef]) {
    // Total number of values contributed by all arrays of this element.
    let added: i64 = arrs.iter().map(|chunk| chunk.len() as i64).sum();
    self.size += added;
    self.arrays.extend(arrs.iter().map(|chunk| chunk.as_ref()));

    // One offset for the whole group: the arrays together form one element.
    self.offsets.push(self.size);
    if let Some(validity) = self.validity.as_mut() {
        validity.push(true);
    }
}

pub fn push_null(&mut self) {
self.offsets.push(self.last_offset());
match &mut self.validity {
Expand All @@ -52,8 +69,8 @@ impl<'a> AnonymousBuilder<'a> {
self.validity = Some(validity)
}

pub fn finish(self) -> Result<ListArray<i64>> {
let inner_dtype = self.arrays[0].data_type();
pub fn finish(self, inner_dtype: Option<&DataType>) -> Result<ListArray<i64>> {
let inner_dtype = inner_dtype.unwrap_or_else(|| self.arrays[0].data_type());
let values = concatenate::concatenate(&self.arrays)?;

let dtype = ListArray::<i64>::default_datatype(inner_dtype.clone());
Expand Down
20 changes: 15 additions & 5 deletions polars/polars-core/src/chunked_array/builder/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,13 +370,15 @@ pub fn get_list_builder(
/// Builder that wraps an `AnonymousBuilder` together with the name and the
/// inner dtype needed to turn the collected arrays into a `ListChunked`.
pub struct AnonymousListBuilder<'a> {
/// Name assigned to the field of the `ListChunked` produced by `finish`.
name: String,
/// Underlying builder that collects the borrowed arrays.
builder: AnonymousBuilder<'a>,
/// Inner (element) dtype of the list; `finish` wraps it in `DataType::List`
/// and also uses it when the builder is empty.
pub dtype: DataType,
}

impl<'a> AnonymousListBuilder<'a> {
pub fn new(name: &str, capacity: usize) -> Self {
pub fn new(name: &str, capacity: usize, dtype: DataType) -> Self {
Self {
name: name.into(),
builder: AnonymousBuilder::new(capacity),
dtype,
}
}

Expand All @@ -394,12 +396,20 @@ impl<'a> AnonymousListBuilder<'a> {
}

pub fn append_series(&mut self, s: &'a Series) {
assert_eq!(s.chunks().len(), 1);
self.builder.push(s.chunks()[0].as_ref())
self.builder.push_multiple(s.chunks());
}

pub fn finish(self) -> ListChunked {
let arr = self.builder.finish().unwrap();
ListChunked::from_chunks(&self.name, vec![Arc::new(arr)])
if self.builder.is_empty() {
ListChunked::full_null_with_dtype(&self.name, 0, &self.dtype)
} else {
let arr = self
.builder
.finish(Some(&self.dtype.to_physical().to_arrow()))
.unwrap();
let mut ca = ListChunked::from_chunks("", vec![Arc::new(arr)]);
ca.field = Arc::new(Field::new(&self.name, DataType::List(Box::new(self.dtype))));
ca
}
}
}
4 changes: 0 additions & 4 deletions polars/polars-core/src/chunked_array/iterator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@ use std::convert::TryFrom;

type LargeStringArray = Utf8Array<i64>;
type LargeListArray = ListArray<i64>;

// If the `parallel` feature is enabled, activate the parallel module.
#[cfg(feature = "parallel")]
#[cfg_attr(docsrs, doc(cfg(feature = "parallel")))]
pub mod par;

/// A `PolarsIterator` is an iterator over a `ChunkedArray` which contains polars types. A `PolarsIterator`
Expand Down

0 comments on commit a28dfa7

Please sign in to comment.