Skip to content

Commit

Permalink
perf: Don't rechunk in parallel collection
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Apr 26, 2024
1 parent e541e96 commit c3d9c70
Show file tree
Hide file tree
Showing 7 changed files with 246 additions and 234 deletions.
39 changes: 25 additions & 14 deletions crates/polars-arrow/src/array/boolean/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,23 +98,30 @@ impl MutableBooleanArray {
}
}

#[inline]
pub fn push_value(&mut self, value: bool) {
self.values.push(value);
match &mut self.validity {
Some(validity) => validity.push(true),
None => {},
}
}

#[inline]
pub fn push_null(&mut self) {
self.values.push(false);
match &mut self.validity {
Some(validity) => validity.push(false),
None => self.init_validity(),
}
}

/// Pushes a new entry to [`MutableBooleanArray`].
#[inline]
pub fn push(&mut self, value: Option<bool>) {
match value {
Some(value) => {
self.values.push(value);
match &mut self.validity {
Some(validity) => validity.push(true),
None => {},
}
},
None => {
self.values.push(false);
match &mut self.validity {
Some(validity) => validity.push(false),
None => self.init_validity(),
}
},
Some(value) => self.push_value(value),
None => self.push_null(),
}
}

Expand Down Expand Up @@ -232,6 +239,10 @@ impl MutableBooleanArray {
let a: BooleanArray = self.into();
Arc::new(a)
}

pub fn freeze(self) -> BooleanArray {
self.into()
}
}

/// Getters
Expand Down
130 changes: 126 additions & 4 deletions crates/polars-arrow/src/pushable.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::array::{MutableBinaryViewArray, MutablePrimitiveArray, ViewType};
use crate::array::{MutableBinaryViewArray, MutableBooleanArray, MutablePrimitiveArray, ViewType};
use crate::bitmap::MutableBitmap;
use crate::offset::{Offset, Offsets};
use crate::types::NativeType;
Expand Down Expand Up @@ -141,14 +141,22 @@ impl<T: NativeType> Pushable<Option<T>> for MutablePrimitiveArray<T> {
}
}

impl<T: ViewType + ?Sized> Pushable<&T> for MutableBinaryViewArray<T> {
pub trait NoOption {}
impl NoOption for &str {}
impl NoOption for &[u8] {}

impl<T, K> Pushable<T> for MutableBinaryViewArray<K>
where
T: AsRef<K> + NoOption,
K: ViewType + ?Sized,
{
#[inline]
fn reserve(&mut self, additional: usize) {
MutableBinaryViewArray::reserve(self, additional)
}

#[inline]
fn push(&mut self, value: &T) {
fn push(&mut self, value: T) {
MutableBinaryViewArray::push_value(self, value)
}

Expand All @@ -161,7 +169,8 @@ impl<T: ViewType + ?Sized> Pushable<&T> for MutableBinaryViewArray<T> {
MutableBinaryViewArray::push_null(self)
}

fn extend_constant(&mut self, additional: usize, value: &T) {
fn extend_constant(&mut self, additional: usize, value: T) {
let value = value.as_ref();
// First push a value to get the View
MutableBinaryViewArray::push_value(self, value);

Expand All @@ -184,3 +193,116 @@ impl<T: ViewType + ?Sized> Pushable<&T> for MutableBinaryViewArray<T> {
self.extend_null(additional);
}
}

impl<T, K> Pushable<Option<T>> for MutableBinaryViewArray<K>
where
T: AsRef<K>,
K: ViewType + ?Sized,
{
#[inline]
fn reserve(&mut self, additional: usize) {
MutableBinaryViewArray::reserve(self, additional)
}

#[inline]
fn push(&mut self, value: Option<T>) {
MutableBinaryViewArray::push(self, value.as_ref())
}

#[inline]
fn len(&self) -> usize {
MutableBinaryViewArray::len(self)
}

fn push_null(&mut self) {
MutableBinaryViewArray::push_null(self)
}

fn extend_constant(&mut self, additional: usize, value: Option<T>) {
let value = value.as_ref();
// First push a value to get the View
MutableBinaryViewArray::push(self, value);

// And then use that new view to extend
let views = self.views_mut();
let view = *views.last().unwrap();

let remaining = additional - 1;
for _ in 0..remaining {
views.push(view);
}

if let Some(bitmap) = self.validity() {
bitmap.extend_constant(remaining, true)
}
}

#[inline]
fn extend_null_constant(&mut self, additional: usize) {
self.extend_null(additional);
}
}

impl Pushable<bool> for MutableBooleanArray {
#[inline]
fn reserve(&mut self, additional: usize) {
MutableBooleanArray::reserve(self, additional)
}

#[inline]
fn push(&mut self, value: bool) {
MutableBooleanArray::push_value(self, value)
}

#[inline]
fn len(&self) -> usize {
self.values().len()
}

#[inline]
fn push_null(&mut self) {
unreachable!()
}

#[inline]
fn extend_constant(&mut self, additional: usize, value: bool) {
MutableBooleanArray::extend_constant(self, additional, Some(value))
}

#[inline]
fn extend_null_constant(&mut self, _additional: usize) {
unreachable!()
}
}

impl Pushable<Option<bool>> for MutableBooleanArray {
#[inline]
fn reserve(&mut self, additional: usize) {
MutableBooleanArray::reserve(self, additional)
}

#[inline]
fn push(&mut self, value: Option<bool>) {
MutableBooleanArray::push(self, value)
}

#[inline]
fn len(&self) -> usize {
self.values().len()
}

#[inline]
fn push_null(&mut self) {
MutableBooleanArray::push_null(self)
}

#[inline]
fn extend_constant(&mut self, additional: usize, value: Option<bool>) {
MutableBooleanArray::extend_constant(self, additional, value)
}

#[inline]
fn extend_null_constant(&mut self, additional: usize) {
MutableBooleanArray::extend_constant(self, additional, None)
}
}
4 changes: 2 additions & 2 deletions crates/polars-core/src/chunked_array/builder/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ impl ChunkedBuilder<bool, BooleanType> for BooleanChunkedBuilder {
/// Appends a value of type `T` into the builder
#[inline]
fn append_value(&mut self, v: bool) {
self.array_builder.push(Some(v));
self.array_builder.push_value(v);
}

/// Appends a null slot into the builder
#[inline]
fn append_null(&mut self) {
self.array_builder.push(None);
self.array_builder.push_null();
}

fn finish(mut self) -> BooleanChunked {
Expand Down
28 changes: 28 additions & 0 deletions crates/polars-core/src/chunked_array/from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,31 @@ impl BooleanChunked {
Self::with_chunk(name, arr)
}
}

impl<'a, T> From<&'a ChunkedArray<T>> for Vec<Option<T::Physical<'a>>>
where
T: PolarsDataType,
{
fn from(ca: &'a ChunkedArray<T>) -> Self {
let mut out = Vec::with_capacity(ca.len());
for arr in ca.downcast_iter() {
out.extend(arr.iter())
}
out
}
}
impl From<StringChunked> for Vec<Option<String>> {
fn from(ca: StringChunked) -> Self {
ca.iter().map(|opt| opt.map(|s| s.to_string())).collect()
}
}

impl From<BooleanChunked> for Vec<Option<bool>> {
fn from(ca: BooleanChunked) -> Self {
let mut out = Vec::with_capacity(ca.len());
for arr in ca.downcast_iter() {
out.extend(arr.iter())
}
out
}
}

0 comments on commit c3d9c70

Please sign in to comment.