Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Removed un-necessary allocation in assign_ops #1085

Merged
merged 1 commit into from
Jun 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions benches/assign_ops.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use criterion::{criterion_group, criterion_main, Criterion};

use arrow2::compute::arity_assign::binary;
use arrow2::compute::arity_assign::{binary, unary};
use arrow2::{
compute::arithmetics::basic::{mul, mul_scalar},
util::bench_util::*,
Expand All @@ -13,8 +13,7 @@ fn add_benchmark(c: &mut Criterion) {
let mut arr_a = create_primitive_array::<f32>(size, 0.2);
c.bench_function(&format!("apply_mul 2^{}", log2_size), |b| {
b.iter(|| {
criterion::black_box(&mut arr_a)
.apply_values_mut(|x| x.iter_mut().for_each(|x| *x *= 1.01));
unary(criterion::black_box(&mut arr_a), |x| x * 1.01);
assert!(!arr_a.value(10).is_nan());
})
});
Expand All @@ -30,7 +29,7 @@ fn add_benchmark(c: &mut Criterion) {
let mut arr_a = create_primitive_array::<f32>(size, 0.2);
let mut arr_b = create_primitive_array_with_seed::<f32>(size, 0.2, 10);
// convert to be close to 1.01
arr_b.apply_values_mut(|x| x.iter_mut().for_each(|x| *x = 1.01 + *x / 20.0));
unary(&mut arr_b, |x| 1.01 + x / 20.0);

c.bench_function(&format!("apply_mul null 2^{}", log2_size), |b| {
b.iter(|| {
Expand Down
8 changes: 5 additions & 3 deletions examples/cow.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
// This example demos how to operate on arrays in-place.
use arrow2::array::{Array, PrimitiveArray};
use arrow2::compute::arity_assign;

fn main() {
// say we have have received an array
// say we have have received an `Array`
let mut array: Box<dyn Array> = PrimitiveArray::from_vec(vec![1i32, 2]).boxed();

// we can apply a transformation to its values without allocating a new array as follows:

// 1. downcast it to the correct type (known via `array.data_type().to_physical_type()`)
let array = array
.as_any_mut()
.downcast_mut::<PrimitiveArray<i32>>()
.unwrap();

// 2. call `apply_values` with the function to apply over the values
array.apply_values_mut(|x| x.iter_mut().for_each(|x| *x *= 10));
// 2. call `unary` with the function to apply to each value
arity_assign::unary(array, |x| x * 10);

// confirm that it gives the right result :)
assert_eq!(array, &PrimitiveArray::from_vec(vec![10i32, 20]));
Expand Down
19 changes: 0 additions & 19 deletions src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,25 +236,6 @@ impl BooleanArray {
self.values = values.into();
}

/// Applies a function `f` to the validity of this array, cloning it
/// iff it is being shared.
///
/// This is an API to leverage clone-on-write
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics if the function modifies the length of the [`MutableBitmap`].
pub fn apply_validity_mut<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
if let Some(validity) = self.validity.as_mut() {
let owned_validity = std::mem::take(validity);
let mut mut_bitmap = owned_validity.make_mut();
f(&mut mut_bitmap);
assert_eq!(mut_bitmap.len(), self.values.len());
*validity = mut_bitmap.into();
}
}

/// Try to convert this [`BooleanArray`] to a [`MutableBooleanArray`]
pub fn into_mut(self) -> Either<Self, MutableBooleanArray> {
use Either::*;
Expand Down
55 changes: 10 additions & 45 deletions src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
bitmap::{
utils::{zip_validity, ZipValidity},
Bitmap, MutableBitmap,
Bitmap,
},
buffer::Buffer,
datatypes::*,
Expand Down Expand Up @@ -283,59 +283,24 @@ impl<T: NativeType> PrimitiveArray<T> {
self.values = values;
}

/// Applies a function `f` to the values of this array, cloning the values
/// iff they are being shared with others
///
/// This is an API to use clone-on-write
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics, if `f` modifies the length of `&mut [T]`
pub fn apply_values_mut<F: Fn(&mut [T])>(&mut self, f: F) {
let values = std::mem::take(&mut self.values);
let mut values = values.make_mut();
let len = values.len();
f(&mut values);
assert_eq!(values.len(), len, "values length must remain the same");
self.values = values.into();
}

/// Applies a function `f` to the validity of this array, cloning it
/// iff it is being shared.
///
/// This is an API to leverage clone-on-write
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics if the function modifies the length of the [`MutableBitmap`].
pub fn apply_validity_mut<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
if let Some(validity) = self.validity.as_mut() {
let owned_validity = std::mem::take(validity);
let mut mut_bitmap = owned_validity.make_mut();
f(&mut mut_bitmap);
assert_eq!(mut_bitmap.len(), self.values.len());
*validity = mut_bitmap.into();
}
}

/// Applies a function `f` to the validity of this array, the caller can decide to make
/// it mutable or not.
///
/// This is an API to leverage clone-on-write
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics if the function modifies the length of the [`MutableBitmap`].
pub fn apply_validity<F: Fn(&mut Bitmap)>(&mut self, f: F) {
if let Some(validity) = self.validity.as_mut() {
f(validity);
/// This function panics if the function `f` modifies the length of the [`Bitmap`].
pub fn apply_validity<F: Fn(Bitmap) -> Bitmap>(&mut self, f: F) {
if let Some(validity) = std::mem::take(&mut self.validity) {
jorgecarleitao marked this conversation as resolved.
Show resolved Hide resolved
assert_eq!(validity.len(), self.values.len());
self.validity = Some(f(validity))
}
}

/// Returns an option of a mutable reference to the values of this [`PrimitiveArray`].
pub fn get_mut_values(&mut self) -> Option<&mut [T]> {
self.values.get_mut().map(|x| x.as_mut())
}

/// Try to convert this [`PrimitiveArray`] to a [`MutablePrimitiveArray`] via copy-on-write semantics.
///
/// A [`PrimitiveArray`] is backed by a [`Buffer`] and [`Bitmap`] which are essentially `Arc<Vec<_>>`.
Expand Down
20 changes: 1 addition & 19 deletions src/array/primitive/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,27 +90,9 @@ impl<T: NativeType> MutablePrimitiveArray<T> {
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics, if `f` modifies the length of `&mut [T]`
/// This function panics iff `f` panics
jorgecarleitao marked this conversation as resolved.
Show resolved Hide resolved
pub fn apply_values<F: Fn(&mut [T])>(&mut self, f: F) {
let len = self.values.len();
f(&mut self.values);
assert_eq!(len, self.values.len(), "values length must remain the same")
}

/// Applies a function `f` to the validity of this array, cloning it
/// iff it is being shared.
///
/// This is an API to leverage clone-on-write
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics if the function modifies the length of the [`MutableBitmap`].
pub fn apply_validity<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
if let Some(validity) = &mut self.validity {
f(validity);
assert_eq!(validity.len(), self.values.len());
}
}
}

Expand Down
26 changes: 17 additions & 9 deletions src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,20 @@ impl<T: NativeType> Buffer<T> {
self.offset
}

/// Gets a mutable reference to its underlying [`Vec`], if it not being shared.
///
/// This operation returns a [`Vec`] iff this [`Buffer`]:
/// * is not an offsetted slice of another [`Buffer`]
/// * has not been cloned (i.e. [`Arc`]`::get_mut` yields [`Some`])
/// * has not been imported from the c data interface (FFI)
pub fn get_mut(&mut self) -> Option<&mut Vec<T>> {
if self.offset != 0 {
None
} else {
Arc::get_mut(&mut self.data).and_then(|b| b.get_vec())
}
}

/// Converts this [`Buffer`] to either a [`Buffer`] or a [`Vec`], returning itself if the conversion
/// is not possible
///
Expand All @@ -145,16 +159,10 @@ impl<T: NativeType> Buffer<T> {
/// * has not been cloned (i.e. [`Arc`]`::get_mut` yields [`Some`])
/// * has not been imported from the c data interface (FFI)
pub fn into_mut(mut self) -> Either<Self, Vec<T>> {
if self.offset != 0 {
Either::Left(self)
if let Some(vec) = self.get_mut() {
Either::Right(std::mem::take(vec))
} else {
match Arc::get_mut(&mut self.data).and_then(|b| b.get_vec()) {
Some(v) => {
let data = std::mem::take(v);
Either::Right(data)
}
None => Either::Left(self),
}
Either::Left(self)
}
}

Expand Down
77 changes: 36 additions & 41 deletions src/compute/arity_assign.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ use super::utils::check_same_len;
use crate::{array::PrimitiveArray, types::NativeType};
use either::Either;

/// Applies an unary function to a [`PrimitiveArray`] in-place via cow semantics.
/// Applies an unary function to a [`PrimitiveArray`], optionally in-place.
///
/// # Implementation
/// This is the fastest method to apply a binary operation and it is often vectorized (SIMD).
/// This function tries to apply the function directly to the values of the array.
/// If that region is shared, this function creates a new region and writes to it.
///
/// # Panics
/// This function panics iff
/// * the arrays have a different length.
Expand All @@ -18,14 +20,22 @@ where
I: NativeType,
F: Fn(I) -> I,
{
array.apply_values_mut(|values| values.iter_mut().for_each(|v| *v = op(*v)));
if let Some(values) = array.get_mut_values() {
// mutate in place
values.iter_mut().for_each(|l| *l = op(*l));
} else {
// alloc and write to new region
let values = array.values().iter().map(|l| op(*l)).collect::<Vec<_>>();
jorgecarleitao marked this conversation as resolved.
Show resolved Hide resolved
array.set_values(values.into());
}
}

/// Applies a binary operations to two [`PrimitiveArray`], applying the operation
/// in-place to the `lhs` via cow semantics.
/// Applies a binary function to two [`PrimitiveArray`]s, optionally in-place, returning
/// a new [`PrimitiveArray`].
///
/// # Implementation
/// This is the fastest way to perform a binary operation and it is often vectorized (SIMD).
/// This function tries to apply the function directly to the values of the array.
/// If that region is shared, this function creates a new region and writes to it.
/// # Panics
/// This function panics iff
/// * the arrays have a different length.
Expand All @@ -48,54 +58,39 @@ where
// bitmap has an offset.
if let Some(rhs) = rhs.validity() {
if lhs.validity().is_none() {
*lhs = lhs.with_validity(Some(rhs.clone()))
lhs.set_validity(Some(rhs.clone()));
} else {
lhs.apply_validity(|bitmap| {
// we need to take ownership for the `into_mut` call, but leave the `&mut` lhs intact
// so that we can later assign the result to out `&mut bitmap`
let owned_lhs = std::mem::take(bitmap);

*bitmap = match owned_lhs.into_mut() {
// we take alloc and write to new buffer
match bitmap.into_mut() {
Either::Left(immutable) => {
// we allocate a new bitmap
// alloc new region
&immutable & rhs
}
// we can mutate in place, happy days.
Either::Right(mut mutable) => {
// mutate in place
let mut mutable_ref = &mut mutable;
mutable_ref &= rhs;
mutable.into()
}
}
});
}
}
};

// we need to take ownership for the `into_mut` call, but leave the `&mut` lhs intact
// so that we can later assign the result to out `&mut lhs`
let owned_lhs = std::mem::take(lhs);

*lhs = match owned_lhs.into_mut() {
jorgecarleitao marked this conversation as resolved.
Show resolved Hide resolved
// we take alloc and write to new buffer
Either::Left(mut immutable) => {
let values = immutable
.values()
.iter()
.zip(rhs.values().iter())
.map(|(l, r)| op(*l, *r))
.collect::<Vec<_>>();
immutable.set_values(values.into());
immutable
}
// we can mutate in place
Either::Right(mut mutable) => {
mutable.apply_values(|x| {
x.iter_mut()
.zip(rhs.values().iter())
.for_each(|(l, r)| *l = op(*l, *r))
});
mutable.into()
}
if let Some(values) = lhs.get_mut_values() {
// mutate values in place
values
.iter_mut()
.zip(rhs.values().iter())
.for_each(|(l, r)| *l = op(*l, *r));
} else {
// alloc new region
let values = lhs
.values()
.iter()
.zip(rhs.values().iter())
.map(|(l, r)| op(*l, *r))
.collect::<Vec<_>>();
lhs.set_values(values.into());
}
}
24 changes: 0 additions & 24 deletions tests/it/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,27 +131,3 @@ fn from_iter() {
let a: BooleanArray = iter.collect();
assert_eq!(a.len(), 2);
}

#[test]
fn apply_values() {
let mut a = BooleanArray::from([Some(true), Some(false), None]);
a.apply_values_mut(|x| {
let mut a = std::mem::take(x);
a = !a;
*x = a;
});
let expected = BooleanArray::from([Some(false), Some(true), None]);
assert_eq!(a, expected);
}

#[test]
fn apply_validity() {
let mut a = BooleanArray::from([Some(true), Some(false), None]);
a.apply_validity_mut(|x| {
let mut a = std::mem::take(x);
a = !a;
*x = a;
});
let expected = BooleanArray::from([None, None, Some(false)]);
assert_eq!(a, expected);
}
22 changes: 0 additions & 22 deletions tests/it/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,25 +124,3 @@ fn into_mut_3() {
let array = PrimitiveArray::new(DataType::Int32, values, validity);
assert!(array.into_mut().is_right());
}

#[test]
fn apply_values() {
let mut a = PrimitiveArray::from([Some(1), Some(2), None]);
a.apply_values_mut(|x| {
x[0] = 10;
});
let expected = PrimitiveArray::from([Some(10), Some(2), None]);
assert_eq!(a, expected);
}

#[test]
fn apply_validity() {
let mut a = PrimitiveArray::from([Some(1), Some(2), None]);
a.apply_validity_mut(|x| {
let mut a = std::mem::take(x);
a = !a;
*x = a;
});
let expected = PrimitiveArray::from([None, None, Some(0)]);
assert_eq!(a, expected);
}