Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added example
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Jun 8, 2022
1 parent d15814d commit 715fe71
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 26 deletions.
2 changes: 1 addition & 1 deletion benches/write_parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ type ChunkBox = Chunk<Box<dyn Array>>;

fn write(array: &dyn Array, encoding: Encoding) -> Result<()> {
let schema = Schema::from(vec![Field::new("c1", array.data_type().clone(), true)]);
let columns: ChunkBox = Chunk::new(vec![clone(array).into()]);
let columns: ChunkBox = Chunk::new(vec![clone(array)]);

let options = WriteOptions {
write_statistics: false,
Expand Down
42 changes: 42 additions & 0 deletions examples/cow.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// This example demos how to operate on arrays in-place.
use arrow2::{
array::{Array, PrimitiveArray},
types::NativeType,
};

// Clone-on-write helper: runs `f` over the values of the `PrimitiveArray<T>`
// stored behind `array`, then writes the result back into the same array.
// Panics if `array` is not a `PrimitiveArray<T>` or if re-assignment fails.
fn cow_apply<T: NativeType, F: Fn(&mut [T])>(array: &mut Box<dyn Array>, f: F) {
    // recover the concrete primitive array hidden behind the trait object
    let primitive = array
        .as_any_mut()
        .downcast_mut::<PrimitiveArray<T>>()
        .unwrap();

    // move the contents out of the mutable reference and split them into
    // logical type, values buffer and validity
    let (data_type, buffer, validity) = primitive.take().into_inner();

    // obtain mutable (clone-on-write) access to the values and transform them
    let mut owned = buffer.make_mut();
    f(&mut owned);

    // hand the transformed values back to the array we borrowed
    primitive
        .try_assign(data_type, owned.into(), validity)
        .unwrap();
}

fn main() {
    // say we have received an array
    let mut array = PrimitiveArray::from_vec(vec![1i32, 2]).boxed();

    // its values can be transformed without allocating a new array:
    cow_apply(&mut array, |values: &mut [i32]| {
        for value in values.iter_mut() {
            *value *= 10;
        }
    });

    // confirm that every value was multiplied by ten
    let expected = PrimitiveArray::from_vec(vec![10i32, 20]);
    assert_eq!(array.as_ref(), expected);
}
13 changes: 13 additions & 0 deletions guide/src/high_level.md
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,16 @@ Some notes:
and cloned its validity. This approach is suitable for operations whose branching off
is more expensive than operating over all values. If the operation is expensive,
then using `PrimitiveArray::<O>::from_trusted_len_iter` is likely faster.

## Clone on write semantics

We support the mutation of arrays in-place via clone-on-write semantics.
Essentially, all data is under an `Arc`, but it can be taken via `Arc::get_mut`
and operated on in place.

Below is a complete example of how to operate on a `Box<dyn Array>` without
extra allocations.

```rust
{{#include ../../examples/cow.rs}}
```
73 changes: 48 additions & 25 deletions src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,28 @@ pub struct PrimitiveArray<T: NativeType> {
validity: Option<Bitmap>,
}

/// Validates the components of a [`PrimitiveArray`] before they are stored.
///
/// # Errors
/// Returns an out-of-spec error when:
/// * `validity` is `Some` and its length differs from `values.len()`
/// * the physical type of `data_type` is not `PhysicalType::Primitive(T::PRIMITIVE)`
fn check<T: NativeType>(
    data_type: &DataType,
    values: &[T],
    validity: &Option<Bitmap>,
) -> Result<(), Error> {
    // a missing validity is always acceptable; a present one must cover every value
    if validity
        .as_ref()
        .map_or(false, |validity| validity.len() != values.len())
    {
        return Err(Error::oos(
            "validity mask length must match the number of values",
        ));
    }

    // the logical type must be backed by the primitive type `T`
    if data_type.to_physical_type() != PhysicalType::Primitive(T::PRIMITIVE) {
        return Err(Error::oos(
            "PrimitiveArray can only be initialized with a DataType whose physical type is Primitive",
        ));
    }
    Ok(())
}

impl<T: NativeType> PrimitiveArray<T> {
/// The canonical method to create a [`PrimitiveArray`] out of its internal components.
/// # Implementation
Expand All @@ -67,21 +89,7 @@ impl<T: NativeType> PrimitiveArray<T> {
values: Buffer<T>,
validity: Option<Bitmap>,
) -> Result<Self, Error> {
if validity
.as_ref()
.map_or(false, |validity| validity.len() != values.len())
{
return Err(Error::oos(
"validity mask length must match the number of values",
));
}

if data_type.to_physical_type() != PhysicalType::Primitive(T::PRIMITIVE) {
return Err(Error::oos(
"BooleanArray can only be initialized with a DataType whose physical type is Primitive",
));
}

check(&data_type, &values, &validity)?;
Ok(Self {
data_type,
values,
Expand Down Expand Up @@ -109,14 +117,7 @@ impl<T: NativeType> PrimitiveArray<T> {
#[inline]
#[must_use]
pub fn to(self, data_type: DataType) -> Self {
if !data_type.to_physical_type().eq_primitive(T::PRIMITIVE) {
Err(Error::InvalidArgumentError(format!(
"Type {} does not support logical type {:?}",
std::any::type_name::<T>(),
data_type
)))
.unwrap()
}
check(&data_type, &self.values, &self.validity).unwrap();
Self {
data_type,
values: self.values,
Expand Down Expand Up @@ -252,15 +253,37 @@ impl<T: NativeType> PrimitiveArray<T> {
arr
}

// NOTE(review): this hunk appears to interleave removed and added diff lines whose
// +/- markers were lost. Presumably the first doc line and the
// `data_type: self.data_type.clone(),` initializer are the removed lines - TODO confirm.
/// Returns a new [`PrimitiveArray`] by taking every buffer from this one, leaving this one empty.
/// Returns a new [`PrimitiveArray`] by taking everything from this one.
#[must_use]
pub fn take(&mut self) -> Self {
// swap the logical type out of `self`, leaving the type derived from `T::PRIMITIVE` behind
let mut data_type: DataType = T::PRIMITIVE.into();
std::mem::swap(&mut self.data_type, &mut data_type);
Self {
// NOTE(review): two `data_type` initializers are shown; only one can exist in the compiled code
data_type: self.data_type.clone(),
data_type,
// `std::mem::take` moves each field out and leaves its `Default` value in `self`
values: std::mem::take(&mut self.values),
validity: std::mem::take(&mut self.validity),
}
}

/// Tries to assign the arguments to itself.
///
/// This function is semantically similar to [`Self::try_new`] but it can be used to populate an existing
/// Array. On error, `self` is left untouched.
/// # Errors
/// This function errors iff:
/// * The validity is not `None` and its length is different from `values`'s length
/// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`]
pub fn try_assign(
    &mut self,
    data_type: DataType,
    values: Buffer<T>,
    validity: Option<Bitmap>,
) -> Result<(), Error> {
    // validate the incoming components, not the ones currently stored in `self`
    check(&data_type, &values, &validity)?;
    self.data_type = data_type;
    self.values = values;
    self.validity = validity;
    Ok(())
}

/// Deconstructs this [`PrimitiveArray`] into its internal components
pub fn into_inner(self) -> (DataType, Buffer<T>, Option<Bitmap>) {
let Self {
Expand Down

0 comments on commit 715fe71

Please sign in to comment.