Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added GrowableFixedSizeList and improved MutableFixedSizeListArray #470

Merged
merged 1 commit into from
Sep 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 54 additions & 73 deletions src/array/fixed_size_list/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
use std::sync::Arc;

use crate::{
array::{
Array, MutableArray, MutableBinaryArray, MutablePrimitiveArray, MutableUtf8Array, Offset,
},
array::{Array, MutableArray, TryExtend, TryPush},
bitmap::MutableBitmap,
datatypes::DataType,
error::{ArrowError, Result},
types::NativeType,
};

use super::FixedSizeListArray;
Expand All @@ -32,6 +29,7 @@ impl<M: MutableArray> From<MutableFixedSizeListArray<M>> for FixedSizeListArray
}

impl<M: MutableArray> MutableFixedSizeListArray<M> {
/// Creates a new [`MutableFixedSizeListArray`] from a [`MutableArray`] and size.
pub fn new(values: M, size: usize) -> Self {
let data_type = FixedSizeListArray::default_datatype(values.data_type().clone(), size);
assert_eq!(values.len(), 0);
Expand All @@ -43,20 +41,38 @@ impl<M: MutableArray> MutableFixedSizeListArray<M> {
}
}

/// Returns a mutable reference to the inner values.
// NOTE(review): values written through this reference bypass the per-slot
// bookkeeping of this type; presumably the caller must keep the number of
// inner values a multiple of `size` — confirm against callers.
pub fn mut_values(&mut self) -> &mut M {
&mut self.values
}

/// Returns a shared reference to the inner values, i.e. the mutable array
/// that stores the items of every list slot.
pub fn values(&self) -> &M {
&self.values
}

fn init_validity(&mut self) {
self.validity = Some(MutableBitmap::from_trusted_len_iter(
std::iter::repeat(true)
.take(self.values.len() - 1)
.chain(std::iter::once(false)),
))
let len = self.values.len() / self.size;

let mut validity = MutableBitmap::new();
validity.extend_constant(len, true);
validity.set(len - 1, false);
self.validity = Some(validity)
}

/// Registers a new valid list slot after its items were appended to the inner values.
///
/// # Errors
/// Returns [`ArrowError::KeyOverflowError`] when the number of inner values
/// is not a multiple of the list size.
#[inline]
fn try_push_valid(&mut self) -> Result<()> {
    let remainder = self.values.len() % self.size;
    if remainder != 0 {
        return Err(ArrowError::KeyOverflowError);
    }

    // Without a validity bitmap there is nothing to record for a valid slot.
    if let Some(bitmap) = self.validity.as_mut() {
        bitmap.push(true);
    }
    Ok(())
}

/// Appends a null list slot: `size` null items are pushed to the inner values
/// and the slot is marked as invalid.
#[inline]
fn push_null(&mut self) {
    for _ in 0..self.size {
        self.values.push_null();
    }

    // The validity bitmap does not exist until the first null is pushed.
    if let Some(bitmap) = self.validity.as_mut() {
        bitmap.push(false);
    } else {
        self.init_validity();
    }
}
}

Expand Down Expand Up @@ -97,6 +113,7 @@ impl<M: MutableArray + 'static> MutableArray for MutableFixedSizeListArray<M> {
self
}

#[inline]
fn push_null(&mut self) {
(0..self.size).for_each(|_| {
self.values.push_null();
Expand All @@ -109,69 +126,33 @@ impl<M: MutableArray + 'static> MutableArray for MutableFixedSizeListArray<M> {
}
}

impl<T: NativeType> MutableFixedSizeListArray<MutablePrimitiveArray<T>> {
pub fn try_from_iter<P: IntoIterator<Item = Option<T>>, I: IntoIterator<Item = Option<P>>>(
iter: I,
size: usize,
data_type: DataType,
) -> Result<Self> {
let iterator = iter.into_iter();
let (lower, _) = iterator.size_hint();
let array = MutablePrimitiveArray::<T>::with_capacity_from(lower * size, data_type);
let mut array = MutableFixedSizeListArray::new(array, size);
for items in iterator {
if let Some(items) = items {
let values = array.mut_values();
let len = values.len();
values.extend(items);
if values.len() - len != size {
return Err(ArrowError::InvalidArgumentError(
"A FixedSizeList must have all its values with the same size".to_string(),
));
};
} else {
array.push_null();
}
impl<M, I, T> TryExtend<Option<I>> for MutableFixedSizeListArray<M>
where
M: MutableArray + TryExtend<Option<T>>,
I: IntoIterator<Item = Option<T>>,
{
#[inline]
fn try_extend<II: IntoIterator<Item = Option<I>>>(&mut self, iter: II) -> Result<()> {
for items in iter {
self.try_push(items)?;
}
Ok(array)
Ok(())
}
}

macro_rules! impl_offsets {
($mutable:ident, $type:ty) => {
impl<O: Offset> MutableFixedSizeListArray<$mutable<O>> {
pub fn try_from_iter<
T: AsRef<$type>,
P: IntoIterator<Item = Option<T>>,
I: IntoIterator<Item = Option<P>>,
>(
iter: I,
size: usize,
) -> Result<Self> {
let iterator = iter.into_iter();
let (lower, _) = iterator.size_hint();
let array = $mutable::<O>::with_capacity(lower * size);
let mut array = MutableFixedSizeListArray::new(array, size);
for items in iterator {
if let Some(items) = items {
let values = array.mut_values();
let len = values.len();
values.extend(items);
if values.len() - len != size {
return Err(ArrowError::InvalidArgumentError(
"A FixedSizeList must have all its values with the same size"
.to_string(),
));
};
} else {
array.push_null();
}
}
Ok(array)
}
impl<M, I, T> TryPush<Option<I>> for MutableFixedSizeListArray<M>
where
M: MutableArray + TryExtend<Option<T>>,
I: IntoIterator<Item = Option<T>>,
{
#[inline]
fn try_push(&mut self, item: Option<I>) -> Result<()> {
if let Some(items) = item {
self.values.try_extend(items)?;
self.try_push_valid()?;
} else {
self.push_null();
}
};
Ok(())
}
}

impl_offsets!(MutableUtf8Array, str);
impl_offsets!(MutableBinaryArray, [u8]);
108 changes: 108 additions & 0 deletions src/array/growable/fixed_size_list.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
use std::sync::Arc;

use crate::{
array::{Array, FixedSizeListArray},
bitmap::MutableBitmap,
datatypes::DataType,
};

use super::{
make_growable,
utils::{build_extend_null_bits, ExtendNullBits},
Growable,
};

/// Concrete [`Growable`] for the [`FixedSizeListArray`].
pub struct GrowableFixedSizeList<'a> {
// The arrays this growable is bound to; the first one supplies the output data type.
arrays: Vec<&'a FixedSizeListArray>,
// Validity bitmap of the array being built, one bit per list slot.
validity: MutableBitmap,
// Growable over the child values of every bound array.
values: Box<dyn Growable<'a> + 'a>,
// One validity-extension function per bound array, indexed like `arrays`.
extend_null_bits: Vec<ExtendNullBits<'a>>,
// Number of child values per list slot, read from the first array's data type.
size: usize,
}

impl<'a> GrowableFixedSizeList<'a> {
    /// Creates a new [`GrowableFixedSizeList`] bound to `arrays` with a pre-allocated `capacity`.
    ///
    /// `capacity` is expressed in list slots. The child values are pre-allocated
    /// for `capacity * size` items, where `size` is the fixed list size taken
    /// from the data type of the first array.
    ///
    /// `use_validity` is forced to `true` when any of `arrays` contains nulls.
    /// # Panics
    /// If `arrays` is empty.
    pub fn new(
        arrays: Vec<&'a FixedSizeListArray>,
        mut use_validity: bool,
        capacity: usize,
    ) -> Self {
        assert!(!arrays.is_empty());

        // if any of the arrays has nulls, insertions from any array requires setting bits
        // as there is at least one array with nulls.
        use_validity = use_validity || arrays.iter().any(|array| array.null_count() > 0);

        let size =
            if let DataType::FixedSizeList(_, size) = &arrays[0].data_type().to_logical_type() {
                *size as usize
            } else {
                unreachable!("`GrowableFixedSizeList` expects `DataType::FixedSizeList`")
            };

        let extend_null_bits = arrays
            .iter()
            .map(|array| build_extend_null_bits(*array, use_validity))
            .collect();

        let inner = arrays
            .iter()
            .map(|array| array.values().as_ref())
            .collect::<Vec<_>>();
        // Every list slot holds exactly `size` child values, so the child capacity is known.
        // `saturating_mul` keeps the hint computation itself from overflowing.
        let values = make_growable(&inner, use_validity, capacity.saturating_mul(size));

        Self {
            arrays,
            values,
            validity: MutableBitmap::with_capacity(capacity),
            extend_null_bits,
            size,
        }
    }

    /// Builds a [`FixedSizeListArray`] from the current state.
    ///
    /// The accumulated validity is moved out and replaced by an empty bitmap.
    fn to(&mut self) -> FixedSizeListArray {
        let validity = std::mem::take(&mut self.validity);
        let values = self.values.as_arc();

        FixedSizeListArray::from_data(self.arrays[0].data_type().clone(), values, validity.into())
    }
}

impl<'a> Growable<'a> for GrowableFixedSizeList<'a> {
    /// Copies `len` list slots, beginning at slot `start`, from the `index`-th bound array.
    fn extend(&mut self, index: usize, start: usize, len: usize) {
        let push_validity = &self.extend_null_bits[index];
        push_validity(&mut self.validity, start, len);

        // Each list slot maps to `size` consecutive child values.
        let child_start = start * self.size;
        let child_len = len * self.size;
        self.values.extend(index, child_start, child_len);
    }

    /// Appends `additional` null list slots.
    fn extend_validity(&mut self, additional: usize) {
        let child_additional = additional * self.size;
        self.values.extend_validity(child_additional);
        self.validity.extend_constant(additional, false);
    }

    /// Builds the array and returns it behind an [`Arc`].
    fn as_arc(&mut self) -> Arc<dyn Array> {
        let array = self.to();
        Arc::new(array)
    }

    /// Builds the array and returns it behind a [`Box`].
    fn as_box(&mut self) -> Box<dyn Array> {
        let array = self.to();
        Box::new(array)
    }
}

impl<'a> From<GrowableFixedSizeList<'a>> for FixedSizeListArray {
fn from(val: GrowableFixedSizeList<'a>) -> Self {
let mut values = val.values;
let values = values.as_arc();

Self::from_data(
val.arrays[0].data_type().clone(),
values,
val.validity.into(),
)
}
}
14 changes: 13 additions & 1 deletion src/array/growable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ mod list;
pub use list::GrowableList;
mod structure;
pub use structure::GrowableStruct;
mod fixed_size_list;
pub use fixed_size_list::GrowableFixedSizeList;
mod utf8;
pub use utf8::GrowableUtf8;
mod dictionary;
Expand Down Expand Up @@ -201,7 +203,17 @@ pub fn make_growable<'a>(
capacity,
))
}
FixedSizeList => todo!(),
FixedSizeList => {
let arrays = arrays
.iter()
.map(|array| array.as_any().downcast_ref().unwrap())
.collect::<Vec<_>>();
Box::new(fixed_size_list::GrowableFixedSizeList::new(
arrays,
use_validity,
capacity,
))
}
Union => todo!(),
Dictionary(key_type) => {
with_match_physical_dictionary_key_type!(key_type, |$T| {
Expand Down
8 changes: 4 additions & 4 deletions tests/it/array/equal/fixed_size_list.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use arrow2::{
array::{FixedSizeListArray, MutableFixedSizeListArray, MutablePrimitiveArray},
array::{FixedSizeListArray, MutableFixedSizeListArray, MutablePrimitiveArray, TryExtend},
datatypes::DataType,
};

Expand All @@ -16,9 +16,9 @@ fn create_fixed_size_list_array<U: AsRef<[i32]>, T: AsRef<[Option<U>]>>(
})
});

MutableFixedSizeListArray::<MutablePrimitiveArray<i32>>::try_from_iter(data, 3, DataType::Int32)
.unwrap()
.into()
let mut list = MutableFixedSizeListArray::new(MutablePrimitiveArray::<i32>::new(), 3);
list.try_extend(data).unwrap();
list.into()
}

#[test]
Expand Down
11 changes: 3 additions & 8 deletions tests/it/array/fixed_size_list/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,9 @@ fn primitive() {
Some(vec![Some(4), None, Some(6)]),
];

let list: FixedSizeListArray =
MutableFixedSizeListArray::<MutablePrimitiveArray<i32>>::try_from_iter(
data,
3,
DataType::Int32,
)
.unwrap()
.into();
let mut list = MutableFixedSizeListArray::new(MutablePrimitiveArray::<i32>::new(), 3);
list.try_extend(data).unwrap();
let list: FixedSizeListArray = list.into();

let a = list.value(0);
let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
Expand Down
Loading