Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added TryExtendFromSelf #1278

Merged
merged 1 commit into from
Oct 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ impl<O: Offset> BinaryArray<O> {
/// Creates a new [`BinaryArray`] from a slice of optional `&[u8]`.
// Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
// NOTE(review): this hunk is "1 addition & 1 deletion": the first body line below is the
// removed implementation and the second is its replacement, which delegates to
// `MutableBinaryArray::from` and converts the result with `into()`.
pub fn from<T: AsRef<[u8]>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))
MutableBinaryArray::<O>::from(slice).into()
}

/// Returns an iterator of `Option<&[u8]>` over every element of this array.
Expand Down
41 changes: 37 additions & 4 deletions src/array/binary/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
use std::{iter::FromIterator, sync::Arc};

use crate::{
array::{Array, MutableArray, Offset, TryExtend, TryPush},
bitmap::{Bitmap, MutableBitmap},
array::{Array, MutableArray, Offset, TryExtend, TryExtendFromSelf, TryPush},
bitmap::{
utils::{BitmapIter, ZipValidity},
Bitmap, MutableBitmap,
},
datatypes::DataType,
error::{Error, Result},
trusted_len::TrustedLen,
};

use super::{BinaryArray, MutableBinaryValuesArray};
use super::{BinaryArray, MutableBinaryValuesArray, MutableBinaryValuesIter};
use crate::array::physical_binary::*;

/// The Arrow's equivalent to `Vec<Option<Vec<u8>>>`.
/// Converting a [`MutableBinaryArray`] into a [`BinaryArray`] is `O(1)`.
/// # Implementation
/// This struct does not allocate a validity until one is required (i.e. push a null to it).
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct MutableBinaryArray<O: Offset> {
values: MutableBinaryValuesArray<O>,
validity: Option<MutableBitmap>,
Expand Down Expand Up @@ -96,6 +99,12 @@ impl<O: Offset> MutableBinaryArray<O> {
Self { values, validity }
}

/// Builds a [`MutableBinaryArray`] out of a slice whose items are optional byte strings.
// Note: kept as an inherent method rather than `impl From`, because Rust does not
// allow the double `AsRef` bound on such an impl.
pub fn from<T: AsRef<[u8]>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
    let items: &[Option<T>] = slice.as_ref();
    // Turn each `&Option<T>` into an `Option<&T>` before handing it over.
    let optional_items = items.iter().map(|item| item.as_ref());
    Self::from_trusted_len_iter(optional_items)
}

fn default_data_type() -> DataType {
BinaryArray::<O>::default_data_type()
}
Expand Down Expand Up @@ -193,6 +202,16 @@ impl<O: Offset> MutableBinaryArray<O> {
pub fn offsets(&self) -> &Vec<O> {
self.values.offsets()
}

/// Iterates over every slot of this array as `Option<&[u8]>`.
pub fn iter(&self) -> ZipValidity<&[u8], MutableBinaryValuesIter<O>, BitmapIter> {
    let values = self.values_iter();
    // The validity is optional; when absent, `None` is handed to `ZipValidity`.
    let validity = self.validity.as_ref().map(|bitmap| bitmap.iter());
    ZipValidity::new(values, validity)
}

/// Iterates over the values of this array, without consulting the validity.
pub fn values_iter(&self) -> MutableBinaryValuesIter<O> {
    let values = &self.values;
    values.iter()
}
}

impl<O: Offset> MutableArray for MutableBinaryArray<O> {
Expand Down Expand Up @@ -478,3 +497,17 @@ impl<O: Offset, T: AsRef<[u8]>> TryPush<Option<T>> for MutableBinaryArray<O> {
Ok(())
}
}

impl<O: Offset> PartialEq for MutableBinaryArray<O> {
    /// Two arrays are equal when their `iter()` sequences compare equal item by item.
    fn eq(&self, other: &Self) -> bool {
        let lhs = self.iter();
        let rhs = other.iter();
        lhs.eq(rhs)
    }
}

impl<O: Offset> TryExtendFromSelf for MutableBinaryArray<O> {
    /// Appends the values and the validity of `other` to `self`.
    ///
    /// # Errors
    /// Propagates the error returned when extending the values fails. In that case the
    /// validity of `self` has not been modified.
    fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
        // `extend_validity` must receive the length from *before* the values grow.
        let len = self.len();

        // Run the fallible step first: extending the validity up front would leave a
        // bitmap longer than the values if this call returned an error.
        self.values.try_extend_from_self(&other.values)?;

        extend_validity(len, &mut self.validity, &other.validity);
        Ok(())
    }
}
10 changes: 9 additions & 1 deletion src/array/binary/mutable_values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ use std::{iter::FromIterator, sync::Arc};
use crate::{
array::{
specification::{check_offsets_minimal, try_check_offsets},
Array, ArrayAccessor, ArrayValuesIter, MutableArray, Offset, TryExtend, TryPush,
Array, ArrayAccessor, ArrayValuesIter, MutableArray, Offset, TryExtend, TryExtendFromSelf,
TryPush,
},
bitmap::MutableBitmap,
datatypes::DataType,
Expand Down Expand Up @@ -408,3 +409,10 @@ unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableBinaryValuesArray<O> {
self.len()
}
}

impl<O: Offset> TryExtendFromSelf for MutableBinaryValuesArray<O> {
fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
self.values.extend_from_slice(&other.values);
try_extend_offsets(&mut self.offsets, &other.offsets)
}
}
17 changes: 15 additions & 2 deletions src/array/boolean/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ use std::iter::FromIterator;
use std::sync::Arc;

use crate::{
array::{Array, MutableArray, TryExtend, TryPush},
array::{
physical_binary::extend_validity, Array, MutableArray, TryExtend, TryExtendFromSelf,
TryPush,
},
bitmap::MutableBitmap,
datatypes::{DataType, PhysicalType},
error::Result,
Expand All @@ -15,7 +18,7 @@ use super::BooleanArray;
/// Converting a [`MutableBooleanArray`] into a [`BooleanArray`] is `O(1)`.
/// # Implementation
/// This struct does not allocate a validity until one is required (i.e. push a null to it).
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct MutableBooleanArray {
data_type: DataType,
values: MutableBitmap,
Expand Down Expand Up @@ -533,3 +536,13 @@ impl PartialEq for MutableBooleanArray {
self.iter().eq(other.iter())
}
}

impl TryExtendFromSelf for MutableBooleanArray {
    /// Appends the validity and the boolean values of `other` to `self`.
    ///
    /// None of the steps visible here can fail, so this always returns `Ok(())`.
    fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
        let current_len = self.len();
        extend_validity(current_len, &mut self.validity, &other.validity);

        // Copy all `other.values.len()` bits, starting at bit 0 of its backing slice.
        let bit_count = other.values.len();
        self.values
            .extend_from_slice(other.values.as_slice(), 0, bit_count);
        Ok(())
    }
}
20 changes: 2 additions & 18 deletions src/array/fixed_size_binary/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
use crate::{
bitmap::{Bitmap, MutableBitmap},
buffer::Buffer,
datatypes::DataType,
error::Error,
};
use crate::{bitmap::Bitmap, buffer::Buffer, datatypes::DataType, error::Error};

use super::Array;

Expand Down Expand Up @@ -321,18 +316,7 @@ impl FixedSizeBinaryArray {
/// Creates a new [`FixedSizeBinaryArray`] from a slice of optional `[u8]`.
// Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
// NOTE(review): this span shows both sides of the change. The `let values` / `let validity` /
// `Self::new` statements are the removed body; the final statement is its replacement, which
// delegates to `MutableFixedSizeBinaryArray::from` and converts the result with `into()`.
pub fn from<const N: usize, P: AsRef<[Option<[u8; N]>]>>(slice: P) -> Self {
// Removed body: `None` entries were stored as `N` zero bytes...
let values = slice
.as_ref()
.iter()
.copied()
.flat_map(|x| x.unwrap_or([0; N]))
.collect::<Vec<_>>();
// ...and the validity bitmap recorded which entries were `Some`.
let validity = slice
.as_ref()
.iter()
.map(|x| x.is_some())
.collect::<MutableBitmap>();
Self::new(DataType::FixedSizeBinary(N), values.into(), validity.into())
// Replacement body.
MutableFixedSizeBinaryArray::from(slice).into()
}
}

Expand Down
31 changes: 29 additions & 2 deletions src/array/fixed_size_binary/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::sync::Arc;

use crate::{
array::{Array, MutableArray},
array::{physical_binary::extend_validity, Array, MutableArray, TryExtendFromSelf},
bitmap::MutableBitmap,
datatypes::DataType,
error::{Error, Result},
Expand All @@ -13,7 +13,7 @@ use super::{FixedSizeBinaryArray, FixedSizeBinaryValues};
/// Converting a [`MutableFixedSizeBinaryArray`] into a [`FixedSizeBinaryArray`] is `O(1)`.
/// # Implementation
/// This struct does not allocate a validity until one is required (i.e. push a null to it).
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct MutableFixedSizeBinaryArray {
data_type: DataType,
size: usize,
Expand Down Expand Up @@ -73,6 +73,23 @@ impl MutableFixedSizeBinaryArray {
)
}

/// Builds a [`MutableFixedSizeBinaryArray`] out of a slice of optional `[u8; N]` items.
///
/// `None` items are stored as `N` zero bytes and are marked unset in the validity bitmap.
// Note: kept as an inherent method rather than `impl From`, because Rust does not
// allow the double `AsRef` bound on such an impl.
pub fn from<const N: usize, P: AsRef<[Option<[u8; N]>]>>(slice: P) -> Self {
    let items: &[Option<[u8; N]>] = slice.as_ref();

    // Flatten every item into its `N` bytes, substituting zeros for missing items.
    let values: Vec<u8> = items
        .iter()
        .flat_map(|&item| item.unwrap_or([0; N]))
        .collect();

    // One validity bit per item, set exactly when the item is present.
    let validity: MutableBitmap = items.iter().map(Option::is_some).collect();

    Self::from_data(DataType::FixedSizeBinary(N), values, validity.into())
}

/// tries to push a new entry to [`MutableFixedSizeBinaryArray`].
/// # Error
/// Errors iff the size of `value` is not equal to its own size.
Expand Down Expand Up @@ -278,3 +295,13 @@ impl PartialEq for MutableFixedSizeBinaryArray {
self.iter().eq(other.iter())
}
}

impl TryExtendFromSelf for MutableFixedSizeBinaryArray {
    /// Appends the validity and the raw value bytes of `other` to `self`.
    ///
    /// None of the steps visible here can fail, so this always returns `Ok(())`.
    // NOTE(review): the element sizes of `self` and `other` are not compared here;
    // presumably callers guarantee they match - confirm.
    fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
        let current_len = self.len();
        extend_validity(current_len, &mut self.validity, &other.validity);

        self.values.extend_from_slice(other.values.as_slice());
        Ok(())
    }
}
23 changes: 21 additions & 2 deletions src/array/fixed_size_list/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use std::sync::Arc;

use crate::{
array::{Array, MutableArray, TryExtend, TryPush},
array::{
physical_binary::extend_validity, Array, MutableArray, TryExtend, TryExtendFromSelf,
TryPush,
},
bitmap::MutableBitmap,
datatypes::{DataType, Field},
error::{Error, Result},
Expand All @@ -10,7 +13,7 @@ use crate::{
use super::FixedSizeListArray;

/// The mutable version of [`FixedSizeListArray`].
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct MutableFixedSizeListArray<M: MutableArray> {
data_type: DataType,
size: usize,
Expand Down Expand Up @@ -64,6 +67,11 @@ impl<M: MutableArray> MutableFixedSizeListArray<M> {
self.size
}

/// Number of slots in this array: the length of the inner values divided by `size`.
// NOTE(review): this division panics if `size` is 0 - confirm the constructors rule that out.
pub fn len(&self) -> usize {
    let inner_len = self.values.len();
    inner_len / self.size
}

/// The inner values
pub fn values(&self) -> &M {
&self.values
Expand Down Expand Up @@ -210,3 +218,14 @@ where
Ok(())
}
}

impl<M> TryExtendFromSelf for MutableFixedSizeListArray<M>
where
    M: MutableArray + TryExtendFromSelf,
{
    /// Appends the inner values and the validity of `other` to `self`.
    ///
    /// # Errors
    /// Propagates the error returned when extending the inner values fails. In that case
    /// the validity of `self` has not been modified.
    // NOTE(review): the `size` of `self` and `other` is not compared here; presumably
    // callers guarantee they match - confirm.
    fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
        // `len()` is derived from the inner values, so read it before they grow.
        let len = self.len();

        // Run the fallible step first: extending the validity up front would leave a
        // bitmap longer than the array if this call returned an error.
        self.values.try_extend_from_self(&other.values)?;

        extend_validity(len, &mut self.validity, &other.validity);
        Ok(())
    }
}
24 changes: 24 additions & 0 deletions src/array/list/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,27 @@ impl<'a, O: Offset> ListArray<O> {
ListValuesIter::new(self)
}
}

/// Walks a list of end offsets and, for each one, hands out the window of `values`
/// that lies between the previous offset and that offset.
struct Iter<T, I: Iterator<Item = Option<T>>> {
    /// Start of the next window (the previously consumed offset).
    current: i32,
    /// Remaining end offsets, one per window still to be produced.
    offsets: std::vec::IntoIter<i32>,
    /// The values iterator; it is cloned afresh for every window.
    values: I,
}

impl<T, I: Iterator<Item = Option<T>> + Clone> Iterator for Iter<T, I> {
    type Item = Option<std::iter::Take<std::iter::Skip<I>>>;

    /// Produces the window for the next offset, always wrapped in `Some`;
    /// returns `None` once the offsets are exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let end = self.offsets.next()?;
        let start = self.current;
        let length = end - start;

        // Each window is an independent clone of the values, skipped to `start`.
        let window = self
            .values
            .clone()
            .skip(start as usize)
            .take(length as usize);

        self.current = end;
        Some(Some(window))
    }
}
34 changes: 28 additions & 6 deletions src/array/list/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
use std::sync::Arc;

use crate::{
array::{specification::try_check_offsets, Array, MutableArray, Offset, TryExtend, TryPush},
array::{
physical_binary::{extend_validity, try_extend_offsets},
specification::try_check_offsets,
Array, MutableArray, Offset, TryExtend, TryExtendFromSelf, TryPush,
},
bitmap::MutableBitmap,
datatypes::{DataType, Field},
error::{Error, Result},
Expand All @@ -11,7 +15,7 @@ use crate::{
use super::ListArray;

/// The mutable version of [`ListArray`].
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct MutableListArray<O: Offset, M: MutableArray> {
data_type: DataType,
offsets: Vec<O>,
Expand Down Expand Up @@ -71,6 +75,8 @@ where
I: IntoIterator<Item = Option<T>>,
{
fn try_extend<II: IntoIterator<Item = Option<I>>>(&mut self, iter: II) -> Result<()> {
let iter = iter.into_iter();
self.reserve(iter.size_hint().0);
for items in iter {
self.try_push(items)?;
}
Expand All @@ -97,6 +103,20 @@ where
}
}

impl<O, M> TryExtendFromSelf for MutableListArray<O, M>
where
O: Offset,
M: MutableArray + TryExtendFromSelf,
{
fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
extend_validity(self.len(), &mut self.validity, &other.validity);

self.values.try_extend_from_self(&other.values)?;

try_extend_offsets(&mut self.offsets, &other.offsets)
}
}

impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
/// Creates a new [`MutableListArray`] from a [`MutableArray`] and capacity.
pub fn new_from(values: M, data_type: DataType, capacity: usize) -> Self {
Expand Down Expand Up @@ -222,6 +242,12 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
}
}

/// Number of slots in this array, defined as one less than the number of offsets.
#[inline]
pub fn len(&self) -> usize {
    let offsets_len = self.offsets.len();
    offsets_len - 1
}

/// The values
pub fn mut_values(&mut self) -> &mut M {
&mut self.values
Expand Down Expand Up @@ -279,10 +305,6 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
validity.shrink_to_fit()
}
}

// NOTE(review): this private `len` is the copy removed by the change (the hunk shrinks from
// 10 to 6 lines); a public `len` with the same body is added earlier in this file's diff.
fn len(&self) -> usize {
self.offsets.len() - 1
}
}

impl<O: Offset, M: MutableArray + 'static> MutableArray for MutableListArray<O, M> {
Expand Down
7 changes: 7 additions & 0 deletions src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,13 @@ pub trait TryPush<A> {
fn try_push(&mut self, item: A) -> Result<()>;
}

/// A trait describing the ability of a struct to extend from a reference of itself.
/// Specialization of [`TryExtend`].
///
/// `other` is only borrowed, so it is left unchanged by the call.
pub trait TryExtendFromSelf {
/// Tries to extend itself with elements from `other`, failing only on overflow.
///
/// # Errors
/// Returns an error when appending the elements of `other` would overflow.
fn try_extend_from_self(&mut self, other: &Self) -> Result<()>;
}

/// Trait that [`BinaryArray`] and [`Utf8Array`] implement for the purposes of DRY.
/// # Safety
/// The implementer must ensure that
Expand Down
Loading