Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Simplified code #1088

Merged
merged 1 commit into from Jun 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 11 additions & 11 deletions src/array/primitive/mod.rs
Expand Up @@ -309,7 +309,7 @@ impl<T: NativeType> PrimitiveArray<T> {
///
/// This function is primarily used to re-use memory regions.
#[must_use]
pub fn into_mut(self) -> Either<Self, MutablePrimitiveArray<T>> {
pub fn into_mut(mut self) -> Either<Self, MutablePrimitiveArray<T>> {
use Either::*;

if let Some(bitmap) = self.validity {
Expand All @@ -319,27 +319,27 @@ impl<T: NativeType> PrimitiveArray<T> {
self.values,
Some(bitmap),
)),
Right(mutable_bitmap) => match self.values.into_mut() {
Left(buffer) => Left(PrimitiveArray::new(
self.data_type,
buffer,
Some(mutable_bitmap.into()),
)),
Right(values) => Right(MutablePrimitiveArray::from_data(
Right(mutable_bitmap) => match self.values.get_mut().map(std::mem::take) {
Some(values) => Right(MutablePrimitiveArray::from_data(
self.data_type,
values,
Some(mutable_bitmap),
)),
None => Left(PrimitiveArray::new(
self.data_type,
self.values,
Some(mutable_bitmap.into()),
)),
},
}
} else {
match self.values.into_mut() {
Left(values) => Left(PrimitiveArray::new(self.data_type, values, None)),
Right(values) => Right(MutablePrimitiveArray::from_data(
match self.values.get_mut().map(std::mem::take) {
Some(values) => Right(MutablePrimitiveArray::from_data(
self.data_type,
values,
None,
)),
None => Left(PrimitiveArray::new(self.data_type, self.values, None)),
}
}
}
Expand Down
8 changes: 0 additions & 8 deletions src/array/specification.rs
Expand Up @@ -30,14 +30,6 @@ pub fn check_offsets_minimal<O: Offset>(offsets: &[O], values_len: usize) -> usi
len
}

/// # Panics iff:
/// * the `offsets` is not monotonically increasing, or
/// * any slice of `values` between two consecutive pairs from `offsets` is invalid `utf8`, or
/// * any offset is larger or equal to `values_len`.
pub fn check_offsets_and_utf8<O: Offset>(offsets: &[O], values: &[u8]) {
try_check_offsets_and_utf8(offsets, values).unwrap()
}

/// # Panics iff:
/// * the `offsets` is not monotonically increasing, or
/// * any slice of `values` between two consecutive pairs from `offsets` is invalid `utf8`, or
Expand Down
78 changes: 32 additions & 46 deletions src/array/utf8/mod.rs
Expand Up @@ -243,7 +243,7 @@ impl<O: Offset> Utf8Array<O> {
}

/// Try to convert this `Utf8Array` to a `MutableUtf8Array`
pub fn into_mut(self) -> Either<Self, MutableUtf8Array<O>> {
pub fn into_mut(mut self) -> Either<Self, MutableUtf8Array<O>> {
use Either::*;
if let Some(bitmap) = self.validity {
match bitmap.into_mut() {
Expand All @@ -256,84 +256,70 @@ impl<O: Offset> Utf8Array<O> {
Some(bitmap),
)
}),
Right(mutable_bitmap) => match (self.values.into_mut(), self.offsets.into_mut()) {
(Left(immutable_values), Left(immutable_offsets)) => {
Right(mutable_bitmap) => match (
self.values.get_mut().map(std::mem::take),
self.offsets.get_mut().map(std::mem::take),
) {
(None, None) => {
// Safety: invariants are preserved
Left(unsafe {
Utf8Array::new_unchecked(
self.data_type,
immutable_offsets,
immutable_values,
self.offsets,
self.values,
Some(mutable_bitmap.into()),
)
})
}
(Left(immutable_values), Right(mutable_offsets)) => {
(None, Some(offsets)) => {
// Safety: invariants are preserved
Left(unsafe {
Utf8Array::new_unchecked(
self.data_type,
mutable_offsets.into(),
immutable_values,
offsets.into(),
self.values,
Some(mutable_bitmap.into()),
)
})
}
(Right(mutable_values), Left(immutable_offsets)) => {
(Some(mutable_values), None) => {
// Safety: invariants are preserved
Left(unsafe {
Utf8Array::new_unchecked(
self.data_type,
immutable_offsets,
self.offsets,
mutable_values.into(),
Some(mutable_bitmap.into()),
)
})
}
(Right(mutable_values), Right(mutable_offsets)) => {
Right(MutableUtf8Array::from_data(
(Some(values), Some(offsets)) => Right(unsafe {
MutableUtf8Array::from_data_unchecked(
self.data_type,
mutable_offsets,
mutable_values,
offsets,
values,
Some(mutable_bitmap),
))
}
)
}),
},
}
} else {
match (self.values.into_mut(), self.offsets.into_mut()) {
(Left(immutable_values), Left(immutable_offsets)) => Left(unsafe {
Utf8Array::new_unchecked(
self.data_type,
immutable_offsets,
immutable_values,
None,
)
match (
self.values.get_mut().map(std::mem::take),
self.offsets.get_mut().map(std::mem::take),
) {
(None, None) => Left(unsafe {
Utf8Array::new_unchecked(self.data_type, self.offsets, self.values, None)
}),
(Left(immutable_values), Right(mutable_offsets)) => Left(unsafe {
Utf8Array::new_unchecked(
self.data_type,
mutable_offsets.into(),
immutable_values,
None,
)
(None, Some(offsets)) => Left(unsafe {
Utf8Array::new_unchecked(self.data_type, offsets.into(), self.values, None)
}),
(Right(mutable_values), Left(immutable_offsets)) => Left(unsafe {
Utf8Array::from_data(
self.data_type,
immutable_offsets,
mutable_values.into(),
None,
)
(Some(values), None) => Left(unsafe {
Utf8Array::new_unchecked(self.data_type, self.offsets, values.into(), None)
}),
(Some(values), Some(offsets)) => Right(unsafe {
MutableUtf8Array::from_data_unchecked(self.data_type, offsets, values, None)
}),
(Right(mutable_values), Right(mutable_offsets)) => {
Right(MutableUtf8Array::from_data(
self.data_type,
mutable_offsets,
mutable_values,
None,
))
}
}
}
}
Expand Down
57 changes: 43 additions & 14 deletions src/array/utf8/mutable.rs
Expand Up @@ -2,7 +2,7 @@ use std::{iter::FromIterator, sync::Arc};

use crate::{
array::{
specification::{check_offsets_and_utf8, check_offsets_minimal},
specification::{check_offsets_minimal, try_check_offsets_and_utf8},
Array, MutableArray, Offset, TryExtend, TryPush,
},
bitmap::MutableBitmap,
Expand Down Expand Up @@ -75,6 +75,47 @@ impl<O: Offset> MutableUtf8Array<O> {
}
}

/// Returns a [`MutableUtf8Array`] created from its internal representation.
///
/// # Errors
/// This function returns an error iff:
/// * the offsets are not monotonically increasing
/// * The last offset is not equal to the values' length.
/// * the validity's length is not equal to `offsets.len() - 1`.
/// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`.
/// * The `values` between two consecutive `offsets` are not valid utf8
/// # Implementation
/// This function is `O(N)` - checking monotinicity and utf8 is `O(N)`
pub fn try_new(
data_type: DataType,
offsets: Vec<O>,
values: Vec<u8>,
validity: Option<MutableBitmap>,
) -> Result<Self> {
try_check_offsets_and_utf8(&offsets, &values)?;
if validity
.as_ref()
.map_or(false, |validity| validity.len() != offsets.len() - 1)
{
return Err(Error::oos(
"validity's length must be equal to the number of values",
));
}

if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
return Err(Error::oos(
"MutableUtf8Array can only be initialized with DataType::Utf8 or DataType::LargeUtf8",
));
}

Ok(Self {
data_type,
offsets,
values,
validity,
})
}

/// The canonical method to create a [`MutableUtf8Array`] out of low-end APIs.
/// # Panics
/// This function panics iff:
Expand All @@ -87,19 +128,7 @@ impl<O: Offset> MutableUtf8Array<O> {
values: Vec<u8>,
validity: Option<MutableBitmap>,
) -> Self {
check_offsets_and_utf8(&offsets, &values);
if let Some(ref validity) = validity {
assert_eq!(offsets.len() - 1, validity.len());
}
if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
panic!("MutableUtf8Array can only be initialized with DataType::Utf8 or DataType::LargeUtf8")
}
Self {
data_type,
offsets,
values,
validity,
}
Self::try_new(data_type, offsets, values, validity).unwrap()
}

/// Create a [`MutableUtf8Array`] out of low-end APIs.
Expand Down