Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Removed un-needed methods
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed May 28, 2022
1 parent a03b052 commit 52af809
Show file tree
Hide file tree
Showing 24 changed files with 134 additions and 212 deletions.
14 changes: 4 additions & 10 deletions guide/src/high_level.md
Expand Up @@ -180,12 +180,7 @@ What this means is that certain operations can be performed irrespectively of wh
is "null" or not (e.g. `PrimitiveArray<i32> + i32` can be applied to _all_ values
via SIMD and only copy the validity bitmap independently).

When an operation benefits from such arrangement, it is advantageous to use

* `Buffer::from_iter`
* `Buffer::from_trusted_len_iter`
* `Buffer::try_from_trusted_len_iter`

When an operation benefits from such arrangement, it is advantageous to use `Vec` and `Into<Buffer>`
If not, then use the `MutableArray` API, such as
`MutablePrimitiveArray<T>`, `MutableUtf8Array<O>` or `MutableListArray`.

Expand Down Expand Up @@ -254,12 +249,11 @@ where
O: NativeType,
F: Fn(I) -> O,
{
// create the iterator over _all_ values
let values = array.values().iter().map(|v| op(*v));
let values = Buffer::from_trusted_len_iter(values);
// apply F over _all_ values
let values = array.values().iter().map(|v| op(*v)).collect::<Vec<_>>();

// create the new array, cloning its validity
PrimitiveArray::<O>::from_data(data_type.clone(), values, array.validity().cloned())
PrimitiveArray::<O>::from_data(data_type.clone(), values.into(), array.validity().cloned())
}
```

Expand Down
2 changes: 1 addition & 1 deletion src/array/binary/mod.rs
Expand Up @@ -116,7 +116,7 @@ impl<O: Offset> BinaryArray<O> {
pub fn new_null(data_type: DataType, length: usize) -> Self {
Self::new(
data_type,
Buffer::new_zeroed(length + 1),
vec![O::default(); 1 + length].into(),
Buffer::new(),
Some(Bitmap::new_zeroed(length)),
)
Expand Down
2 changes: 1 addition & 1 deletion src/array/fixed_size_binary/mod.rs
Expand Up @@ -89,7 +89,7 @@ impl FixedSizeBinaryArray {
let size = Self::maybe_get_size(&data_type).unwrap();
Self::new(
data_type,
Buffer::new_zeroed(length * size),
vec![0u8; length * size].into(),
Some(Bitmap::new_zeroed(length)),
)
}
Expand Down
2 changes: 1 addition & 1 deletion src/array/list/mod.rs
Expand Up @@ -116,7 +116,7 @@ impl<O: Offset> ListArray<O> {
let child = Self::get_child_type(&data_type).clone();
Self::new(
data_type,
Buffer::new_zeroed(length + 1),
vec![O::default(); 1 + length].into(),
new_empty_array(child).into(),
Some(Bitmap::new_zeroed(length)),
)
Expand Down
2 changes: 1 addition & 1 deletion src/array/map/mod.rs
Expand Up @@ -106,7 +106,7 @@ impl MapArray {
let field = new_empty_array(Self::get_field(&data_type).data_type().clone()).into();
Self::new(
data_type,
Buffer::new_zeroed(length + 1),
vec![0i32; 1 + length].into(),
field,
Some(Bitmap::new_zeroed(length)),
)
Expand Down
2 changes: 1 addition & 1 deletion src/array/mod.rs
Expand Up @@ -52,7 +52,7 @@ pub trait Array: Send + Sync {

/// The number of null slots on this [`Array`].
/// # Implementation
/// This is `O(1)`.
/// This is `O(1)` since the number of null elements is pre-computed.
#[inline]
fn null_count(&self) -> usize {
if self.data_type() == &DataType::Null {
Expand Down
2 changes: 1 addition & 1 deletion src/array/primitive/mod.rs
Expand Up @@ -305,7 +305,7 @@ impl<T: NativeType> PrimitiveArray<T> {
pub fn new_null(data_type: DataType, length: usize) -> Self {
Self::new(
data_type,
Buffer::new_zeroed(length),
vec![T::default(); length].into(),
Some(Bitmap::new_zeroed(length)),
)
}
Expand Down
2 changes: 1 addition & 1 deletion src/array/union/mod.rs
Expand Up @@ -142,7 +142,7 @@ impl UnionArray {
};

// all from the same field
let types = Buffer::new_zeroed(length);
let types = vec![0i8; length].into();

Self::new(data_type, types, fields, offsets)
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/array/utf8/mod.rs
Expand Up @@ -344,7 +344,7 @@ impl<O: Offset> Utf8Array<O> {
pub fn new_null(data_type: DataType, length: usize) -> Self {
Self::new(
data_type,
Buffer::new_zeroed(length + 1),
vec![O::default(); 1 + length].into(),
Buffer::new(),
Some(Bitmap::new_zeroed(length)),
)
Expand Down
95 changes: 27 additions & 68 deletions src/buffer/immutable.rs
@@ -1,18 +1,38 @@
use either::Either;
use std::{iter::FromIterator, sync::Arc, usize};

use crate::{trusted_len::TrustedLen, types::NativeType};
use crate::types::NativeType;

use super::bytes::Bytes;

/// [`Buffer`] is a contiguous memory region that can
/// be shared across thread boundaries.
/// [`Buffer`] is a contiguous memory region that can be shared across thread boundaries.
///
/// The easiest way to think about `Buffer<T>` is being equivalent to
/// an immutable `Vec<T>`, with the following differences:
/// a `Arc<Vec<T>>`, with the following differences:
/// * `T` must be [`NativeType`]
/// * clone is `O(1)`
/// * memory is sharable across thread boundaries (it is under an `Arc`)
/// * it supports external allocated memory (FFI)
/// * slicing the buffer is `O(1)`
/// * it supports external allocated memory (via FFI)
///
/// The easiest way to create one is to use its implementation of `From` a `Vec`.
///
/// # Examples
/// ```
/// use arrow2::buffer::Buffer;
///
/// let buffer: Buffer<u32> = vec![1, 2, 3].into();
/// assert_eq!(buffer.as_ref(), [1, 2, 3].as_ref());
///
/// // it supports copy-on-write semantics (i.e. back to a `Vec`)
/// let vec: Vec<u32> = buffer.into_mut().right().unwrap();
/// assert_eq!(vec, vec![1, 2, 3]);
///
/// // cloning and slicing is `O(1)` (data is shared)
/// let buffer: Buffer<u32> = vec![1, 2, 3].into();
/// let slice = buffer.clone().slice(1, 1);
/// assert_eq!(slice.as_ref(), [2].as_ref());
/// // no longer possible to get a vec since `slice` and `buffer` share data
/// let same: Buffer<u32> = buffer.into_mut().left().unwrap();
/// ```
#[derive(Clone, PartialEq)]
pub struct Buffer<T: NativeType> {
/// the internal byte buffer.
Expand Down Expand Up @@ -46,20 +66,6 @@ impl<T: NativeType> Buffer<T> {
Self::default()
}

/// Creates a new [`Buffer`] filled with zeros.
#[inline]
pub fn new_zeroed(length: usize) -> Self {
vec![T::default(); length].into()
}

/// Takes ownership of [`Vec`].
/// # Implementation
/// This function is `O(1)`
#[inline]
pub fn from_slice<R: AsRef<[T]>>(data: R) -> Self {
data.as_ref().to_vec().into()
}

/// Auxiliary method to create a new Buffer
pub(crate) fn from_bytes(bytes: Bytes<T>) -> Self {
let length = bytes.len();
Expand Down Expand Up @@ -153,53 +159,6 @@ impl<T: NativeType> Buffer<T> {
}
}

impl<T: NativeType> Buffer<T> {
/// Creates a [`Buffer`] from an [`Iterator`] with a trusted length.
/// Prefer this to `collect` whenever possible, as it often enables auto-vectorization.
/// # Example
/// ```
/// # use arrow2::buffer::Buffer;
/// let v = vec![1u32];
/// let iter = v.iter().map(|x| x * 2);
/// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
/// assert_eq!(buffer.len(), 1)
/// ```
#[inline]
pub fn from_trusted_len_iter<I: TrustedLen<Item = T>>(iterator: I) -> Self {
iterator.collect::<Vec<_>>().into()
}

/// Creates a [`Buffer`] from an fallible [`Iterator`] with a trusted length.
#[inline]
pub fn try_from_trusted_len_iter<E, I: TrustedLen<Item = std::result::Result<T, E>>>(
iterator: I,
) -> std::result::Result<Self, E> {
Ok(iterator.collect::<std::result::Result<Vec<_>, E>>()?.into())
}

/// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length.
/// # Safety
/// This method assumes that the iterator's size is correct and is undefined behavior
/// to use it on an iterator that reports an incorrect length.
#[inline]
pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = T>>(iterator: I) -> Self {
iterator.collect::<Vec<_>>().into()
}

/// # Safety
/// This method assumes that the iterator's size is correct and is undefined behavior
/// to use it on an iterator that reports an incorrect length.
#[inline]
pub unsafe fn try_from_trusted_len_iter_unchecked<
E,
I: Iterator<Item = std::result::Result<T, E>>,
>(
iterator: I,
) -> std::result::Result<Self, E> {
Ok(iterator.collect::<std::result::Result<Vec<_>, E>>()?.into())
}
}

impl<T: NativeType> From<Vec<T>> for Buffer<T> {
#[inline]
fn from(p: Vec<T>) -> Self {
Expand Down
20 changes: 10 additions & 10 deletions tests/it/array/binary/mod.rs
Expand Up @@ -86,24 +86,24 @@ fn with_validity() {
#[test]
#[should_panic]
fn wrong_offsets() {
let offsets = Buffer::from_slice([0, 5, 4]); // invalid offsets
let values = Buffer::from_slice(b"abbbbb");
let offsets = Buffer::from(vec![0, 5, 4]); // invalid offsets
let values = Buffer::from(b"abbbbb".to_vec());
BinaryArray::<i32>::from_data(DataType::Binary, offsets, values, None);
}

#[test]
#[should_panic]
fn wrong_data_type() {
let offsets = Buffer::from_slice([0, 4]);
let values = Buffer::from_slice(b"abbb");
let offsets = Buffer::from(vec![0, 4]);
let values = Buffer::from(b"abbb".to_vec());
BinaryArray::<i32>::from_data(DataType::Int8, offsets, values, None);
}

#[test]
#[should_panic]
fn value_with_wrong_offsets_panics() {
let offsets = Buffer::from_slice([0, 10, 11, 4]);
let values = Buffer::from_slice(b"abbb");
let offsets = Buffer::from(vec![0, 10, 11, 4]);
let values = Buffer::from(b"abbb".to_vec());
// the 10-11 is not checked
let array = BinaryArray::<i32>::from_data(DataType::Binary, offsets, values, None);

Expand All @@ -115,8 +115,8 @@ fn value_with_wrong_offsets_panics() {
#[test]
#[should_panic]
fn index_out_of_bounds_panics() {
let offsets = Buffer::from_slice([0, 1, 2, 4]);
let values = Buffer::from_slice(b"abbb");
let offsets = Buffer::from(vec![0, 1, 2, 4]);
let values = Buffer::from(b"abbb".to_vec());
let array = BinaryArray::<i32>::from_data(DataType::Utf8, offsets, values, None);

array.value(3);
Expand All @@ -125,8 +125,8 @@ fn index_out_of_bounds_panics() {
#[test]
#[should_panic]
fn value_unchecked_with_wrong_offsets_panics() {
let offsets = Buffer::from_slice([0, 10, 11, 4]);
let values = Buffer::from_slice(b"abbb");
let offsets = Buffer::from(vec![0, 10, 11, 4]);
let values = Buffer::from(b"abbb".to_vec());
// the 10-11 is not checked
let array = BinaryArray::<i32>::from_data(DataType::Binary, offsets, values, None);

Expand Down
4 changes: 2 additions & 2 deletions tests/it/array/equal/list.rs
Expand Up @@ -69,7 +69,7 @@ fn test_list_offsets() {

#[test]
fn test_bla() {
let offsets = Buffer::from_slice([0, 3, 3, 6]);
let offsets = Buffer::from(vec![0, 3, 3, 6]);
let data_type = ListArray::<i32>::default_datatype(DataType::Int32);
let values = Arc::new(Int32Array::from([
Some(1),
Expand All @@ -83,7 +83,7 @@ fn test_bla() {
let lhs = ListArray::<i32>::from_data(data_type, offsets, values, Some(validity));
let lhs = lhs.slice(1, 2);

let offsets = Buffer::from_slice([0, 0, 3]);
let offsets = Buffer::from(vec![0, 0, 3]);
let data_type = ListArray::<i32>::default_datatype(DataType::Int32);
let values = Arc::new(Int32Array::from([Some(4), None, Some(6)]));
let validity = Bitmap::from([false, true]);
Expand Down
14 changes: 7 additions & 7 deletions tests/it/array/fixed_size_binary/mod.rs
Expand Up @@ -6,7 +6,7 @@ mod mutable;
fn basics() {
let array = FixedSizeBinaryArray::from_data(
DataType::FixedSizeBinary(2),
Buffer::from_slice([1, 2, 3, 4, 5, 6]),
Buffer::from(vec![1, 2, 3, 4, 5, 6]),
Some(Bitmap::from([true, false, true])),
);
assert_eq!(array.size(), 2);
Expand All @@ -23,15 +23,15 @@ fn basics() {

#[test]
fn with_validity() {
let values = Buffer::from_slice([1, 2, 3, 4, 5, 6]);
let values = Buffer::from(vec![1, 2, 3, 4, 5, 6]);
let a = FixedSizeBinaryArray::new(DataType::FixedSizeBinary(2), values, None);
let a = a.with_validity(Some(Bitmap::from([true, false, true])));
assert!(a.validity().is_some());
}

#[test]
fn debug() {
let values = Buffer::from_slice([1, 2, 3, 4, 5, 6]);
let values = Buffer::from(vec![1, 2, 3, 4, 5, 6]);
let a = FixedSizeBinaryArray::from_data(
DataType::FixedSizeBinary(2),
values,
Expand Down Expand Up @@ -66,26 +66,26 @@ fn from_iter() {

#[test]
fn wrong_size() {
let values = Buffer::from_slice(b"abb");
let values = Buffer::from(b"abb".to_vec());
assert!(FixedSizeBinaryArray::try_new(DataType::FixedSizeBinary(2), values, None).is_err());
}

#[test]
fn wrong_len() {
let values = Buffer::from_slice(b"abba");
let values = Buffer::from(b"abba".to_vec());
let validity = Some([true, false, false].into()); // it should be 2
assert!(FixedSizeBinaryArray::try_new(DataType::FixedSizeBinary(2), values, validity).is_err());
}

#[test]
fn wrong_data_type() {
let values = Buffer::from_slice(b"abba");
let values = Buffer::from(b"abba".to_vec());
assert!(FixedSizeBinaryArray::try_new(DataType::Binary, values, None).is_err());
}

#[test]
fn to() {
let values = Buffer::from_slice(b"abba");
let values = Buffer::from(b"abba".to_vec());
let a = FixedSizeBinaryArray::new(DataType::FixedSizeBinary(2), values, None);

let extension = DataType::Extension(
Expand Down

0 comments on commit 52af809

Please sign in to comment.