diff --git a/guide/src/high_level.md b/guide/src/high_level.md index 80652038b0f..8c5b93c55f0 100644 --- a/guide/src/high_level.md +++ b/guide/src/high_level.md @@ -180,12 +180,7 @@ What this means is that certain operations can be performed irrespectively of wh is "null" or not (e.g. `PrimitiveArray + i32` can be applied to _all_ values via SIMD and only copy the validity bitmap independently). -When an operation benefits from such arrangement, it is advantageous to use - -* `Buffer::from_iter` -* `Buffer::from_trusted_len_iter` -* `Buffer::try_from_trusted_len_iter` - +When an operation benefits from such arrangement, it is advantageous to use `Vec` and `Into<Buffer>`. If not, then use the `MutableArray` API, such as `MutablePrimitiveArray`, `MutableUtf8Array` or `MutableListArray`. @@ -254,12 +249,11 @@ where O: NativeType, F: Fn(I) -> O, { - // create the iterator over _all_ values - let values = array.values().iter().map(|v| op(*v)); - let values = Buffer::from_trusted_len_iter(values); + // apply F over _all_ values + let values = array.values().iter().map(|v| op(*v)).collect::<Vec<_>>(); // create the new array, cloning its validity - PrimitiveArray::<O>::from_data(data_type.clone(), values, array.validity().cloned()) + PrimitiveArray::<O>::from_data(data_type.clone(), values.into(), array.validity().cloned()) } ``` diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index ba7f635de9f..1805e5e33f3 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -116,7 +116,7 @@ impl BinaryArray { pub fn new_null(data_type: DataType, length: usize) -> Self { Self::new( data_type, - Buffer::new_zeroed(length + 1), + vec![O::default(); 1 + length].into(), Buffer::new(), Some(Bitmap::new_zeroed(length)), ) diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index 2a843842dc0..4209998ec1f 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -89,7 +89,7 @@ impl FixedSizeBinaryArray { let size = 
Self::maybe_get_size(&data_type).unwrap(); Self::new( data_type, - Buffer::new_zeroed(length * size), + vec![0u8; length * size].into(), Some(Bitmap::new_zeroed(length)), ) } diff --git a/src/array/list/mod.rs b/src/array/list/mod.rs index 1a3de45a342..c5ab495559f 100644 --- a/src/array/list/mod.rs +++ b/src/array/list/mod.rs @@ -116,7 +116,7 @@ impl ListArray { let child = Self::get_child_type(&data_type).clone(); Self::new( data_type, - Buffer::new_zeroed(length + 1), + vec![O::default(); 1 + length].into(), new_empty_array(child).into(), Some(Bitmap::new_zeroed(length)), ) diff --git a/src/array/map/mod.rs b/src/array/map/mod.rs index b69ac7e2ac7..2c667b29583 100644 --- a/src/array/map/mod.rs +++ b/src/array/map/mod.rs @@ -106,7 +106,7 @@ impl MapArray { let field = new_empty_array(Self::get_field(&data_type).data_type().clone()).into(); Self::new( data_type, - Buffer::new_zeroed(length + 1), + vec![0i32; 1 + length].into(), field, Some(Bitmap::new_zeroed(length)), ) diff --git a/src/array/mod.rs b/src/array/mod.rs index 5a0b84b224e..0be802df2f1 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -52,7 +52,7 @@ pub trait Array: Send + Sync { /// The number of null slots on this [`Array`]. /// # Implementation - /// This is `O(1)`. + /// This is `O(1)` since the number of null elements is pre-computed. 
#[inline] fn null_count(&self) -> usize { if self.data_type() == &DataType::Null { diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index ae753549f75..7ba44345cee 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -305,7 +305,7 @@ impl PrimitiveArray { pub fn new_null(data_type: DataType, length: usize) -> Self { Self::new( data_type, - Buffer::new_zeroed(length), + vec![T::default(); length].into(), Some(Bitmap::new_zeroed(length)), ) } diff --git a/src/array/union/mod.rs b/src/array/union/mod.rs index 925a156c11d..dfd87ef6af0 100644 --- a/src/array/union/mod.rs +++ b/src/array/union/mod.rs @@ -142,7 +142,7 @@ impl UnionArray { }; // all from the same field - let types = Buffer::new_zeroed(length); + let types = vec![0i8; length].into(); Self::new(data_type, types, fields, offsets) } else { diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs index c216ae0ab59..8c8813c14b5 100644 --- a/src/array/utf8/mod.rs +++ b/src/array/utf8/mod.rs @@ -344,7 +344,7 @@ impl Utf8Array { pub fn new_null(data_type: DataType, length: usize) -> Self { Self::new( data_type, - Buffer::new_zeroed(length + 1), + vec![O::default(); 1 + length].into(), Buffer::new(), Some(Bitmap::new_zeroed(length)), ) diff --git a/src/buffer/immutable.rs b/src/buffer/immutable.rs index 1f6774f0a81..758de74f29e 100644 --- a/src/buffer/immutable.rs +++ b/src/buffer/immutable.rs @@ -1,18 +1,38 @@ use either::Either; use std::{iter::FromIterator, sync::Arc, usize}; -use crate::{trusted_len::TrustedLen, types::NativeType}; +use crate::types::NativeType; use super::bytes::Bytes; -/// [`Buffer`] is a contiguous memory region that can -/// be shared across thread boundaries. +/// [`Buffer`] is a contiguous memory region that can be shared across thread boundaries. 
+/// /// The easiest way to think about `Buffer` is being equivalent to -/// an immutable `Vec`, with the following differences: +/// an `Arc<Vec<T>>`, with the following differences: /// * `T` must be [`NativeType`] -/// * clone is `O(1)` -/// * memory is sharable across thread boundaries (it is under an `Arc`) -/// * it supports external allocated memory (FFI) +/// * slicing the buffer is `O(1)` +/// * it supports external allocated memory (via FFI) +/// +/// The easiest way to create one is to use its implementation of `From<Vec<T>>`. +/// +/// # Examples +/// ``` +/// use arrow2::buffer::Buffer; +/// +/// let buffer: Buffer = vec![1, 2, 3].into(); +/// assert_eq!(buffer.as_ref(), [1, 2, 3].as_ref()); +/// +/// // it supports copy-on-write semantics (i.e. back to a `Vec`) +/// let vec: Vec = buffer.into_mut().right().unwrap(); +/// assert_eq!(vec, vec![1, 2, 3]); +/// +/// // cloning and slicing is `O(1)` (data is shared) +/// let buffer: Buffer = vec![1, 2, 3].into(); +/// let slice = buffer.clone().slice(1, 1); +/// assert_eq!(slice.as_ref(), [2].as_ref()); +/// // no longer possible to get a vec since `slice` and `buffer` share data +/// let same: Buffer = buffer.into_mut().left().unwrap(); +/// ``` #[derive(Clone, PartialEq)] pub struct Buffer { /// the internal byte buffer. @@ -46,20 +66,6 @@ impl Buffer { Self::default() } - /// Creates a new [`Buffer`] filled with zeros. - #[inline] - pub fn new_zeroed(length: usize) -> Self { - vec![T::default(); length].into() - } - - /// Takes ownership of [`Vec`]. - /// # Implementation - /// This function is `O(1)` - #[inline] - pub fn from_slice>(data: R) -> Self { - data.as_ref().to_vec().into() - } - /// Auxiliary method to create a new Buffer pub(crate) fn from_bytes(bytes: Bytes) -> Self { let length = bytes.len(); @@ -153,53 +159,6 @@ impl Buffer { } } -impl Buffer { - /// Creates a [`Buffer`] from an [`Iterator`] with a trusted length. 
- /// Prefer this to `collect` whenever possible, as it often enables auto-vectorization. - /// # Example - /// ``` - /// # use arrow2::buffer::Buffer; - /// let v = vec![1u32]; - /// let iter = v.iter().map(|x| x * 2); - /// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) }; - /// assert_eq!(buffer.len(), 1) - /// ``` - #[inline] - pub fn from_trusted_len_iter>(iterator: I) -> Self { - iterator.collect::>().into() - } - - /// Creates a [`Buffer`] from an fallible [`Iterator`] with a trusted length. - #[inline] - pub fn try_from_trusted_len_iter>>( - iterator: I, - ) -> std::result::Result { - Ok(iterator.collect::, E>>()?.into()) - } - - /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length. - /// # Safety - /// This method assumes that the iterator's size is correct and is undefined behavior - /// to use it on an iterator that reports an incorrect length. - #[inline] - pub unsafe fn from_trusted_len_iter_unchecked>(iterator: I) -> Self { - iterator.collect::>().into() - } - - /// # Safety - /// This method assumes that the iterator's size is correct and is undefined behavior - /// to use it on an iterator that reports an incorrect length. 
- #[inline] - pub unsafe fn try_from_trusted_len_iter_unchecked< - E, - I: Iterator>, - >( - iterator: I, - ) -> std::result::Result { - Ok(iterator.collect::, E>>()?.into()) - } -} - impl From> for Buffer { #[inline] fn from(p: Vec) -> Self { diff --git a/tests/it/array/binary/mod.rs b/tests/it/array/binary/mod.rs index 2942188e6aa..fb1bb9dcbe6 100644 --- a/tests/it/array/binary/mod.rs +++ b/tests/it/array/binary/mod.rs @@ -86,24 +86,24 @@ fn with_validity() { #[test] #[should_panic] fn wrong_offsets() { - let offsets = Buffer::from_slice([0, 5, 4]); // invalid offsets - let values = Buffer::from_slice(b"abbbbb"); + let offsets = Buffer::from(vec![0, 5, 4]); // invalid offsets + let values = Buffer::from(b"abbbbb".to_vec()); BinaryArray::::from_data(DataType::Binary, offsets, values, None); } #[test] #[should_panic] fn wrong_data_type() { - let offsets = Buffer::from_slice([0, 4]); - let values = Buffer::from_slice(b"abbb"); + let offsets = Buffer::from(vec![0, 4]); + let values = Buffer::from(b"abbb".to_vec()); BinaryArray::::from_data(DataType::Int8, offsets, values, None); } #[test] #[should_panic] fn value_with_wrong_offsets_panics() { - let offsets = Buffer::from_slice([0, 10, 11, 4]); - let values = Buffer::from_slice(b"abbb"); + let offsets = Buffer::from(vec![0, 10, 11, 4]); + let values = Buffer::from(b"abbb".to_vec()); // the 10-11 is not checked let array = BinaryArray::::from_data(DataType::Binary, offsets, values, None); @@ -115,8 +115,8 @@ fn value_with_wrong_offsets_panics() { #[test] #[should_panic] fn index_out_of_bounds_panics() { - let offsets = Buffer::from_slice([0, 1, 2, 4]); - let values = Buffer::from_slice(b"abbb"); + let offsets = Buffer::from(vec![0, 1, 2, 4]); + let values = Buffer::from(b"abbb".to_vec()); let array = BinaryArray::::from_data(DataType::Utf8, offsets, values, None); array.value(3); @@ -125,8 +125,8 @@ fn index_out_of_bounds_panics() { #[test] #[should_panic] fn value_unchecked_with_wrong_offsets_panics() { - let offsets 
= Buffer::from_slice([0, 10, 11, 4]); - let values = Buffer::from_slice(b"abbb"); + let offsets = Buffer::from(vec![0, 10, 11, 4]); + let values = Buffer::from(b"abbb".to_vec()); // the 10-11 is not checked let array = BinaryArray::::from_data(DataType::Binary, offsets, values, None); diff --git a/tests/it/array/equal/list.rs b/tests/it/array/equal/list.rs index 8c0ff89b6e6..c57a1888fcb 100644 --- a/tests/it/array/equal/list.rs +++ b/tests/it/array/equal/list.rs @@ -69,7 +69,7 @@ fn test_list_offsets() { #[test] fn test_bla() { - let offsets = Buffer::from_slice([0, 3, 3, 6]); + let offsets = Buffer::from(vec![0, 3, 3, 6]); let data_type = ListArray::::default_datatype(DataType::Int32); let values = Arc::new(Int32Array::from([ Some(1), @@ -83,7 +83,7 @@ fn test_bla() { let lhs = ListArray::::from_data(data_type, offsets, values, Some(validity)); let lhs = lhs.slice(1, 2); - let offsets = Buffer::from_slice([0, 0, 3]); + let offsets = Buffer::from(vec![0, 0, 3]); let data_type = ListArray::::default_datatype(DataType::Int32); let values = Arc::new(Int32Array::from([Some(4), None, Some(6)])); let validity = Bitmap::from([false, true]); diff --git a/tests/it/array/fixed_size_binary/mod.rs b/tests/it/array/fixed_size_binary/mod.rs index f1edb8bf76d..3163f669732 100644 --- a/tests/it/array/fixed_size_binary/mod.rs +++ b/tests/it/array/fixed_size_binary/mod.rs @@ -6,7 +6,7 @@ mod mutable; fn basics() { let array = FixedSizeBinaryArray::from_data( DataType::FixedSizeBinary(2), - Buffer::from_slice([1, 2, 3, 4, 5, 6]), + Buffer::from(vec![1, 2, 3, 4, 5, 6]), Some(Bitmap::from([true, false, true])), ); assert_eq!(array.size(), 2); @@ -23,7 +23,7 @@ fn basics() { #[test] fn with_validity() { - let values = Buffer::from_slice([1, 2, 3, 4, 5, 6]); + let values = Buffer::from(vec![1, 2, 3, 4, 5, 6]); let a = FixedSizeBinaryArray::new(DataType::FixedSizeBinary(2), values, None); let a = a.with_validity(Some(Bitmap::from([true, false, true]))); assert!(a.validity().is_some()); @@ 
-31,7 +31,7 @@ fn with_validity() { #[test] fn debug() { - let values = Buffer::from_slice([1, 2, 3, 4, 5, 6]); + let values = Buffer::from(vec![1, 2, 3, 4, 5, 6]); let a = FixedSizeBinaryArray::from_data( DataType::FixedSizeBinary(2), values, @@ -66,26 +66,26 @@ fn from_iter() { #[test] fn wrong_size() { - let values = Buffer::from_slice(b"abb"); + let values = Buffer::from(b"abb".to_vec()); assert!(FixedSizeBinaryArray::try_new(DataType::FixedSizeBinary(2), values, None).is_err()); } #[test] fn wrong_len() { - let values = Buffer::from_slice(b"abba"); + let values = Buffer::from(b"abba".to_vec()); let validity = Some([true, false, false].into()); // it should be 2 assert!(FixedSizeBinaryArray::try_new(DataType::FixedSizeBinary(2), values, validity).is_err()); } #[test] fn wrong_data_type() { - let values = Buffer::from_slice(b"abba"); + let values = Buffer::from(b"abba".to_vec()); assert!(FixedSizeBinaryArray::try_new(DataType::Binary, values, None).is_err()); } #[test] fn to() { - let values = Buffer::from_slice(b"abba"); + let values = Buffer::from(b"abba".to_vec()); let a = FixedSizeBinaryArray::new(DataType::FixedSizeBinary(2), values, None); let extension = DataType::Extension( diff --git a/tests/it/array/list/mod.rs b/tests/it/array/list/mod.rs index 2a4e1feca4e..c0e8ff19b19 100644 --- a/tests/it/array/list/mod.rs +++ b/tests/it/array/list/mod.rs @@ -8,13 +8,13 @@ mod mutable; #[test] fn debug() { - let values = Buffer::from_slice([1, 2, 3, 4, 5]); + let values = Buffer::from(vec![1, 2, 3, 4, 5]); let values = PrimitiveArray::::from_data(DataType::Int32, values, None); let data_type = ListArray::::default_datatype(DataType::Int32); let array = ListArray::::from_data( data_type, - Buffer::from_slice([0, 2, 2, 3, 5]), + Buffer::from(vec![0, 2, 2, 3, 5]), Arc::new(values), None, ); @@ -25,13 +25,13 @@ fn debug() { #[test] #[should_panic] fn test_nested_panic() { - let values = Buffer::from_slice([1, 2, 3, 4, 5]); + let values = Buffer::from(vec![1, 2, 3, 4, 
5]); let values = PrimitiveArray::::from_data(DataType::Int32, values, None); let data_type = ListArray::::default_datatype(DataType::Int32); let array = ListArray::::from_data( data_type.clone(), - Buffer::from_slice([0, 2, 2, 3, 5]), + Buffer::from(vec![0, 2, 2, 3, 5]), Arc::new(values), None, ); @@ -40,7 +40,7 @@ fn test_nested_panic() { // the nested structure of the child data let _ = ListArray::::from_data( data_type, - Buffer::from_slice([0, 2, 4]), + Buffer::from(vec![0, 2, 4]), Arc::new(array), None, ); @@ -48,13 +48,13 @@ fn test_nested_panic() { #[test] fn test_nested_display() { - let values = Buffer::from_slice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + let values = Buffer::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); let values = PrimitiveArray::::from_data(DataType::Int32, values, None); let data_type = ListArray::::default_datatype(DataType::Int32); let array = ListArray::::from_data( data_type, - Buffer::from_slice([0, 2, 4, 7, 7, 8, 10]), + Buffer::from(vec![0, 2, 4, 7, 7, 8, 10]), Arc::new(values), None, ); @@ -62,7 +62,7 @@ fn test_nested_display() { let data_type = ListArray::::default_datatype(array.data_type().clone()); let nested = ListArray::::from_data( data_type, - Buffer::from_slice([0, 2, 5, 6]), + Buffer::from(vec![0, 2, 5, 6]), Arc::new(array), None, ); diff --git a/tests/it/array/list/mutable.rs b/tests/it/array/list/mutable.rs index e87d5354f22..f7d21bdc519 100644 --- a/tests/it/array/list/mutable.rs +++ b/tests/it/array/list/mutable.rs @@ -16,14 +16,14 @@ fn basics() { let values = PrimitiveArray::::from_data( DataType::Int32, - Buffer::from_slice([1, 2, 3, 4, 0, 6]), + Buffer::from(vec![1, 2, 3, 4, 0, 6]), Some(Bitmap::from([true, true, true, true, false, true])), ); let data_type = ListArray::::default_datatype(DataType::Int32); let expected = ListArray::::from_data( data_type, - Buffer::from_slice([0, 3, 3, 6]), + Buffer::from(vec![0, 3, 3, 6]), Arc::new(values), Some(Bitmap::from([true, false, true])), ); diff --git 
a/tests/it/array/primitive/mod.rs b/tests/it/array/primitive/mod.rs index 7599d8ef2cb..058d503f479 100644 --- a/tests/it/array/primitive/mod.rs +++ b/tests/it/array/primitive/mod.rs @@ -87,20 +87,20 @@ fn months_days_ns() { #[test] fn wrong_data_type() { - let values = Buffer::from_slice(b"abbb"); + let values = Buffer::from(b"abbb".to_vec()); assert!(PrimitiveArray::try_new(DataType::Utf8, values, None).is_err()); } #[test] fn wrong_len() { - let values = Buffer::from_slice(b"abbb"); + let values = Buffer::from(b"abbb".to_vec()); let validity = Some([true, false].into()); assert!(PrimitiveArray::try_new(DataType::Utf8, values, validity).is_err()); } #[test] fn into_mut_1() { - let values = Buffer::::from_slice([0, 1]); + let values = Buffer::::from(vec![0, 1]); let a = values.clone(); // cloned values assert_eq!(a, values); let array = PrimitiveArray::new(DataType::Int32, values, None); @@ -109,7 +109,7 @@ fn into_mut_1() { #[test] fn into_mut_2() { - let values = Buffer::::from_slice([0, 1]); + let values = Buffer::::from(vec![0, 1]); let validity = Some([true, false].into()); let a = validity.clone(); // cloned values assert_eq!(a, validity); @@ -119,7 +119,7 @@ fn into_mut_2() { #[test] fn into_mut_3() { - let values = Buffer::::from_slice([0, 1]); + let values = Buffer::::from(vec![0, 1]); let validity = Some([true, false].into()); let array = PrimitiveArray::new(DataType::Int32, values, validity); assert!(array.into_mut().is_right()); diff --git a/tests/it/array/union.rs b/tests/it/array/union.rs index b52b5ff4fd7..6d8a1f54f7a 100644 --- a/tests/it/array/union.rs +++ b/tests/it/array/union.rs @@ -69,7 +69,7 @@ fn slice() -> Result<()> { Field::new("b", DataType::Utf8, true), ]; let data_type = DataType::Union(fields, None, UnionMode::Sparse); - let types = Buffer::from_slice([0, 0, 1]); + let types = Buffer::from(vec![0, 0, 1]); let fields = vec![ Arc::new(Int32Array::from(&[Some(1), None, Some(2)])) as Arc, Arc::new(Utf8Array::::from(&[Some("a"), Some("b"), 
Some("c")])) as Arc, @@ -79,7 +79,7 @@ fn slice() -> Result<()> { let result = array.slice(1, 2); - let sliced_types = Buffer::from_slice([0, 1]); + let sliced_types = Buffer::from(vec![0, 1]); let sliced_fields = vec![ Arc::new(Int32Array::from(&[None, Some(2)])) as Arc, Arc::new(Utf8Array::::from(&[Some("b"), Some("c")])) as Arc, @@ -97,7 +97,7 @@ fn iter_sparse() -> Result<()> { Field::new("b", DataType::Utf8, true), ]; let data_type = DataType::Union(fields, None, UnionMode::Sparse); - let types = Buffer::from_slice([0, 0, 1]); + let types = Buffer::from(vec![0, 0, 1]); let fields = vec![ Arc::new(Int32Array::from(&[Some(1), None, Some(2)])) as Arc, Arc::new(Utf8Array::::from(&[Some("a"), Some("b"), Some("c")])) as Arc, @@ -130,8 +130,8 @@ fn iter_dense() -> Result<()> { Field::new("b", DataType::Utf8, true), ]; let data_type = DataType::Union(fields, None, UnionMode::Dense); - let types = Buffer::from_slice([0, 0, 1]); - let offsets = Buffer::::from_slice([0, 1, 0]); + let types = Buffer::from(vec![0, 0, 1]); + let offsets = Buffer::::from(vec![0, 1, 0]); let fields = vec![ Arc::new(Int32Array::from(&[Some(1), None])) as Arc, Arc::new(Utf8Array::::from(&[Some("c")])) as Arc, @@ -164,7 +164,7 @@ fn iter_sparse_slice() -> Result<()> { Field::new("b", DataType::Utf8, true), ]; let data_type = DataType::Union(fields, None, UnionMode::Sparse); - let types = Buffer::from_slice([0, 0, 1]); + let types = Buffer::from(vec![0, 0, 1]); let fields = vec![ Arc::new(Int32Array::from(&[Some(1), Some(3), Some(2)])) as Arc, Arc::new(Utf8Array::::from(&[Some("a"), Some("b"), Some("c")])) as Arc, @@ -190,8 +190,8 @@ fn iter_dense_slice() -> Result<()> { Field::new("b", DataType::Utf8, true), ]; let data_type = DataType::Union(fields, None, UnionMode::Dense); - let types = Buffer::from_slice([0, 0, 1]); - let offsets = Buffer::::from_slice([0, 1, 0]); + let types = Buffer::from(vec![0, 0, 1]); + let offsets = Buffer::::from(vec![0, 1, 0]); let fields = vec![ 
Arc::new(Int32Array::from(&[Some(1), Some(3)])) as Arc, Arc::new(Utf8Array::::from(&[Some("c")])) as Arc, @@ -217,8 +217,8 @@ fn scalar() -> Result<()> { Field::new("b", DataType::Utf8, true), ]; let data_type = DataType::Union(fields, None, UnionMode::Dense); - let types = Buffer::from_slice([0, 0, 1]); - let offsets = Buffer::::from_slice([0, 1, 0]); + let types = Buffer::from(vec![0, 0, 1]); + let offsets = Buffer::::from(vec![0, 1, 0]); let fields = vec![ Arc::new(Int32Array::from(&[Some(1), None])) as Arc, Arc::new(Utf8Array::::from(&[Some("c")])) as Arc, diff --git a/tests/it/array/utf8/mod.rs b/tests/it/array/utf8/mod.rs index 90e71440b55..fc55ebccdec 100644 --- a/tests/it/array/utf8/mod.rs +++ b/tests/it/array/utf8/mod.rs @@ -59,8 +59,8 @@ fn from() { fn from_slice() { let b = Utf8Array::::from_slice(["a", "b", "cc"]); - let offsets = Buffer::from_slice([0, 1, 2, 4]); - let values = Buffer::from_slice("abcc".as_bytes()); + let offsets = Buffer::from(vec![0, 1, 2, 4]); + let values = Buffer::from(b"abcc".to_vec()); assert_eq!( b, Utf8Array::::from_data(DataType::Utf8, offsets, values, None) @@ -71,8 +71,8 @@ fn from_slice() { fn from_iter_values() { let b = Utf8Array::::from_iter_values(["a", "b", "cc"].iter()); - let offsets = Buffer::from_slice([0, 1, 2, 4]); - let values = Buffer::from_slice(b"abcc"); + let offsets = Buffer::from(vec![0, 1, 2, 4]); + let values = Buffer::from(b"abcc".to_vec()); assert_eq!( b, Utf8Array::::from_data(DataType::Utf8, offsets, values, None) @@ -84,8 +84,8 @@ fn from_trusted_len_iter() { let b = Utf8Array::::from_trusted_len_iter(vec![Some("a"), Some("b"), Some("cc")].into_iter()); - let offsets = Buffer::from_slice([0, 1, 2, 4]); - let values = Buffer::from_slice(b"abcc"); + let offsets = Buffer::from(vec![0, 1, 2, 4]); + let values = Buffer::from(b"abcc".to_vec()); assert_eq!( b, Utf8Array::::from_data(DataType::Utf8, offsets, values, None) @@ -101,8 +101,8 @@ fn try_from_trusted_len_iter() { ) .unwrap(); - let offsets = 
Buffer::from_slice([0, 1, 2, 4]); - let values = Buffer::from_slice("abcc".as_bytes()); + let offsets = Buffer::from(vec![0, 1, 2, 4]); + let values = Buffer::from(b"abcc".to_vec()); assert_eq!( b, Utf8Array::::from_data(DataType::Utf8, offsets, values, None) @@ -111,59 +111,59 @@ fn try_from_trusted_len_iter() { #[test] fn not_utf8() { - let offsets = Buffer::from_slice([0, 4]); - let values = Buffer::from_slice([0, 159, 146, 150]); // invalid utf8 + let offsets = Buffer::from(vec![0, 4]); + let values = Buffer::from(vec![0, 159, 146, 150]); // invalid utf8 assert!(Utf8Array::::try_new(DataType::Utf8, offsets, values, None).is_err()); } #[test] fn not_utf8_individually() { - let offsets = Buffer::from_slice([0, 1, 2]); - let values = Buffer::from_slice([207, 128]); // each is invalid utf8, but together is valid + let offsets = Buffer::from(vec![0, 1, 2]); + let values = Buffer::from(vec![207, 128]); // each is invalid utf8, but together is valid assert!(Utf8Array::::try_new(DataType::Utf8, offsets, values, None).is_err()); } #[test] fn wrong_offsets() { - let offsets = Buffer::from_slice([0, 5, 4]); // invalid offsets - let values = Buffer::from_slice(b"abbbbb"); + let offsets = Buffer::from(vec![0, 5, 4]); // invalid offsets + let values = Buffer::from(b"abbbbb".to_vec()); assert!(Utf8Array::::try_new(DataType::Utf8, offsets, values, None).is_err()); } #[test] fn wrong_data_type() { - let offsets = Buffer::from_slice([0, 4]); - let values = Buffer::from_slice(b"abbb"); + let offsets = Buffer::from(vec![0, 4]); + let values = Buffer::from(b"abbb".to_vec()); assert!(Utf8Array::::try_new(DataType::Int32, offsets, values, None).is_err()); } #[test] fn out_of_bounds_offsets_panics() { // the 10 is out of bounds - let offsets = Buffer::from_slice([0, 10, 11]); - let values = Buffer::from_slice(b"abbb"); + let offsets = Buffer::from(vec![0, 10, 11]); + let values = Buffer::from(b"abbb".to_vec()); assert!(Utf8Array::::try_new(DataType::Utf8, offsets, values, 
None).is_err()); } #[test] fn decreasing_offset_and_ascii_panics() { - let offsets = Buffer::from_slice([0, 2, 1]); - let values = Buffer::from_slice(b"abbb"); + let offsets = Buffer::from(vec![0, 2, 1]); + let values = Buffer::from(b"abbb".to_vec()); assert!(Utf8Array::::try_new(DataType::Utf8, offsets, values, None).is_err()); } #[test] fn decreasing_offset_and_utf8_panics() { - let offsets = Buffer::from_slice([0, 2, 4, 2]); // not increasing - let values = Buffer::from_slice([207, 128, 207, 128, 207, 128]); // valid utf8 + let offsets = Buffer::from(vec![0, 2, 4, 2]); // not increasing + let values = Buffer::from(vec![207, 128, 207, 128, 207, 128]); // valid utf8 assert!(Utf8Array::::try_new(DataType::Utf8, offsets, values, None).is_err()); } #[test] #[should_panic] fn index_out_of_bounds_panics() { - let offsets = Buffer::from_slice([0, 1, 2, 4]); - let values = Buffer::from_slice(b"abbb"); + let offsets = Buffer::from(vec![0, 1, 2, 4]); + let values = Buffer::from(b"abbb".to_vec()); let array = Utf8Array::::from_data(DataType::Utf8, offsets, values, None); array.value(3); @@ -178,8 +178,8 @@ fn debug() { #[test] fn into_mut_1() { - let offsets = Buffer::from_slice([0, 1]); - let values = Buffer::from_slice(b"a"); + let offsets = Buffer::from(vec![0, 1]); + let values = Buffer::from(b"a".to_vec()); let a = values.clone(); // cloned values assert_eq!(a, values); let array = Utf8Array::::from_data(DataType::Utf8, offsets, values, None); @@ -188,8 +188,8 @@ fn into_mut_1() { #[test] fn into_mut_2() { - let offsets = Buffer::from_slice([0, 1]); - let values = Buffer::from_slice(b"a"); + let offsets = Buffer::from(vec![0, 1]); + let values = Buffer::from(b"a".to_vec()); let a = offsets.clone(); // cloned offsets assert_eq!(a, offsets); let array = Utf8Array::::from_data(DataType::Utf8, offsets, values, None); @@ -198,8 +198,8 @@ fn into_mut_2() { #[test] fn into_mut_3() { - let offsets = Buffer::from_slice([0, 1]); - let values = Buffer::from_slice(b"a"); + let 
offsets = Buffer::from(vec![0, 1]); + let values = Buffer::from(b"a".to_vec()); let validity = Some([true].into()); let a = validity.clone(); // cloned validity assert_eq!(a, validity); @@ -209,8 +209,8 @@ fn into_mut_3() { #[test] fn into_mut_4() { - let offsets = Buffer::from_slice([0, 1]); - let values = Buffer::from_slice(b"a"); + let offsets = Buffer::from(vec![0, 1]); + let values = Buffer::from(b"a".to_vec()); let validity = Some([true].into()); let array = Utf8Array::::new(DataType::Utf8, offsets, values, validity); assert!(array.into_mut().is_right()); diff --git a/tests/it/buffer/immutable.rs b/tests/it/buffer/immutable.rs index 48c4e24cb66..9398759e955 100644 --- a/tests/it/buffer/immutable.rs +++ b/tests/it/buffer/immutable.rs @@ -7,24 +7,16 @@ fn new() { assert!(buffer.is_empty()); } -#[test] -fn new_zeroed() { - let buffer = Buffer::::new_zeroed(2); - assert_eq!(buffer.len(), 2); - assert!(!buffer.is_empty()); - assert_eq!(buffer.as_slice(), &[0, 0]); -} - #[test] fn from_slice() { - let buffer = Buffer::::from_slice([0, 1, 2]); + let buffer = Buffer::::from(vec![0, 1, 2]); assert_eq!(buffer.len(), 3); assert_eq!(buffer.as_slice(), &[0, 1, 2]); } #[test] fn slice() { - let buffer = Buffer::::from_slice([0, 1, 2, 3]); + let buffer = Buffer::::from(vec![0, 1, 2, 3]); let buffer = buffer.slice(1, 2); assert_eq!(buffer.len(), 2); assert_eq!(buffer.as_slice(), &[1, 2]); @@ -37,32 +29,9 @@ fn from_iter() { assert_eq!(buffer.as_slice(), &[0, 1, 2]); } -#[test] -fn from_trusted_len_iter() { - let buffer = unsafe { Buffer::::from_trusted_len_iter_unchecked(0..3) }; - assert_eq!(buffer.len(), 3); - assert_eq!(buffer.as_slice(), &[0, 1, 2]); -} - -#[test] -fn try_from_trusted_len_iter() { - let iter = (0..3).map(Result::<_, String>::Ok); - let buffer = unsafe { Buffer::::try_from_trusted_len_iter_unchecked(iter) }.unwrap(); - assert_eq!(buffer.len(), 3); - assert_eq!(buffer.as_slice(), &[0, 1, 2]); -} - -#[test] -fn as_ptr() { - let buffer = 
Buffer::::from_slice([0, 1, 2, 3]); - let buffer = buffer.slice(1, 2); - let ptr = buffer.as_ptr(); - assert_eq!(unsafe { *ptr }, 1); -} - #[test] fn debug() { - let buffer = Buffer::::from_slice([0, 1, 2, 3]); + let buffer = Buffer::::from(vec![0, 1, 2, 3]); let buffer = buffer.slice(1, 2); let a = format!("{:?}", buffer); assert_eq!(a, "[1, 2]") diff --git a/tests/it/compute/take.rs b/tests/it/compute/take.rs index 676152b727f..f2f5c1bd39b 100644 --- a/tests/it/compute/take.rs +++ b/tests/it/compute/take.rs @@ -178,13 +178,13 @@ fn unsigned_take() { #[test] fn list_with_no_none() { - let values = Buffer::from_slice([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + let values = Buffer::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); let values = PrimitiveArray::::from_data(DataType::Int32, values, None); let data_type = ListArray::::default_datatype(DataType::Int32); let array = ListArray::::from_data( data_type, - Buffer::from_slice([0, 2, 2, 6, 9, 10]), + Buffer::from(vec![0, 2, 2, 6, 9, 10]), Arc::new(values), None, ); @@ -192,12 +192,12 @@ fn list_with_no_none() { let indices = PrimitiveArray::from([Some(4i32), Some(1), Some(3)]); let result = take(&array, &indices).unwrap(); - let expected_values = Buffer::from_slice([9, 6, 7, 8]); + let expected_values = Buffer::from(vec![9, 6, 7, 8]); let expected_values = PrimitiveArray::::from_data(DataType::Int32, expected_values, None); let expected_type = ListArray::::default_datatype(DataType::Int32); let expected = ListArray::::from_data( expected_type, - Buffer::from_slice([0, 1, 1, 4]), + Buffer::from(vec![0, 1, 1, 4]), Arc::new(expected_values), None, ); @@ -207,7 +207,7 @@ fn list_with_no_none() { #[test] fn list_with_none() { - let values = Buffer::from_slice([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + let values = Buffer::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); let values = PrimitiveArray::::from_data(DataType::Int32, values, None); let validity_values = vec![true, false, true, true, true]; @@ -216,7 +216,7 @@ fn list_with_none() { let 
data_type = ListArray::::default_datatype(DataType::Int32); let array = ListArray::::from_data( data_type, - Buffer::from_slice([0, 2, 2, 6, 9, 10]), + Buffer::from(vec![0, 2, 2, 6, 9, 10]), Arc::new(values), Some(validity), ); @@ -269,13 +269,13 @@ fn list_both_validity() { #[test] fn test_nested() { - let values = Buffer::from_slice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + let values = Buffer::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); let values = PrimitiveArray::::from_data(DataType::Int32, values, None); let data_type = ListArray::::default_datatype(DataType::Int32); let array = ListArray::::from_data( data_type, - Buffer::from_slice([0, 2, 4, 7, 7, 8, 10]), + Buffer::from(vec![0, 2, 4, 7, 7, 8, 10]), Arc::new(values), None, ); @@ -283,7 +283,7 @@ fn test_nested() { let data_type = ListArray::::default_datatype(array.data_type().clone()); let nested = ListArray::::from_data( data_type, - Buffer::from_slice([0, 2, 5, 6]), + Buffer::from(vec![0, 2, 5, 6]), Arc::new(array), None, ); @@ -292,13 +292,13 @@ fn test_nested() { let result = take(&nested, &indices).unwrap(); // expected data - let expected_values = Buffer::from_slice([1, 2, 3, 4, 5, 6, 7, 8]); + let expected_values = Buffer::from(vec![1, 2, 3, 4, 5, 6, 7, 8]); let expected_values = PrimitiveArray::::from_data(DataType::Int32, expected_values, None); let expected_data_type = ListArray::::default_datatype(DataType::Int32); let expected_array = ListArray::::from_data( expected_data_type, - Buffer::from_slice([0, 2, 4, 7, 7, 8]), + Buffer::from(vec![0, 2, 4, 7, 7, 8]), Arc::new(expected_values), None, ); @@ -306,7 +306,7 @@ fn test_nested() { let expected_data_type = ListArray::::default_datatype(expected_array.data_type().clone()); let expected = ListArray::::from_data( expected_data_type, - Buffer::from_slice([0, 2, 5]), + Buffer::from(vec![0, 2, 5]), Arc::new(expected_array), None, ); diff --git a/tests/it/io/json/write.rs b/tests/it/io/json/write.rs index d79a5310f06..dc7cd987465 100644 --- 
a/tests/it/io/json/write.rs +++ b/tests/it/io/json/write.rs @@ -249,7 +249,7 @@ fn list_of_struct() -> Result<()> { // [{"c11": 5, "c12": {"c121": "g"}}] let c1 = ListArray::::from_data( c1_datatype, - Buffer::from_slice([0, 2, 2, 3]), + Buffer::from(vec![0, 2, 2, 3]), Arc::new(s), Some(Bitmap::from_u8_slice([0b00000101], 3)), ); diff --git a/tests/it/io/ndjson/mod.rs b/tests/it/io/ndjson/mod.rs index 1e25de73875..163abc02809 100644 --- a/tests/it/io/ndjson/mod.rs +++ b/tests/it/io/ndjson/mod.rs @@ -297,7 +297,7 @@ fn case_nested_list() -> (String, Arc) { ); let expected = ListArray::from_data( a_list_data_type, - Buffer::from_slice([0i32, 2, 3, 6, 6, 6]), + Buffer::from(vec![0i32, 2, 3, 6, 6, 6]), Arc::new(a_struct) as Arc, Some(Bitmap::from_u8_slice([0b00010111], 5)), ); diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs index 2da19374ae4..68a0ea43e06 100644 --- a/tests/it/io/parquet/mod.rs +++ b/tests/it/io/parquet/mod.rs @@ -68,7 +68,7 @@ pub fn pyarrow_nested_edge(column: &str) -> Box { } pub fn pyarrow_nested_nullable(column: &str) -> Box { - let offsets = Buffer::from_slice([0, 2, 2, 5, 8, 8, 11, 11, 12]); + let offsets = Buffer::from(vec![0, 2, 2, 5, 8, 8, 11, 11, 12]); let values = match column { "list_int64" => { diff --git a/tests/it/io/print.rs b/tests/it/io/print.rs index 68eccdcc32b..5b16931a70e 100644 --- a/tests/it/io/print.rs +++ b/tests/it/io/print.rs @@ -359,7 +359,7 @@ fn write_union() -> Result<()> { Field::new("b", DataType::Utf8, true), ]; let data_type = DataType::Union(fields, None, UnionMode::Sparse); - let types = Buffer::from_slice([0, 0, 1]); + let types = Buffer::from(vec![0, 0, 1]); let fields = vec![ Arc::new(Int32Array::from(&[Some(1), None, Some(2)])) as Arc, Arc::new(Utf8Array::::from(&[Some("a"), Some("b"), Some("c")])) as Arc,