diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index 6a1e8ad3aff..2311d64d042 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -1,8 +1,9 @@ use crate::{bitmap::Bitmap, buffer::Buffer, datatypes::DataType}; use super::{ - display_fmt, display_helper, specification::check_offsets, - specification::check_offsets_minimal, specification::Offset, Array, GenericBinaryArray, + display_fmt, display_helper, + specification::{check_offsets, check_offsets_minimal}, + Array, GenericBinaryArray, Offset, }; mod ffi; diff --git a/src/array/dictionary/ffi.rs b/src/array/dictionary/ffi.rs index 10c094b3931..65805e46b85 100644 --- a/src/array/dictionary/ffi.rs +++ b/src/array/dictionary/ffi.rs @@ -30,7 +30,8 @@ impl FromFfi for DictionaryArray let validity = unsafe { array.validity() }?; let values = unsafe { array.buffer::(1) }?; - let keys = PrimitiveArray::::from_data(K::DATA_TYPE, values, validity); + let data_type = K::PRIMITIVE.into(); + let keys = PrimitiveArray::::from_data(data_type, values, validity); let values = array.dictionary()?.unwrap(); let values = ffi::try_from(values)?.into(); diff --git a/src/array/dictionary/mod.rs b/src/array/dictionary/mod.rs index 6d186b6bce1..778284b2ce2 100644 --- a/src/array/dictionary/mod.rs +++ b/src/array/dictionary/mod.rs @@ -62,15 +62,17 @@ impl DictionaryArray { pub fn new_empty(data_type: DataType) -> Self { let values = Self::get_child(&data_type); let values = new_empty_array(values.clone()).into(); - Self::from_data(PrimitiveArray::::new_empty(K::DATA_TYPE), values) + let data_type = K::PRIMITIVE.into(); + Self::from_data(PrimitiveArray::::new_empty(data_type), values) } /// Returns an [`DictionaryArray`] whose all elements are null #[inline] pub fn new_null(data_type: DataType, length: usize) -> Self { let values = Self::get_child(&data_type); + let data_type = K::PRIMITIVE.into(); Self::from_data( - PrimitiveArray::::new_null(K::DATA_TYPE, length), + PrimitiveArray::::new_null(data_type, length), new_empty_array(values.clone()).into(), ) } diff --git a/src/array/growable/dictionary.rs b/src/array/growable/dictionary.rs index 7fe1040cea4..4083db797cf 100644 --- a/src/array/growable/dictionary.rs +++ b/src/array/growable/dictionary.rs @@ -84,7 +84,8 @@ impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> { let validity = std::mem::take(&mut self.key_validity); let values = std::mem::take(&mut self.key_values); - let keys = PrimitiveArray::::from_data(T::DATA_TYPE, values.into(), validity.into()); + let data_type = T::PRIMITIVE.into(); + let keys = PrimitiveArray::::from_data(data_type, values.into(), validity.into()); DictionaryArray::::from_data(keys, self.values.clone()) } @@ -125,8 +126,9 @@ impl<'a, T: DictionaryKey> Growable<'a> for GrowableDictionary<'a, T> { impl<'a, T: DictionaryKey> From> for DictionaryArray { #[inline] fn from(val: GrowableDictionary<'a, T>) -> Self { + let data_type = T::PRIMITIVE.into(); let keys = PrimitiveArray::::from_data( - T::DATA_TYPE, + data_type, val.key_values.into(), val.key_validity.into(), ); diff --git a/src/array/list/ffi.rs b/src/array/list/ffi.rs index b22c4073a3d..11af11bfd6a 100644 --- a/src/array/list/ffi.rs +++ b/src/array/list/ffi.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use crate::{array::FromFfi, bitmap::align, error::Result, ffi}; -use super::super::{ffi::ToFfi, specification::Offset, Array}; +use super::super::{ffi::ToFfi, Array, Offset}; use super::ListArray; unsafe impl ToFfi for ListArray { diff --git a/src/array/list/mod.rs b/src/array/list/mod.rs index 11e6fd7592b..07e00ad69ce 100644 --- a/src/array/list/mod.rs +++ b/src/array/list/mod.rs @@ -6,11 +6,7 @@ use crate::{ datatypes::{DataType, Field}, }; -use super::{ - display_fmt, new_empty_array, - specification::{check_offsets, Offset}, - Array, -}; +use super::{display_fmt, new_empty_array, specification::check_offsets, Array, Offset}; mod ffi; mod iterator; diff --git a/src/array/mod.rs b/src/array/mod.rs index 317543087a1..11146b05796 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -371,7 +371,7 @@ pub use list::{ListArray, MutableListArray}; pub use map::MapArray; pub use null::NullArray; pub use primitive::*; -pub use specification::Offset; +pub use crate::types::Offset; pub use struct_::StructArray; pub use union::UnionArray; pub use utf8::{MutableUtf8Array, Utf8Array, Utf8ValuesIter}; diff --git a/src/array/primitive/from_natural.rs b/src/array/primitive/from_natural.rs index 93717f4a223..dbbe7a77fbb 100644 --- a/src/array/primitive/from_natural.rs +++ b/src/array/primitive/from_natural.rs @@ -3,32 +3,30 @@ use std::iter::FromIterator; use crate::{ buffer::{Buffer, MutableBuffer}, trusted_len::TrustedLen, - types::{NativeType, NaturalDataType}, + types::NativeType, }; use super::{MutablePrimitiveArray, PrimitiveArray}; -impl]>> From

for PrimitiveArray { +impl]>> From

for PrimitiveArray { fn from(slice: P) -> Self { MutablePrimitiveArray::::from(slice).into() } } -impl>> FromIterator - for PrimitiveArray -{ +impl>> FromIterator for PrimitiveArray { fn from_iter>(iter: I) -> Self { MutablePrimitiveArray::::from_iter(iter).into() } } -impl PrimitiveArray { +impl PrimitiveArray { /// Creates a (non-null) [`PrimitiveArray`] from an iterator of values. /// # Implementation /// This does not assume that the iterator has a known length. pub fn from_values>(iter: I) -> Self { Self::from_data( - T::DATA_TYPE, + T::PRIMITIVE.into(), MutableBuffer::::from_iter(iter).into(), None, ) @@ -38,11 +36,11 @@ impl PrimitiveArray { /// # Implementation /// This is essentially a memcopy and is the fastest way to create a [`PrimitiveArray`]. pub fn from_slice>(slice: P) -> Self { - Self::from_data(T::DATA_TYPE, Buffer::::from(slice), None) + Self::from_data(T::PRIMITIVE.into(), Buffer::::from(slice), None) } } -impl PrimitiveArray { +impl PrimitiveArray { /// Creates a (non-null) [`PrimitiveArray`] from a [`TrustedLen`] of values. /// # Implementation /// This does not assume that the iterator has a known length. diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index a84e5e60e7e..b2da624b7ff 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -38,43 +38,39 @@ pub struct PrimitiveArray { } impl PrimitiveArray { - /// Returns a new empty [`PrimitiveArray`]. - pub fn new_empty(data_type: DataType) -> Self { - Self::from_data(data_type, Buffer::new(), None) + /// Returns the length of this array + #[inline] + pub fn len(&self) -> usize { + self.values.len() } - /// Returns a new [`PrimitiveArray`] whose all slots are null / `None`. + /// The optional validity. #[inline] - pub fn new_null(data_type: DataType, length: usize) -> Self { - Self::from_data( - data_type, - Buffer::new_zeroed(length), - Some(Bitmap::new_zeroed(length)), - ) + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() } - /// The canonical method to create a [`PrimitiveArray`] out of low-end APIs. - /// # Panics - /// This function panics iff: - /// * `data_type` is not supported by the physical type - /// * The validity is not `None` and its length is different from the `values`'s length - pub fn from_data(data_type: DataType, values: Buffer, validity: Option) -> Self { - if !T::is_valid(&data_type) { - Err(ArrowError::InvalidArgumentError(format!( - "Type {} does not support logical type {}", - std::any::type_name::(), - data_type - ))) - .unwrap() - } - if let Some(ref validity) = validity { - assert_eq!(values.len(), validity.len()); - } - Self { - data_type, - values, - validity, - } + /// The values [`Buffer`]. + /// Values on null slots are undetermined (they can be anything). + #[inline] + pub fn values(&self) -> &Buffer { + &self.values + } + + /// Returns the value at slot `i`. Equivalent to `self.values()[i]`. + /// The value on null slots is undetermined (it can be anything). + #[inline] + pub fn value(&self, i: usize) -> T { + self.values()[i] + } + + /// Returns the element at index `i` as `T`. + /// The value on null slots is undetermined (it can be anything). + /// # Safety + /// Caller must be sure that `i < self.len()` + #[inline] + pub unsafe fn value_unchecked(&self, i: usize) -> T { + *self.values.get_unchecked(i) } /// Returns a slice of this [`PrimitiveArray`]. @@ -123,39 +119,43 @@ impl PrimitiveArray { } impl PrimitiveArray { - /// Returns the length of this array - #[inline] - pub fn len(&self) -> usize { - self.values.len() - } - - /// The optional validity. - #[inline] - pub fn validity(&self) -> Option<&Bitmap> { - self.validity.as_ref() - } - - /// The values [`Buffer`]. - /// Values on null slots are undetermined (they can be anything). - #[inline] - pub fn values(&self) -> &Buffer { - &self.values + /// Returns a new empty [`PrimitiveArray`]. + pub fn new_empty(data_type: DataType) -> Self { + Self::from_data(data_type, Buffer::new(), None) } - /// Returns the value at slot `i`. Equivalent to `self.values()[i]`. - /// The value on null slots is undetermined (it can be anything). + /// Returns a new [`PrimitiveArray`] whose all slots are null / `None`. #[inline] - pub fn value(&self, i: usize) -> T { - self.values()[i] + pub fn new_null(data_type: DataType, length: usize) -> Self { + Self::from_data( + data_type, + Buffer::new_zeroed(length), + Some(Bitmap::new_zeroed(length)), + ) } - /// Returns the element at index `i` as `T`. - /// The value on null slots is undetermined (it can be anything). - /// # Safety - /// Caller must be sure that `i < self.len()` - #[inline] - pub unsafe fn value_unchecked(&self, i: usize) -> T { - *self.values.get_unchecked(i) + /// The canonical method to create a [`PrimitiveArray`] out of low-end APIs. + /// # Panics + /// This function panics iff: + /// * `data_type` is not supported by the physical type + /// * The validity is not `None` and its length is different from the `values`'s length + pub fn from_data(data_type: DataType, values: Buffer, validity: Option) -> Self { + if !data_type.to_physical_type().eq_primitive(T::PRIMITIVE) { + Err(ArrowError::InvalidArgumentError(format!( + "Type {} does not support logical type {}", + std::any::type_name::(), + data_type + ))) + .unwrap() + } + if let Some(ref validity) = validity { + assert_eq!(values.len(), validity.len()); + } + Self { + data_type, + values, + validity, + } } /// Returns a new [`PrimitiveArray`] with a different logical type. @@ -164,7 +164,7 @@ impl PrimitiveArray { /// Panics iff the data_type is not supported for the physical type. #[inline] pub fn to(self, data_type: DataType) -> Self { - if !T::is_valid(&data_type) { + if !data_type.to_physical_type().eq_primitive(T::PRIMITIVE) { Err(ArrowError::InvalidArgumentError(format!( "Type {} does not support logical type {}", std::any::type_name::(), diff --git a/src/array/primitive/mutable.rs b/src/array/primitive/mutable.rs index ac5613b754d..8b9fae199b9 100644 --- a/src/array/primitive/mutable.rs +++ b/src/array/primitive/mutable.rs @@ -7,7 +7,7 @@ use crate::{ datatypes::DataType, error::{ArrowError, Result}, trusted_len::TrustedLen, - types::{NativeType, NaturalDataType}, + types::NativeType, }; use super::PrimitiveArray; @@ -33,13 +33,13 @@ impl From> for PrimitiveArray { } } -impl]>> From

for MutablePrimitiveArray { +impl]>> From

for MutablePrimitiveArray { fn from(slice: P) -> Self { Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref())) } } -impl MutablePrimitiveArray { +impl MutablePrimitiveArray { /// Creates a new empty [`MutablePrimitiveArray`]. pub fn new() -> Self { Self::with_capacity(0) @@ -47,7 +47,7 @@ impl MutablePrimitiveArray { /// Creates a new [`MutablePrimitiveArray`] with a capacity. pub fn with_capacity(capacity: usize) -> Self { - Self::with_capacity_from(capacity, T::DATA_TYPE) + Self::with_capacity_from(capacity, T::PRIMITIVE.into()) } /// Create a [`MutablePrimitiveArray`] out of low-end APIs. @@ -60,7 +60,7 @@ impl MutablePrimitiveArray { values: MutableBuffer, validity: Option, ) -> Self { - if !T::is_valid(&data_type) { + if !data_type.to_physical_type().eq_primitive(T::PRIMITIVE) { Err(ArrowError::InvalidArgumentError(format!( "Type {} does not support logical type {}", std::any::type_name::(), @@ -84,7 +84,7 @@ impl MutablePrimitiveArray { } } -impl Default for MutablePrimitiveArray { +impl Default for MutablePrimitiveArray { fn default() -> Self { Self::new() } @@ -92,7 +92,7 @@ impl Default for MutablePrimitiveArray { impl From for MutablePrimitiveArray { fn from(data_type: DataType) -> Self { - assert!(T::is_valid(&data_type)); + assert!(data_type.to_physical_type().eq_primitive(T::PRIMITIVE)); Self { data_type, values: MutableBuffer::::new(), @@ -104,7 +104,7 @@ impl From for MutablePrimitiveArray { impl MutablePrimitiveArray { /// Creates a new [`MutablePrimitiveArray`] from a capacity and [`DataType`]. pub fn with_capacity_from(capacity: usize, data_type: DataType) -> Self { - assert!(T::is_valid(&data_type)); + assert!(data_type.to_physical_type().eq_primitive(T::PRIMITIVE)); Self { data_type, values: MutableBuffer::::with_capacity(capacity), @@ -392,7 +392,7 @@ impl MutableArray for MutablePrimitiveArray { } } -impl MutablePrimitiveArray { +impl MutablePrimitiveArray { /// Creates a [`MutablePrimitiveArray`] from a slice of values. pub fn from_slice>(slice: P) -> Self { Self::from_trusted_len_values_iter(slice.as_ref().iter().copied()) @@ -411,7 +411,7 @@ impl MutablePrimitiveArray { let (validity, values) = trusted_len_unzip(iterator); Self { - data_type: T::DATA_TYPE, + data_type: T::PRIMITIVE.into(), values, validity, } @@ -444,7 +444,7 @@ impl MutablePrimitiveArray { let (validity, values) = try_trusted_len_unzip(iterator)?; Ok(Self { - data_type: T::DATA_TYPE, + data_type: T::PRIMITIVE.into(), values, validity, }) @@ -463,7 +463,7 @@ impl MutablePrimitiveArray { /// Creates a new [`MutablePrimitiveArray`] out an iterator over values pub fn from_trusted_len_values_iter>(iter: I) -> Self { Self { - data_type: T::DATA_TYPE, + data_type: T::PRIMITIVE.into(), values: MutableBuffer::::from_trusted_len_iter(iter), validity: None, } @@ -475,14 +475,14 @@ impl MutablePrimitiveArray { /// I.e. that `size_hint().1` correctly reports its length. pub unsafe fn from_trusted_len_values_iter_unchecked>(iter: I) -> Self { Self { - data_type: T::DATA_TYPE, + data_type: T::PRIMITIVE.into(), values: MutableBuffer::::from_trusted_len_iter_unchecked(iter), validity: None, } } } -impl>> FromIterator +impl>> FromIterator for MutablePrimitiveArray { fn from_iter>(iter: I) -> Self { @@ -510,7 +510,7 @@ impl>> FromI }; Self { - data_type: T::DATA_TYPE, + data_type: T::PRIMITIVE.into(), values, validity, } diff --git a/src/array/specification.rs b/src/array/specification.rs index d61735ce8e1..384067df214 100644 --- a/src/array/specification.rs +++ b/src/array/specification.rs @@ -1,61 +1,4 @@ -use std::convert::TryFrom; - -use num_traits::Num; - -use crate::types::Index; - -mod private { - pub trait Sealed {} - - impl Sealed for i32 {} - impl Sealed for i64 {} -} - -/// Sealed trait describing types that can be used as offsets in Arrow (`i32` and `i64`). -pub trait Offset: private::Sealed + Index + Num + Ord + num_traits::CheckedAdd { - /// Whether it is `i32` or `i64` - fn is_large() -> bool; - - /// converts itself to `isize` - fn to_isize(&self) -> isize; - - /// converts from `isize` - fn from_isize(value: isize) -> Option; -} - -impl Offset for i32 { - #[inline] - fn is_large() -> bool { - false - } - - #[inline] - fn from_isize(value: isize) -> Option { - Self::try_from(value).ok() - } - - #[inline] - fn to_isize(&self) -> isize { - *self as isize - } -} - -impl Offset for i64 { - #[inline] - fn is_large() -> bool { - true - } - - #[inline] - fn from_isize(value: isize) -> Option { - Self::try_from(value).ok() - } - - #[inline] - fn to_isize(&self) -> isize { - *self as isize - } -} +use crate::types::Offset; pub fn check_offsets_minimal(offsets: &[O], values_len: usize) -> usize { assert!( diff --git a/src/compute/arithmetics/basic/div.rs b/src/compute/arithmetics/basic/div.rs index 1f805811073..77d65e8f88f 100644 --- a/src/compute/arithmetics/basic/div.rs +++ b/src/compute/arithmetics/basic/div.rs @@ -3,7 +3,7 @@ use std::ops::Div; use num_traits::{CheckedDiv, NumCast}; -use crate::datatypes::DataType; +use crate::datatypes::PrimitiveType; use crate::{ array::{Array, PrimitiveArray}, compute::{ @@ -111,8 +111,8 @@ where T: NativeArithmetics + Div + NumCast, { let rhs = *rhs; - match T::DATA_TYPE { - DataType::UInt64 => { + match T::PRIMITIVE { + PrimitiveType::UInt64 => { let lhs = lhs.as_any().downcast_ref::>().unwrap(); let rhs = rhs.to_u64().unwrap(); @@ -127,7 +127,7 @@ where )) } } - DataType::UInt32 => { + PrimitiveType::UInt32 => { let lhs = lhs.as_any().downcast_ref::>().unwrap(); let rhs = rhs.to_u32().unwrap(); @@ -142,7 +142,7 @@ where )) } } - DataType::UInt16 => { + PrimitiveType::UInt16 => { let lhs = lhs.as_any().downcast_ref::>().unwrap(); let rhs = rhs.to_u16().unwrap(); @@ -157,7 +157,7 @@ where )) } } - DataType::UInt8 => { + PrimitiveType::UInt8 => { let lhs = lhs.as_any().downcast_ref::>().unwrap(); let rhs = rhs.to_u8().unwrap(); diff --git a/src/compute/arithmetics/basic/rem.rs b/src/compute/arithmetics/basic/rem.rs index baa554a806c..0d37169ef77 100644 --- a/src/compute/arithmetics/basic/rem.rs +++ b/src/compute/arithmetics/basic/rem.rs @@ -2,7 +2,7 @@ use std::ops::Rem; use num_traits::{CheckedRem, NumCast}; -use crate::datatypes::DataType; +use crate::datatypes::PrimitiveType; use crate::{ array::{Array, PrimitiveArray}, compute::{ @@ -98,8 +98,8 @@ where { let rhs = *rhs; - match T::DATA_TYPE { - DataType::UInt64 => { + match T::PRIMITIVE { + PrimitiveType::UInt64 => { let lhs = lhs.as_any().downcast_ref::>().unwrap(); let rhs = rhs.to_u64().unwrap(); @@ -114,7 +114,7 @@ where )) } } - DataType::UInt32 => { + PrimitiveType::UInt32 => { let lhs = lhs.as_any().downcast_ref::>().unwrap(); let rhs = rhs.to_u32().unwrap(); @@ -129,7 +129,7 @@ where )) } } - DataType::UInt16 => { + PrimitiveType::UInt16 => { let lhs = lhs.as_any().downcast_ref::>().unwrap(); let rhs = rhs.to_u16().unwrap(); @@ -144,7 +144,7 @@ where )) } } - DataType::UInt8 => { + PrimitiveType::UInt8 => { let lhs = lhs.as_any().downcast_ref::>().unwrap(); let rhs = rhs.to_u8().unwrap(); diff --git a/src/compute/cast/boolean_to.rs b/src/compute/cast/boolean_to.rs index c01b350f1ae..45005988df4 100644 --- a/src/compute/cast/boolean_to.rs +++ b/src/compute/cast/boolean_to.rs @@ -1,8 +1,4 @@ -use crate::{ - array::*, - buffer::Buffer, - types::{NativeType, NaturalDataType}, -}; +use crate::{array::*, buffer::Buffer, types::NativeType}; use crate::{ array::{BinaryArray, Offset, Utf8Array}, error::Result, @@ -10,7 +6,7 @@ use crate::{ pub(super) fn boolean_to_primitive_dyn(array: &dyn Array) -> Result> where - T: NativeType + NaturalDataType + num_traits::One, + T: NativeType + num_traits::One, { let array = array.as_any().downcast_ref().unwrap(); Ok(Box::new(boolean_to_primitive::(array))) @@ -19,7 +15,7 @@ where /// Casts the [`BooleanArray`] to a [`PrimitiveArray`]. pub fn boolean_to_primitive(from: &BooleanArray) -> PrimitiveArray where - T: NativeType + NaturalDataType + num_traits::One, + T: NativeType + num_traits::One, { let iter = from .values() @@ -27,7 +23,7 @@ where .map(|x| if x { T::one() } else { T::default() }); let values = Buffer::::from_trusted_len_iter(iter); - PrimitiveArray::::from_data(T::DATA_TYPE, values, from.validity().cloned()) + PrimitiveArray::::from_data(T::PRIMITIVE.into(), values, from.validity().cloned()) } /// Casts the [`BooleanArray`] to a [`Utf8Array`], casting trues to `"1"` and falses to `"0"` diff --git a/src/compute/cast/dictionary_to.rs b/src/compute/cast/dictionary_to.rs index 88fc33e8870..36878931fa3 100644 --- a/src/compute/cast/dictionary_to.rs +++ b/src/compute/cast/dictionary_to.rs @@ -71,7 +71,7 @@ where let keys = from.keys(); let values = from.values(); - let casted_keys = primitive_to_primitive::(keys, &K2::DATA_TYPE); + let casted_keys = primitive_to_primitive::(keys, &K2::PRIMITIVE.into()); if casted_keys.null_count() > keys.null_count() { Err(ArrowError::Overflow) @@ -91,7 +91,7 @@ where let keys = from.keys(); let values = from.values(); - let casted_keys = primitive_as_primitive::(keys, &K2::DATA_TYPE); + let casted_keys = primitive_as_primitive::(keys, &K2::PRIMITIVE.into()); if casted_keys.null_count() > keys.null_count() { Err(ArrowError::Overflow) diff --git a/src/compute/cast/primitive_to.rs b/src/compute/cast/primitive_to.rs index bbffdbdb2fe..94435185b86 100644 --- a/src/compute/cast/primitive_to.rs +++ b/src/compute/cast/primitive_to.rs @@ -34,7 +34,7 @@ pub fn primitive_to_binary( let len = lexical_core::write_unchecked(*x, bytes).len(); offset += len; - offsets.push(O::from_isize(offset as isize).unwrap()); + offsets.push(O::from_usize(offset as usize).unwrap()); } values.set_len(offset); values.shrink_to_fit(); @@ -100,7 +100,7 @@ pub fn primitive_to_utf8( let len = lexical_core::write_unchecked(*x, bytes).len(); offset += len; - offsets.push(O::from_isize(offset as isize).unwrap()); + offsets.push(O::from_usize(offset as usize).unwrap()); } values.set_len(offset); values.shrink_to_fit(); diff --git a/src/compute/length.rs b/src/compute/length.rs index d74f4ee9d13..fa9434458eb 100644 --- a/src/compute/length.rs +++ b/src/compute/length.rs @@ -22,11 +22,12 @@ use crate::{ buffer::Buffer, datatypes::DataType, error::{ArrowError, Result}, + types::NativeType, }; fn unary_offsets_string(array: &Utf8Array, op: F) -> PrimitiveArray where - O: Offset, + O: Offset + NativeType, F: Fn(O) -> O, { let values = array diff --git a/src/compute/sort/boolean.rs b/src/compute/sort/boolean.rs index 1bf2b6f4a16..817b71d87f1 100644 --- a/src/compute/sort/boolean.rs +++ b/src/compute/sort/boolean.rs @@ -49,5 +49,6 @@ pub fn sort_boolean( values.shrink_to_fit(); } - PrimitiveArray::::from_data(I::DATA_TYPE, values.into(), None) + let data_type = I::PRIMITIVE.into(); + PrimitiveArray::::from_data(data_type, values.into(), None) } diff --git a/src/compute/sort/common.rs b/src/compute/sort/common.rs index 04a6c635078..2b6282036ec 100644 --- a/src/compute/sort/common.rs +++ b/src/compute/sort/common.rs @@ -164,5 +164,6 @@ where indices }; - PrimitiveArray::::from_data(I::DATA_TYPE, indices.into(), None) + let data_type = I::PRIMITIVE.into(); + PrimitiveArray::::from_data(data_type, indices.into(), None) } diff --git a/src/compute/sort/lex_sort.rs b/src/compute/sort/lex_sort.rs index 77856902c4c..bc55a8a9c4c 100644 --- a/src/compute/sort/lex_sort.rs +++ b/src/compute/sort/lex_sort.rs @@ -185,8 +185,9 @@ pub fn lexsort_to_indices( values.sort_unstable_by(lex_comparator); } + let data_type = I::PRIMITIVE.into(); Ok(PrimitiveArray::::from_data( - I::DATA_TYPE, + data_type, values.into(), None, )) diff --git a/src/compute/sort/mod.rs b/src/compute/sort/mod.rs index 43fd2070cb5..af1350f14d3 100644 --- a/src/compute/sort/mod.rs +++ b/src/compute/sort/mod.rs @@ -362,7 +362,8 @@ where values.truncate(limit.unwrap_or_else(|| values.len())); - PrimitiveArray::::from_data(I::DATA_TYPE, values.into(), None) + let data_type = I::PRIMITIVE.into(); + PrimitiveArray::::from_data(data_type, values.into(), None) } /// Compare two `Array`s based on the ordering defined in [ord](crate::array::ord). diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index 6ed750b65fc..89d9971ad6c 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -24,7 +24,6 @@ use crate::datatypes::*; use crate::error::{ArrowError, Result}; use crate::temporal_conversions::*; use crate::types::NativeType; -use crate::types::NaturalDataType; use super::arity::unary; @@ -281,7 +280,7 @@ fn extract_impl( ) -> PrimitiveArray where T: chrono::TimeZone, - A: NativeType + NaturalDataType, + A: NativeType, F: Fn(chrono::DateTime) -> A, { match time_unit { @@ -291,7 +290,7 @@ where let offset = timezone.offset_from_utc_datetime(&datetime); extract(chrono::DateTime::::from_utc(datetime, offset)) }; - unary(array, op, A::DATA_TYPE) + unary(array, op, A::PRIMITIVE.into()) } TimeUnit::Millisecond => { let op = |x| { @@ -299,7 +298,7 @@ where let offset = timezone.offset_from_utc_datetime(&datetime); extract(chrono::DateTime::::from_utc(datetime, offset)) }; - unary(array, op, A::DATA_TYPE) + unary(array, op, A::PRIMITIVE.into()) } TimeUnit::Microsecond => { let op = |x| { @@ -307,7 +306,7 @@ where let offset = timezone.offset_from_utc_datetime(&datetime); extract(chrono::DateTime::::from_utc(datetime, offset)) }; - unary(array, op, A::DATA_TYPE) + unary(array, op, A::PRIMITIVE.into()) } TimeUnit::Nanosecond => { let op = |x| { @@ -315,7 +314,7 @@ where let offset = timezone.offset_from_utc_datetime(&datetime); extract(chrono::DateTime::::from_utc(datetime, offset)) }; - unary(array, op, A::DATA_TYPE) + unary(array, op, A::PRIMITIVE.into()) } } } diff --git a/src/datatypes/mod.rs b/src/datatypes/mod.rs index 8ab7ef579f8..07475b7b0dc 100644 --- a/src/datatypes/mod.rs +++ b/src/datatypes/mod.rs @@ -299,6 +299,26 @@ impl From for DataType { } } +impl From for DataType { + fn from(item: PrimitiveType) -> Self { + match item { + PrimitiveType::Int8 => DataType::Int8, + PrimitiveType::Int16 => DataType::Int16, + PrimitiveType::Int32 => DataType::Int32, + PrimitiveType::Int64 => DataType::Int64, + PrimitiveType::UInt8 => DataType::UInt8, + PrimitiveType::UInt16 => DataType::UInt16, + PrimitiveType::UInt32 => DataType::UInt32, + PrimitiveType::UInt64 => DataType::UInt64, + PrimitiveType::Int128 => DataType::Decimal(32, 32), + PrimitiveType::Float32 => DataType::Float32, + PrimitiveType::Float64 => DataType::Float64, + PrimitiveType::DaysMs => DataType::Interval(IntervalUnit::DayTime), + PrimitiveType::MonthDayNano => DataType::Interval(IntervalUnit::MonthDayNano), + } + } +} + // backward compatibility use std::collections::BTreeMap; use std::sync::Arc; diff --git a/src/datatypes/physical_type.rs b/src/datatypes/physical_type.rs index cb913429200..7e15cb19629 100644 --- a/src/datatypes/physical_type.rs +++ b/src/datatypes/physical_type.rs @@ -1,3 +1,5 @@ +pub use crate::types::PrimitiveType; + /// The set of physical types: unique in-memory representations of an Arrow array. /// A physical type has a one-to-many relationship with a [`crate::datatypes::DataType`] and /// a one-to-one mapping to each struct in this crate that implements [`crate::array::Array`]. @@ -35,36 +37,15 @@ pub enum PhysicalType { Dictionary(IntegerType), } -/// The set of all (physical) primitive types. -/// Each type corresponds to a variant of [`crate::array::PrimitiveArray`]. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum PrimitiveType { - /// A signed 8-bit integer. - Int8, - /// A signed 16-bit integer. - Int16, - /// A signed 32-bit integer. - Int32, - /// A signed 64-bit integer. - Int64, - /// A signed 128-bit integer. - Int128, - /// An unsigned 8-bit integer. - UInt8, - /// An unsigned 16-bit integer. - UInt16, - /// An unsigned 32-bit integer. - UInt32, - /// An unsigned 64-bit integer. - UInt64, - /// A 32-bit floating point number. - Float32, - /// A 64-bit floating point number. - Float64, - /// Two i32 representing days and ms - DaysMs, - /// months_days_ns(i32, i32, i64) - MonthDayNano, +impl PhysicalType { + /// Whether this physical type equals [`PhysicalType::Primitive`] of type `primitive`. + pub fn eq_primitive(&self, primitive: PrimitiveType) -> bool { + if let Self::Primitive(o) = self { + o == &primitive + } else { + false + } + } } /// the set of valid indices types of a dictionary-encoded Array. diff --git a/src/io/csv/read_utils.rs b/src/io/csv/read_utils.rs index 2ee25c85914..c414f22e355 100644 --- a/src/io/csv/read_utils.rs +++ b/src/io/csv/read_utils.rs @@ -15,7 +15,7 @@ use crate::{ error::{ArrowError, Result}, record_batch::RecordBatch, temporal_conversions, - types::{NativeType, NaturalDataType}, + types::NativeType, }; use super::utils::RFC3339; @@ -33,7 +33,7 @@ fn deserialize_primitive( op: F, ) -> Arc where - T: NativeType + NaturalDataType + lexical_core::FromLexical, + T: NativeType + lexical_core::FromLexical, F: Fn(&[u8]) -> Option, { let iter = rows.iter().map(|row| match row.get(column) { diff --git a/src/io/ipc/read/array/dictionary.rs b/src/io/ipc/read/array/dictionary.rs index 589070e1c0b..6cd61adfc32 100644 --- a/src/io/ipc/read/array/dictionary.rs +++ b/src/io/ipc/read/array/dictionary.rs @@ -40,7 +40,7 @@ where let keys = read_primitive( field_nodes, - T::DATA_TYPE, + T::PRIMITIVE.into(), buffers, reader, block_offset, diff --git a/src/io/json/read/deserialize.rs b/src/io/json/read/deserialize.rs index 64da0d775fc..bcf3b754618 100644 --- a/src/io/json/read/deserialize.rs +++ b/src/io/json/read/deserialize.rs @@ -23,7 +23,6 @@ use indexmap::map::IndexMap as HashMap; use num_traits::NumCast; use serde_json::Value; -use crate::types::NaturalDataType; use crate::{ array::*, bitmap::MutableBitmap, @@ -75,10 +74,7 @@ fn build_extract(data_type: &DataType) -> Extract { } } -fn read_int( - rows: &[&Value], - data_type: DataType, -) -> PrimitiveArray { +fn read_int(rows: &[&Value], data_type: DataType) -> PrimitiveArray { let iter = rows.iter().map(|row| match row { Value::Number(number) => number.as_i64().and_then(num_traits::cast::), Value::Bool(number) => num_traits::cast::(*number as i32), @@ -87,10 +83,7 @@ fn read_int( PrimitiveArray::from_trusted_len_iter(iter).to(data_type) } -fn read_float( - rows: &[&Value], - data_type: DataType, -) -> PrimitiveArray { +fn read_float(rows: &[&Value], data_type: DataType) -> PrimitiveArray { let iter = rows.iter().map(|row| match row { Value::Number(number) => number.as_f64().and_then(num_traits::cast::), Value::Bool(number) => num_traits::cast::(*number as i32), @@ -215,8 +208,7 @@ fn read_dictionary(rows: &[&Value], data_type: DataType) -> Di }, None => None, }) - .collect::>() - .to(K::DATA_TYPE); + .collect::>(); let values = read(&inner, child.clone()); DictionaryArray::::from_data(keys, values) diff --git a/src/io/json_integration/read.rs b/src/io/json_integration/read.rs index 10711fdd3af..e29e1a72375 100644 --- a/src/io/json_integration/read.rs +++ b/src/io/json_integration/read.rs @@ -259,7 +259,7 @@ fn to_dictionary( ArrowError::OutOfSpec(format!("Unable to find any dictionary id {}", dict_id)) })?; - let keys = to_primitive(json_col, K::DATA_TYPE); + let keys = to_primitive(json_col, K::PRIMITIVE.into()); // todo: make DataType::Dictionary hold a Field so that it can hold dictionary_id let inner_data_type = DictionaryArray::::get_child(&data_type); diff --git a/src/io/parquet/read/binary/basic.rs b/src/io/parquet/read/binary/basic.rs index 96b5b21adb0..7e246566f01 100644 --- a/src/io/parquet/read/binary/basic.rs +++ b/src/io/parquet/read/binary/basic.rs @@ -134,8 +134,8 @@ fn read_delta_optional( let len = std::cmp::min(packed.len() * 8, remaining); for is_valid in BitmapIter::new(packed, 0, len) { if is_valid { - let value = values_iterator.next().unwrap() as isize; - last_offset += O::from_isize(value).unwrap(); + let value = values_iterator.next().unwrap() as usize; + last_offset += O::from_usize(value).unwrap(); } offsets.push(last_offset); } @@ -146,8 +146,8 @@ fn read_delta_optional( validity.extend_constant(additional, is_set); if is_set { (0..additional).for_each(|_| { - let value = values_iterator.next().unwrap() as isize; - last_offset += O::from_isize(value).unwrap(); + let value = values_iterator.next().unwrap() as usize; + last_offset += O::from_usize(value).unwrap(); offsets.push(last_offset); }) } else { diff --git a/src/io/parquet/read/binary/dictionary.rs b/src/io/parquet/read/binary/dictionary.rs index 6e650974f42..36760e7c070 100644 --- a/src/io/parquet/read/binary/dictionary.rs +++ b/src/io/parquet/read/binary/dictionary.rs @@ -155,7 +155,7 @@ where // the array is empty and thus we need to push the first offset ourselves. offsets.push(O::zero()); }; - let keys = PrimitiveArray::from_data(K::DATA_TYPE, indices.into(), validity.into()); + let keys = PrimitiveArray::from_data(K::PRIMITIVE.into(), indices.into(), validity.into()); let data_type = DictionaryArray::::get_child(&data_type).clone(); let values = Arc::new(Utf8Array::from_data( data_type, diff --git a/src/io/parquet/read/primitive/dictionary.rs b/src/io/parquet/read/primitive/dictionary.rs index 6a8caeb17d2..480767f10f8 100644 --- a/src/io/parquet/read/primitive/dictionary.rs +++ b/src/io/parquet/read/primitive/dictionary.rs @@ -154,7 +154,7 @@ where )? } - let keys = PrimitiveArray::from_data(K::DATA_TYPE, indices.into(), validity.into()); + let keys = PrimitiveArray::from_data(K::PRIMITIVE.into(), indices.into(), validity.into()); let data_type = DictionaryArray::::get_child(&data_type).clone(); let values = Arc::new(PrimitiveArray::from_data(data_type, values.into(), None)); Ok(Box::new(DictionaryArray::::from_data(keys, values))) diff --git a/src/io/parquet/write/binary/basic.rs b/src/io/parquet/write/binary/basic.rs index a5d6e3210fb..7a7c4cd805e 100644 --- a/src/io/parquet/write/binary/basic.rs +++ b/src/io/parquet/write/binary/basic.rs @@ -129,7 +129,7 @@ pub(crate) fn encode_delta( if let Some(validity) = validity { let lengths = offsets .windows(2) - .map(|w| (w[1] - w[0]).to_isize() as i64) + .map(|w| (w[1] - w[0]).to_usize() as i64) .zip(validity.iter()) .flat_map(|(x, is_valid)| if is_valid { Some(x) } else { None }); let length = offsets.len() - 1 - validity.null_count(); @@ -137,11 +137,11 @@ pub(crate) fn encode_delta( delta_bitpacked::encode(lengths, buffer); } else { - let lengths = offsets.windows(2).map(|w| (w[1] - w[0]).to_isize() as i64); + let lengths = offsets.windows(2).map(|w| (w[1] - w[0]).to_usize() as i64); delta_bitpacked::encode(lengths, buffer); } } else { - let lengths = offsets.windows(2).map(|w| (w[1] - w[0]).to_isize() as i64); + let lengths = offsets.windows(2).map(|w| (w[1] - w[0]).to_usize() as i64); delta_bitpacked::encode(lengths, buffer); } diff --git a/src/scalar/primitive.rs b/src/scalar/primitive.rs index 2d2c827fdf7..277ca08e1dd 100644 --- a/src/scalar/primitive.rs +++ b/src/scalar/primitive.rs @@ -1,8 +1,4 @@ -use crate::{ - datatypes::DataType, - error::ArrowError, - types::{NativeType, NaturalDataType}, -}; +use crate::{datatypes::DataType, error::ArrowError, types::NativeType}; use super::Scalar; @@ -18,7 +14,7 @@ impl PrimitiveScalar { /// Returns a new [`PrimitiveScalar`]. #[inline] pub fn new(data_type: DataType, value: Option) -> Self { - if !T::is_valid(&data_type) { + if !data_type.to_physical_type().eq_primitive(T::PRIMITIVE) { Err(ArrowError::InvalidArgumentError(format!( "Type {} does not support logical type {}", std::any::type_name::(), @@ -43,10 +39,10 @@ impl PrimitiveScalar { } } -impl From> for PrimitiveScalar { +impl From> for PrimitiveScalar { #[inline] fn from(v: Option) -> Self { - Self::new(T::DATA_TYPE, v) + Self::new(T::PRIMITIVE.into(), v) } } diff --git a/src/types/bit_chunk.rs b/src/types/bit_chunk.rs index 76cb1e71b7a..eba57e5e484 100644 --- a/src/types/bit_chunk.rs +++ b/src/types/bit_chunk.rs @@ -5,15 +5,11 @@ use std::{ use super::NativeType; -/// Something that can be use as a chunk of bits. This is used to create masks ofa given number -/// of length, whose width is `1`. In `simd_packed` notation, this corresponds to `m1xY`. -/// # Safety -/// Do not implement. -pub unsafe trait BitChunk: - Sized +/// A chunk of bits. This is used to create masks of a given length +/// whose width is `1` bit. In `simd_packed` notation, this corresponds to `m1xY`. +pub trait BitChunk: + super::private::Sealed + NativeType - + Copy - + std::fmt::Debug + Binary + BitAnd + ShlAssign @@ -35,7 +31,7 @@ pub unsafe trait BitChunk: fn from_ne_bytes(v: Self::Bytes) -> Self; } -unsafe impl BitChunk for u8 { +impl BitChunk for u8 { #[inline(always)] fn zero() -> Self { 0 @@ -57,7 +53,7 @@ unsafe impl BitChunk for u8 { } } -unsafe impl BitChunk for u16 { +impl BitChunk for u16 { #[inline(always)] fn zero() -> Self { 0 @@ -79,7 +75,7 @@ unsafe impl BitChunk for u16 { } } -unsafe impl BitChunk for u32 { +impl BitChunk for u32 { #[inline(always)] fn zero() -> Self { 0 @@ -101,7 +97,7 @@ unsafe impl BitChunk for u32 { } } -unsafe impl BitChunk for u64 { +impl BitChunk for u64 { #[inline(always)] fn zero() -> Self { 0 @@ -123,13 +119,13 @@ unsafe impl BitChunk for u64 { } } -/// An iterator of `bool` over a [`BitChunk`]. This iterator is often -/// compiled to SIMD instructions. +/// An [`Iterator`] over a [`BitChunk`]. This iterator is often +/// compiled to SIMD. /// The [LSB](https://en.wikipedia.org/wiki/Bit_numbering#Least_significant_bit) corresponds /// to the first slot, as defined by the arrow specification. /// # Example /// ``` -/// # use arrow2::types::BitChunkIter; +/// use arrow2::types::BitChunkIter; /// let a = 0b00010000u8; /// let iter = BitChunkIter::new(a, 7); /// let r = iter.collect::>(); diff --git a/src/types/index.rs b/src/types/index.rs index 20146b0c595..ae040ec7709 100644 --- a/src/types/index.rs +++ b/src/types/index.rs @@ -1,56 +1,20 @@ use std::convert::TryFrom; -use crate::{ - trusted_len::TrustedLen, - types::{NativeType, NaturalDataType}, -}; +use crate::trusted_len::TrustedLen; -/// iterator of [`Index`] equivalent to `(a..b)`. -// `Step` is unstable in Rust which does not allow (a..b) for generic `Index`. -pub struct IndexRange { - start: I, - end: I, -} - -impl IndexRange { - /// Returns a new [`IndexRange`]. - pub fn new(start: I, end: I) -> Self { - assert!(end >= start); - Self { start, end } - } -} - -impl Iterator for IndexRange { - type Item = I; - - #[inline] - fn next(&mut self) -> Option { - if self.start == self.end { - return None; - } - let old = self.start; - self.start += I::one(); - Some(old) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - let len = (self.end - self.start).to_usize(); - (len, Some(len)) - } -} +use super::NativeType; -/// Safety: a range is always of known length -unsafe impl TrustedLen for IndexRange {} - -/// Types that can be used to index a slot of an array. +/// Sealed trait describing the subset of [`NativeType`] (`i32`, `i64`, `u32` and `u64`) +/// that can be used to index a slot of an array. pub trait Index: NativeType - + NaturalDataType + std::ops::AddAssign + std::ops::Sub + num_traits::One + PartialOrd + + num_traits::Num + + Ord + + num_traits::CheckedAdd { /// Convert itself to [`usize`]. fn to_usize(&self) -> usize; @@ -115,3 +79,41 @@ impl Index for u64 { Self::try_from(value).ok() } } + +/// Range of [`Index`], equivalent to `(a..b)`. +/// `Step` is unstable in Rust, which does not allow us to implement (a..b) for [`Index`]. +pub struct IndexRange { + start: I, + end: I, +} + +impl IndexRange { + /// Returns a new [`IndexRange`]. + pub fn new(start: I, end: I) -> Self { + assert!(end >= start); + Self { start, end } + } +} + +impl Iterator for IndexRange { + type Item = I; + + #[inline] + fn next(&mut self) -> Option { + if self.start == self.end { + return None; + } + let old = self.start; + self.start += I::one(); + Some(old) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let len = (self.end - self.start).to_usize(); + (len, Some(len)) + } +} + +/// Safety: a range is always of known length +unsafe impl TrustedLen for IndexRange {} diff --git a/src/types/mod.rs b/src/types/mod.rs index c13c11249e9..0125f119b08 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -1,27 +1,65 @@ -//! Traits and implementations to handle _all types_ used in this crate. +#![deny(missing_docs)] +//! Sealed traits and implementations to handle all _physical types_ used in this crate. //! -//! Most physical types used in this crate are native Rust types, like `i32`. -//! The most important trait is [`NativeType`], the generic trait of [`crate::array::PrimitiveArray`]. +//! Most physical types used in this crate are native Rust types, such as `i32`. +//! The trait [`NativeType`] describes the interfaces required by this crate to be conformant +//! with Arrow. //! -//! Another important trait is [`BitChunk`], describing types that can be used to -//! represent chunks of bits (e.g. `u8`, `u16`), and [`BitChunkIter`], that can be used to -//! iterate over bitmaps in [`BitChunk`]s. -//! Finally, this module also contains traits used to compile code optimized for SIMD instructions at [`mod@simd`]. -use std::{convert::TryFrom, ops::Neg}; +//! Every implementation of [`NativeType`] has an associated variant in [`PrimitiveType`], +//! available via [`NativeType::PRIMITIVE`]. +//! Combined, these allow structs generic over [`NativeType`] to be trait objects downcastable +//! to concrete implementations based on the matched [`NativeType::PRIMITIVE`] variant. +//! +//! Another important trait in this module is [`Offset`], the subset of [`NativeType`] that can +//! be used in Arrow offsets (`i32` and `i64`). +//! +//! Another important trait in this module is [`BitChunk`], describing types that can be used to +//! represent chunks of bits (e.g. 8 bits via `u8`, 16 via `u16`), and [`BitChunkIter`], +//! that can be used to iterate over bitmaps in [`BitChunk`]s according to +//! Arrow's definition of bitmaps. +//! +//! Finally, this module contains traits used to compile code based on [`NativeType`] optimized +//! for SIMD, at [`mod@simd`]. mod bit_chunk; pub use bit_chunk::{BitChunk, BitChunkIter}; mod index; pub mod simd; pub use index::*; - -use crate::datatypes::{DataType, IntervalUnit, PhysicalType, PrimitiveType}; - -/// Trait denoting anything that has a natural logical [`DataType`]. -/// For example, [`DataType::Int32`] for `i32`. -pub trait NaturalDataType { - /// The natural [`DataType`]. - const DATA_TYPE: DataType; +mod native; +pub use native::*; +mod offset; +pub use offset::*; + +/// The set of all implementations of the sealed trait [`NativeType`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum PrimitiveType { + /// A signed 8-bit integer. + Int8, + /// A signed 16-bit integer. + Int16, + /// A signed 32-bit integer. + Int32, + /// A signed 64-bit integer. + Int64, + /// A signed 128-bit integer. + Int128, + /// An unsigned 8-bit integer. + UInt8, + /// An unsigned 16-bit integer. + UInt16, + /// An unsigned 32-bit integer. + UInt32, + /// An unsigned 64-bit integer. + UInt64, + /// A 32-bit floating point number. + Float32, + /// A 64-bit floating point number. + Float64, + /// Two i32 representing days and ms + DaysMs, + /// months_days_ns(i32, i32, i64) + MonthDayNano, } mod private { @@ -41,379 +79,3 @@ mod private { impl Sealed for super::days_ms {} impl Sealed for super::months_days_ns {} } - -/// describes whether a [`DataType`] is valid. -pub trait Relation: private::Sealed { - /// Whether `data_type` is a valid [`DataType`]. - fn is_valid(data_type: &DataType) -> bool; -} - -macro_rules! create_relation { - ($native_ty:ty, $physical_ty:expr) => { - impl Relation for $native_ty { - #[inline] - fn is_valid(data_type: &DataType) -> bool { - data_type.to_physical_type() == $physical_ty - } - } - }; -} - -macro_rules! natural_type { - ($type:ty, $data_type:expr) => { - impl NaturalDataType for $type { - const DATA_TYPE: DataType = $data_type; - } - }; -} - -/// Sealed trait that implemented by all types that can be allocated, -/// serialized and deserialized by this crate. -/// All O(N) in-memory allocations are implemented for this trait alone. -pub trait NativeType: - Relation - + NaturalDataType - + Send - + Sync - + Sized - + Copy - + std::fmt::Debug - + std::fmt::Display - + PartialEq - + Default - + 'static -{ - /// Type denoting its representation as bytes. - /// This must be `[u8; N]` where `N = size_of::`. - type Bytes: AsRef<[u8]> - + std::ops::Index - + std::ops::IndexMut - + for<'a> TryFrom<&'a [u8]> - + std::fmt::Debug; - - /// To bytes in little endian - fn to_le_bytes(&self) -> Self::Bytes; - - /// To bytes in native endian - fn to_ne_bytes(&self) -> Self::Bytes; - - /// To bytes in big endian - fn to_be_bytes(&self) -> Self::Bytes; - - /// From bytes in big endian - fn from_be_bytes(bytes: Self::Bytes) -> Self; -} - -macro_rules! native { - ($type:ty) => { - impl NativeType for $type { - type Bytes = [u8; std::mem::size_of::()]; - #[inline] - fn to_le_bytes(&self) -> Self::Bytes { - Self::to_le_bytes(*self) - } - - #[inline] - fn to_be_bytes(&self) -> Self::Bytes { - Self::to_be_bytes(*self) - } - - #[inline] - fn to_ne_bytes(&self) -> Self::Bytes { - Self::to_ne_bytes(*self) - } - - #[inline] - fn from_be_bytes(bytes: Self::Bytes) -> Self { - Self::from_be_bytes(bytes) - } - } - }; -} - -native!(u8); -native!(u16); -native!(u32); -native!(u64); -native!(i8); -native!(i16); -native!(i32); -native!(i64); -native!(i128); -native!(f32); -native!(f64); - -natural_type!(u8, DataType::UInt8); -natural_type!(u16, DataType::UInt16); -natural_type!(u32, DataType::UInt32); -natural_type!(u64, DataType::UInt64); -natural_type!(i8, DataType::Int8); -natural_type!(i16, DataType::Int16); -natural_type!(i32, DataType::Int32); -natural_type!(i64, DataType::Int64); -natural_type!(f32, DataType::Float32); -natural_type!(f64, DataType::Float64); -natural_type!(days_ms, DataType::Interval(IntervalUnit::DayTime)); -natural_type!( - months_days_ns, - DataType::Interval(IntervalUnit::MonthDayNano) -); -natural_type!(i128, DataType::Decimal(32, 32)); // users should set the decimal when creating an array - -create_relation!(u8, PhysicalType::Primitive(PrimitiveType::UInt8)); -create_relation!(u16, PhysicalType::Primitive(PrimitiveType::UInt16)); -create_relation!(u32, PhysicalType::Primitive(PrimitiveType::UInt32)); -create_relation!(u64, PhysicalType::Primitive(PrimitiveType::UInt64)); -create_relation!(i8, PhysicalType::Primitive(PrimitiveType::Int8)); -create_relation!(i16, PhysicalType::Primitive(PrimitiveType::Int16)); -create_relation!(i32, PhysicalType::Primitive(PrimitiveType::Int32)); -create_relation!(i64, PhysicalType::Primitive(PrimitiveType::Int64)); -create_relation!(i128, PhysicalType::Primitive(PrimitiveType::Int128)); -create_relation!(f32, PhysicalType::Primitive(PrimitiveType::Float32)); -create_relation!(f64, PhysicalType::Primitive(PrimitiveType::Float64)); - -/// The in-memory representation of the DayMillisecond variant of arrow's "Interval" logical type. -#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, Hash)] -#[allow(non_camel_case_types)] -pub struct days_ms([i32; 2]); - -impl std::fmt::Display for days_ms { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}d {}ms", self.days(), self.milliseconds()) - } -} - -impl NativeType for days_ms { - type Bytes = [u8; 8]; - #[inline] - fn to_le_bytes(&self) -> Self::Bytes { - let days = self.0[0].to_le_bytes(); - let ms = self.0[1].to_le_bytes(); - let mut result = [0; 8]; - result[0] = days[0]; - result[1] = days[1]; - result[2] = days[2]; - result[3] = days[3]; - result[4] = ms[0]; - result[5] = ms[1]; - result[6] = ms[2]; - result[7] = ms[3]; - result - } - - #[inline] - fn to_ne_bytes(&self) -> Self::Bytes { - let days = self.0[0].to_ne_bytes(); - let ms = self.0[1].to_ne_bytes(); - let mut result = [0; 8]; - result[0] = days[0]; - result[1] = days[1]; - result[2] = days[2]; - result[3] = days[3]; - result[4] = ms[0]; - result[5] = ms[1]; - result[6] = ms[2]; - result[7] = ms[3]; - result - } - - #[inline] - fn to_be_bytes(&self) -> Self::Bytes { - let days = self.0[0].to_be_bytes(); - let ms = self.0[1].to_be_bytes(); - let mut result = [0; 8]; - result[0] = days[0]; - result[1] = days[1]; - result[2] = days[2]; - result[3] = days[3]; - result[4] = ms[0]; - result[5] = ms[1]; - result[6] = ms[2]; - result[7] = ms[3]; - result - } - - #[inline] - fn from_be_bytes(bytes: Self::Bytes) -> Self { - let mut days = [0; 4]; - days[0] = bytes[0]; - days[1] = bytes[1]; - days[2] = bytes[2]; - days[3] = bytes[3]; - let mut ms = [0; 4]; - ms[0] = bytes[4]; - ms[1] = bytes[5]; - ms[2] = bytes[6]; - ms[3] = bytes[7]; - Self([i32::from_be_bytes(days), i32::from_be_bytes(ms)]) - } -} - -create_relation!(days_ms, PhysicalType::Primitive(PrimitiveType::DaysMs)); - -impl days_ms { - /// A new [`days_ms`]. - #[inline] - pub fn new(days: i32, milliseconds: i32) -> Self { - Self([days, milliseconds]) - } - - /// The number of days - #[inline] - pub fn days(&self) -> i32 { - self.0[0] - } - - /// The number of milliseconds - #[inline] - pub fn milliseconds(&self) -> i32 { - self.0[1] - } -} - -/// The in-memory representation of the MonthDayNano variant of the "Interval" logical type. -#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, Hash)] -#[allow(non_camel_case_types)] -#[repr(C)] -pub struct months_days_ns(i32, i32, i64); - -impl std::fmt::Display for months_days_ns { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}m {}d {}ns", self.months(), self.days(), self.ns()) - } -} - -impl NativeType for months_days_ns { - type Bytes = [u8; 16]; - #[inline] - fn to_le_bytes(&self) -> Self::Bytes { - let months = self.months().to_le_bytes(); - let days = self.days().to_le_bytes(); - let ns = self.ns().to_le_bytes(); - let mut result = [0; 16]; - result[0] = months[0]; - result[1] = months[1]; - result[2] = months[2]; - result[3] = months[3]; - result[4] = days[0]; - result[5] = days[1]; - result[6] = days[2]; - result[7] = days[3]; - (0..8).for_each(|i| { - result[8 + i] = ns[i]; - }); - result - } - - #[inline] - fn to_ne_bytes(&self) -> Self::Bytes { - let months = self.months().to_ne_bytes(); - let days = self.days().to_ne_bytes(); - let ns = self.ns().to_ne_bytes(); - let mut result = [0; 16]; - result[0] = months[0]; - result[1] = months[1]; - result[2] = months[2]; - result[3] = months[3]; - result[4] = days[0]; - result[5] = days[1]; - result[6] = days[2]; - result[7] = days[3]; - (0..8).for_each(|i| { - result[8 + i] = ns[i]; - }); - result - } - - #[inline] - fn to_be_bytes(&self) -> Self::Bytes { - let months = self.months().to_be_bytes(); - let days = self.days().to_be_bytes(); - let ns = self.ns().to_be_bytes(); - let mut result = [0; 16]; - result[0] = months[0]; - result[1] = months[1]; - result[2] = months[2]; - result[3] = months[3]; - result[4] = days[0]; - result[5] = days[1]; - result[6] = days[2]; - result[7] = days[3]; - (0..8).for_each(|i| { - result[8 + i] = ns[i]; - }); - result - } - - #[inline] - fn from_be_bytes(bytes: Self::Bytes) -> Self { - let mut months = [0; 4]; - months[0] = bytes[0]; - months[1] = bytes[1]; - months[2] = bytes[2]; - months[3] = bytes[3]; - let mut days = [0; 4]; - days[0] = bytes[4]; - days[1] = bytes[5]; - days[2] = bytes[6]; - days[3] = bytes[7]; - let mut ns = [0; 8]; - (0..8).for_each(|i| { - ns[i] = bytes[8 + i]; - }); - Self( - i32::from_be_bytes(months), - i32::from_be_bytes(days), - i64::from_be_bytes(ns), - ) - } -} - -create_relation!( - months_days_ns, - PhysicalType::Primitive(PrimitiveType::MonthDayNano) -); - -impl months_days_ns { - /// A new [`months_days_ns`]. - #[inline] - pub fn new(months: i32, days: i32, nanoseconds: i64) -> Self { - Self(months, days, nanoseconds) - } - - /// The number of months - #[inline] - pub fn months(&self) -> i32 { - self.0 - } - - /// The number of days - #[inline] - pub fn days(&self) -> i32 { - self.1 - } - - /// The number of nanoseconds - #[inline] - pub fn ns(&self) -> i64 { - self.2 - } -} - -impl Neg for days_ms { - type Output = Self; - - #[inline(always)] - fn neg(self) -> Self::Output { - Self([-self.0[0], -self.0[0]]) - } -} - -impl Neg for months_days_ns { - type Output = Self; - - #[inline(always)] - fn neg(self) -> Self::Output { - Self(-self.0, -self.1, -self.2) - } -} diff --git a/src/types/native.rs b/src/types/native.rs new file mode 100644 index 00000000000..bd3aed42827 --- /dev/null +++ b/src/types/native.rs @@ -0,0 +1,325 @@ +use std::convert::TryFrom; +use std::ops::Neg; + +use super::PrimitiveType; + +/// Sealed trait implemented by all physical types that can be allocated, +/// serialized and deserialized by this crate. +/// All O(N) allocations in this crate are done for this trait alone. +pub trait NativeType: + super::private::Sealed + + Send + + Sync + + Sized + + Copy + + std::fmt::Debug + + std::fmt::Display + + PartialEq + + Default + + 'static +{ + /// The corresponding variant of [`PrimitiveType`]. + const PRIMITIVE: PrimitiveType; + + /// Type denoting its representation as bytes. + /// This is `[u8; N]` where `N = size_of::`. + type Bytes: AsRef<[u8]> + + std::ops::Index + + std::ops::IndexMut + + for<'a> TryFrom<&'a [u8]> + + std::fmt::Debug; + + /// To bytes in little endian + fn to_le_bytes(&self) -> Self::Bytes; + + /// To bytes in native endian + fn to_ne_bytes(&self) -> Self::Bytes; + + /// To bytes in big endian + fn to_be_bytes(&self) -> Self::Bytes; + + /// From bytes in big endian + fn from_be_bytes(bytes: Self::Bytes) -> Self; +} + +macro_rules! native_type { + ($type:ty, $primitive_type:expr) => { + impl NativeType for $type { + const PRIMITIVE: PrimitiveType = $primitive_type; + + type Bytes = [u8; std::mem::size_of::()]; + #[inline] + fn to_le_bytes(&self) -> Self::Bytes { + Self::to_le_bytes(*self) + } + + #[inline] + fn to_be_bytes(&self) -> Self::Bytes { + Self::to_be_bytes(*self) + } + + #[inline] + fn to_ne_bytes(&self) -> Self::Bytes { + Self::to_ne_bytes(*self) + } + + #[inline] + fn from_be_bytes(bytes: Self::Bytes) -> Self { + Self::from_be_bytes(bytes) + } + } + }; +} + +native_type!(u8, PrimitiveType::UInt8); +native_type!(u16, PrimitiveType::UInt16); +native_type!(u32, PrimitiveType::UInt32); +native_type!(u64, PrimitiveType::UInt64); +native_type!(i8, PrimitiveType::Int8); +native_type!(i16, PrimitiveType::Int16); +native_type!(i32, PrimitiveType::Int32); +native_type!(i64, PrimitiveType::Int64); +native_type!(f32, PrimitiveType::Float32); +native_type!(f64, PrimitiveType::Float64); +native_type!(i128, PrimitiveType::Int128); + +/// The in-memory representation of the DayMillisecond variant of arrow's "Interval" logical type. +#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, Hash)] +#[allow(non_camel_case_types)] +pub struct days_ms([i32; 2]); + +impl days_ms { + /// A new [`days_ms`]. + #[inline] + pub fn new(days: i32, milliseconds: i32) -> Self { + Self([days, milliseconds]) + } + + /// The number of days + #[inline] + pub fn days(&self) -> i32 { + self.0[0] + } + + /// The number of milliseconds + #[inline] + pub fn milliseconds(&self) -> i32 { + self.0[1] + } +} + +impl NativeType for days_ms { + const PRIMITIVE: PrimitiveType = PrimitiveType::DaysMs; + type Bytes = [u8; 8]; + #[inline] + fn to_le_bytes(&self) -> Self::Bytes { + let days = self.0[0].to_le_bytes(); + let ms = self.0[1].to_le_bytes(); + let mut result = [0; 8]; + result[0] = days[0]; + result[1] = days[1]; + result[2] = days[2]; + result[3] = days[3]; + result[4] = ms[0]; + result[5] = ms[1]; + result[6] = ms[2]; + result[7] = ms[3]; + result + } + + #[inline] + fn to_ne_bytes(&self) -> Self::Bytes { + let days = self.0[0].to_ne_bytes(); + let ms = self.0[1].to_ne_bytes(); + let mut result = [0; 8]; + result[0] = days[0]; + result[1] = days[1]; + result[2] = days[2]; + result[3] = days[3]; + result[4] = ms[0]; + result[5] = ms[1]; + result[6] = ms[2]; + result[7] = ms[3]; + result + } + + #[inline] + fn to_be_bytes(&self) -> Self::Bytes { + let days = self.0[0].to_be_bytes(); + let ms = self.0[1].to_be_bytes(); + let mut result = [0; 8]; + result[0] = days[0]; + result[1] = days[1]; + result[2] = days[2]; + result[3] = days[3]; + result[4] = ms[0]; + result[5] = ms[1]; + result[6] = ms[2]; + result[7] = ms[3]; + result + } + + #[inline] + fn from_be_bytes(bytes: Self::Bytes) -> Self { + let mut days = [0; 4]; + days[0] = bytes[0]; + days[1] = bytes[1]; + days[2] = bytes[2]; + days[3] = bytes[3]; + let mut ms = [0; 4]; + ms[0] = bytes[4]; + ms[1] = bytes[5]; + ms[2] = bytes[6]; + ms[3] = bytes[7]; + Self([i32::from_be_bytes(days), i32::from_be_bytes(ms)]) + } +} + +/// The in-memory representation of the MonthDayNano variant of the "Interval" logical type. +#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, Hash)] +#[allow(non_camel_case_types)] +#[repr(C)] +pub struct months_days_ns(i32, i32, i64); + +impl months_days_ns { + /// A new [`months_days_ns`]. + #[inline] + pub fn new(months: i32, days: i32, nanoseconds: i64) -> Self { + Self(months, days, nanoseconds) + } + + /// The number of months + #[inline] + pub fn months(&self) -> i32 { + self.0 + } + + /// The number of days + #[inline] + pub fn days(&self) -> i32 { + self.1 + } + + /// The number of nanoseconds + #[inline] + pub fn ns(&self) -> i64 { + self.2 + } +} + +impl NativeType for months_days_ns { + const PRIMITIVE: PrimitiveType = PrimitiveType::MonthDayNano; + type Bytes = [u8; 16]; + #[inline] + fn to_le_bytes(&self) -> Self::Bytes { + let months = self.months().to_le_bytes(); + let days = self.days().to_le_bytes(); + let ns = self.ns().to_le_bytes(); + let mut result = [0; 16]; + result[0] = months[0]; + result[1] = months[1]; + result[2] = months[2]; + result[3] = months[3]; + result[4] = days[0]; + result[5] = days[1]; + result[6] = days[2]; + result[7] = days[3]; + (0..8).for_each(|i| { + result[8 + i] = ns[i]; + }); + result + } + + #[inline] + fn to_ne_bytes(&self) -> Self::Bytes { + let months = self.months().to_ne_bytes(); + let days = self.days().to_ne_bytes(); + let ns = self.ns().to_ne_bytes(); + let mut result = [0; 16]; + result[0] = months[0]; + result[1] = months[1]; + result[2] = months[2]; + result[3] = months[3]; + result[4] = days[0]; + result[5] = days[1]; + result[6] = days[2]; + result[7] = days[3]; + (0..8).for_each(|i| { + result[8 + i] = ns[i]; + }); + result + } + + #[inline] + fn to_be_bytes(&self) -> Self::Bytes { + let months = self.months().to_be_bytes(); + let days = self.days().to_be_bytes(); + let ns = self.ns().to_be_bytes(); + let mut result = [0; 16]; + result[0] = months[0]; + result[1] = months[1]; + result[2] = months[2]; + result[3] = months[3]; + result[4] = days[0]; + result[5] = days[1]; + result[6] = days[2]; + result[7] = days[3]; + (0..8).for_each(|i| { + result[8 + i] = ns[i]; + }); + result + } + + #[inline] + fn from_be_bytes(bytes: Self::Bytes) -> Self { + let mut months = [0; 4]; + months[0] = bytes[0]; + months[1] = bytes[1]; + months[2] = bytes[2]; + months[3] = bytes[3]; + let mut days = [0; 4]; + days[0] = bytes[4]; + days[1] = bytes[5]; + days[2] = bytes[6]; + days[3] = bytes[7]; + let mut ns = [0; 8]; + (0..8).for_each(|i| { + ns[i] = bytes[8 + i]; + }); + Self( + i32::from_be_bytes(months), + i32::from_be_bytes(days), + i64::from_be_bytes(ns), + ) + } +} + +impl std::fmt::Display for days_ms { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}d {}ms", self.days(), self.milliseconds()) + } +} + +impl std::fmt::Display for months_days_ns { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}m {}d {}ns", self.months(), self.days(), self.ns()) + } +} + +impl Neg for days_ms { + type Output = Self; + + #[inline(always)] + fn neg(self) -> Self::Output { + Self::new(-self.days(), -self.milliseconds()) + } +} + +impl Neg for months_days_ns { + type Output = Self; + + #[inline(always)] + fn neg(self) -> Self::Output { + Self::new(-self.months(), -self.days(), -self.ns()) + } +} diff --git a/src/types/offset.rs b/src/types/offset.rs new file mode 100644 index 00000000000..b4ea9661307 --- /dev/null +++ b/src/types/offset.rs @@ -0,0 +1,22 @@ +use super::Index; + +/// Sealed trait describing the subset (`i32` and `i64`) of [`Index`] that can be used +/// as offsets of variable-length Arrow arrays. +pub trait Offset: super::private::Sealed + Index { + /// Whether it is `i32` (false) or `i64` (true). + fn is_large() -> bool; +} + +impl Offset for i32 { + #[inline] + fn is_large() -> bool { + false + } +} + +impl Offset for i64 { + #[inline] + fn is_large() -> bool { + true + } +}