diff --git a/src/array/binary/iterator.rs b/src/array/binary/iterator.rs index 7d2679798a6..9da47daba21 100644 --- a/src/array/binary/iterator.rs +++ b/src/array/binary/iterator.rs @@ -6,6 +6,7 @@ use crate::{ use super::BinaryArray; +/// Iterator over slices of `&[u8]`. #[derive(Debug, Clone)] pub struct BinaryValueIter<'a, O: Offset> { array: &'a BinaryArray, @@ -13,7 +14,6 @@ pub struct BinaryValueIter<'a, O: Offset> { } impl<'a, O: Offset> BinaryValueIter<'a, O> { - #[inline] pub fn new(array: &'a BinaryArray) -> Self { Self { array, index: 0 } } diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index b082e954bfe..7c9a2fc3dcb 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -12,6 +12,7 @@ mod from; mod mutable; pub use mutable::*; +/// A [`BinaryArray`] is a nullable array of bytes - the Arrow equivalent of `Vec<Option<Vec<u8>>>`. #[derive(Debug, Clone)] pub struct BinaryArray { data_type: DataType, @@ -21,11 +22,14 @@ pub struct BinaryArray { offset: usize, } +// constructors impl BinaryArray { + /// Creates an empty [`BinaryArray`], i.e. whose `.len` is zero. pub fn new_empty() -> Self { Self::from_data(Buffer::from(&[O::zero()]), Buffer::new(), None) } + /// Creates a null [`BinaryArray`], i.e. whose `.null_count() == .len()`. #[inline] pub fn new_null(length: usize) -> Self { Self::from_data( @@ -35,9 +39,17 @@ impl BinaryArray { ) } + /// Creates a new [`BinaryArray`] from lower-level parts + /// # Panics + /// * The length of the offset buffer must be at least 1 + /// * The length of the values must be equal to the last offset value pub fn from_data(offsets: Buffer, values: Buffer, validity: Option) -> Self { check_offsets(&offsets, values.len()); + if let Some(validity) = &validity { + assert_eq!(offsets.len() - 1, validity.len()); + } + Self { data_type: if O::is_large() { DataType::LargeBinary @@ -51,6 +63,11 @@ impl BinaryArray { } } + /// Creates a new [`BinaryArray`] by slicing this [`BinaryArray`]. 
+ /// # Implementation + /// This function is `O(1)`: all data will be shared between both arrays. + /// # Panics + /// iff `offset + length > self.len()`. pub fn slice(&self, offset: usize, length: usize) -> Self { let validity = self.validity.clone().map(|x| x.slice(offset, length)); let offsets = self.offsets.clone().slice(offset, length + 1); @@ -62,8 +79,13 @@ impl BinaryArray { offset: self.offset + offset, } } +} - /// Returns the element at index `i` as &str +// accessors +impl BinaryArray { + /// Returns the element at index `i` + /// # Panics + /// iff `i >= self.len()` pub fn value(&self, i: usize) -> &[u8] { let offsets = self.offsets.as_slice(); let offset = offsets[i]; @@ -71,10 +93,10 @@ impl BinaryArray { let length = (offset_1 - offset).to_usize(); let offset = offset.to_usize(); - &self.values.as_slice()[offset..offset + length] + &self.values[offset..offset + length] } - /// Returns the element at index `i` as &str + /// Returns the element at index `i` /// # Safety /// Assumes that the `i < self.len`. pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] { @@ -83,14 +105,16 @@ impl BinaryArray { let length = (offset_1 - offset).to_usize(); let offset = offset.to_usize(); - std::slice::from_raw_parts(self.values.as_ptr().add(offset), length) + &self.values[offset..offset + length] } + /// Returns the offsets that slice `.values()` to return valid values. #[inline] pub fn offsets(&self) -> &Buffer { &self.offsets } + /// Returns all values in this array. Use `.offsets()` to slice them. #[inline] pub fn values(&self) -> &Buffer { &self.values diff --git a/src/array/boolean/from.rs b/src/array/boolean/from.rs index 5134dec092b..cf16d572787 100644 --- a/src/array/boolean/from.rs +++ b/src/array/boolean/from.rs @@ -4,6 +4,13 @@ use crate::trusted_len::TrustedLen; use super::{BooleanArray, MutableBooleanArray}; +impl]>> From

for BooleanArray { + /// Creates a new [`BooleanArray`] out of a slice of Optional `bool`. + fn from(slice: P) -> Self { + MutableBooleanArray::from(slice).into() + } +} + impl BooleanArray { /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`. #[inline] diff --git a/src/array/boolean/iterator.rs b/src/array/boolean/iterator.rs index d5e0b0c7731..8b5e679fef0 100644 --- a/src/array/boolean/iterator.rs +++ b/src/array/boolean/iterator.rs @@ -14,7 +14,7 @@ impl<'a> IntoIterator for &'a BooleanArray { } impl<'a> BooleanArray { - /// constructs a new iterator + /// Returns an iterator over the optional values of this [`BooleanArray`]. #[inline] pub fn iter(&'a self) -> ZipValidity<'a, bool, BitmapIter<'a>> { zip_validity( @@ -23,7 +23,7 @@ impl<'a> BooleanArray { ) } - /// Returns an iterator of `bool` + /// Returns an iterator over the values of this [`BooleanArray`] #[inline] pub fn values_iter(&'a self) -> BitmapIter<'a> { self.values().iter() @@ -41,7 +41,7 @@ impl<'a> IntoIterator for &'a MutableBooleanArray { } impl<'a> MutableBooleanArray { - /// Returns an iterator over `Option` + /// Returns an iterator over the optional values of this [`MutableBooleanArray`]. #[inline] pub fn iter(&'a self) -> ZipValidity<'a, bool, BitmapIter<'a>> { zip_validity( @@ -50,7 +50,7 @@ impl<'a> MutableBooleanArray { ) } - /// Returns an iterator of `bool` + /// Returns an iterator over the values of this [`MutableBooleanArray`] #[inline] pub fn values_iter(&'a self) -> BitmapIter<'a> { self.values().iter() diff --git a/src/array/boolean/mod.rs b/src/array/boolean/mod.rs index 43c310b3004..3679a361ee9 100644 --- a/src/array/boolean/mod.rs +++ b/src/array/boolean/mod.rs @@ -10,12 +10,10 @@ mod mutable; pub use iterator::*; pub use mutable::*; -/// A [`BooleanArray`] is arrow's equivalent to `Vec>`, i.e. -/// an array designed for highly performant operations on optionally nullable booleans. -/// The size of this struct is `O(1)` as all data is stored behind an `Arc`. 
+/// The Arrow's equivalent to an immutable `Vec>`, but with `1/16` of its size. +/// Cloning and slicing this struct is `O(1)`. #[derive(Debug, Clone)] pub struct BooleanArray { - data_type: DataType, values: Bitmap, validity: Option, offset: usize, @@ -45,7 +43,6 @@ impl BooleanArray { assert_eq!(values.len(), validity.len()); } Self { - data_type: DataType::Boolean, values, validity, offset: 0, @@ -54,28 +51,28 @@ impl BooleanArray { /// Returns a slice of this [`BooleanArray`]. /// # Implementation - /// This operation is `O(1)` as it amounts to essentially increase two ref counts. + /// This operation is `O(1)` as it amounts to increase two ref counts. /// # Panic /// This function panics iff `offset + length >= self.len()`. #[inline] pub fn slice(&self, offset: usize, length: usize) -> Self { let validity = self.validity.clone().map(|x| x.slice(offset, length)); Self { - data_type: self.data_type.clone(), values: self.values.clone().slice(offset, length), validity, offset: self.offset + offset, } } - /// Returns the element at index `i` as bool + /// Returns the value at index `i` + /// # Panic + /// This function panics iff `i >= self.len()`. #[inline] pub fn value(&self, i: usize) -> bool { self.values.get_bit(i) } /// Returns the element at index `i` as bool - /// /// # Safety /// Caller must be sure that `i < self.len()` #[inline] @@ -83,7 +80,7 @@ impl BooleanArray { self.values.get_bit_unchecked(i) } - /// Returns the values bitmap of this [`BooleanArray`]. + /// Returns the values of this [`BooleanArray`]. #[inline] pub fn values(&self) -> &Bitmap { &self.values @@ -103,7 +100,7 @@ impl Array for BooleanArray { #[inline] fn data_type(&self) -> &DataType { - &self.data_type + &DataType::Boolean } #[inline] @@ -122,10 +119,3 @@ impl std::fmt::Display for BooleanArray { display_fmt(self.iter(), "BooleanArray", f, false) } } - -impl]>> From

for BooleanArray { - /// Creates a new [`BooleanArray`] out of a slice of Optional `bool`. - fn from(slice: P) -> Self { - MutableBooleanArray::from(slice).into() - } -} diff --git a/src/array/boolean/mutable.rs b/src/array/boolean/mutable.rs index 068e595e687..603a30b83b7 100644 --- a/src/array/boolean/mutable.rs +++ b/src/array/boolean/mutable.rs @@ -12,7 +12,8 @@ use crate::{ use super::BooleanArray; -/// The mutable version of [`BooleanArray`]. See [`MutableArray`] for more details. +/// The Arrow's equivalent to `Vec>`, but with `1/16` of its size. +/// Converting a [`MutableBooleanArray`] into a [`BooleanArray`] is `O(1)`. #[derive(Debug)] pub struct MutableBooleanArray { values: MutableBitmap, diff --git a/src/array/growable/mod.rs b/src/array/growable/mod.rs index 41e3ca3328a..95b31161cac 100644 --- a/src/array/growable/mod.rs +++ b/src/array/growable/mod.rs @@ -35,7 +35,7 @@ pub trait Growable<'a> { /// This function panics if the range is out of bounds, i.e. if `start + len >= array.len()`. fn extend(&mut self, index: usize, start: usize, len: usize); - /// Extends this [`GrowableArray`] with null elements, disregarding the bound arrays + /// Extends this [`Growable`] with null elements, disregarding the bound arrays fn extend_validity(&mut self, additional: usize); /// Converts itself to an `Arc`, thereby finishing the mutation. diff --git a/src/array/mod.rs b/src/array/mod.rs index 4d1366dbf21..2a9fcb3993a 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -1,19 +1,15 @@ -//! This module contains arrays: fixed-length and immutable containers with optional values +//! fixed-length and immutable containers with optional values //! that are layed in memory according to the Arrow specification. //! Each array type has its own `struct`. The following are the main array types: -//! //! * [`PrimitiveArray`], an array of values with a fixed length such as integers, floats, etc. //! * [`BooleanArray`], an array of boolean values (stored as a bitmap) //! 
* [`Utf8Array`], an array of utf8 values //! * [`BinaryArray`], an array of binary values //! * [`ListArray`], an array of arrays (e.g. `[[1, 2], None, [], [None]]`) //! * [`StructArray`], an array of arrays identified by a string (e.g. `{"a": [1, 2], "b": [true, false]}`) -//! -//! This module contains constructors and accessors to operate on the arrays. -//! All the arrays implement the trait [`Array`] and are often trait objects. -//! Every array has a [`DataType`], which you can access with [`Array::data_type`]. -//! This can be used to `downcast_ref` a `&dyn Array` to a concrete struct. -//! Arrays can share memory via [`crate::buffer::Buffer`] and thus cloning and slicing is `O(1)`. +//! All arrays implement the trait [`Array`] and are often trait objects that can be downcast +//! to a concrete struct based on [`DataType`] available from [`Array::data_type`]. +//! Arrays share memory via [`crate::buffer::Buffer`] and thus cloning and slicing them is `O(1)`. //! //! This module also contains the mutable counterparts of arrays, that are neither clonable nor slicable, but that //! can be operated in-place, such as [`MutablePrimitiveArray`] and [`MutableUtf8Array`]. diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index 49033ba9dec..4410e2e5134 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -20,6 +20,15 @@ pub use mutable::*; /// an array designed for highly performant operations on optionally nullable slots, /// backed by a physical type of a physical byte-width, such as `i32` or `f64`. /// The size of this struct is `O(1)` as all data is stored behind an [`std::sync::Arc`]. 
+/// # Example +/// ``` +/// use arrow2::array::PrimitiveArray; +/// # fn main() { +/// let array = PrimitiveArray::::from([Some(1), None, Some(2)]); +/// assert_eq!(array.value(0), 1); +/// assert_eq!(array.values().as_slice(), &[1, 0, 2]); +/// # } +/// ``` #[derive(Debug, Clone)] pub struct PrimitiveArray { data_type: DataType, diff --git a/src/array/primitive/mutable.rs b/src/array/primitive/mutable.rs index 9e126ee4857..19e352a34df 100644 --- a/src/array/primitive/mutable.rs +++ b/src/array/primitive/mutable.rs @@ -12,7 +12,8 @@ use crate::{ use super::PrimitiveArray; -/// The mutable version of [`PrimitiveArray`]. See [`MutableArray`] for more details. +/// The Arrow's equivalent to `Vec>` where `T` is byte-size (e.g. `i32`). +/// Converting a [`MutablePrimitiveArray`] into a [`PrimitiveArray`] is `O(1)`. #[derive(Debug)] pub struct MutablePrimitiveArray { data_type: DataType, diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs index a9ed14c6b64..bf627d2464f 100644 --- a/src/array/utf8/mod.rs +++ b/src/array/utf8/mod.rs @@ -13,17 +13,16 @@ mod mutable; pub use iterator::*; pub use mutable::*; -/// A [`Utf8Array`] is arrow's equivalent of `Vec>`, i.e. -/// an array designed for highly performant operations on optionally nullable strings. -/// The size of this struct is `O(1)` as all data is stored behind an `Arc`. +/// A [`Utf8Array`] is arrow's equivalent of an immutable `Vec>`. +/// Cloning and slicing this struct is `O(1)`. 
/// # Example /// ``` -/// use std::iter::FromIterator; /// use arrow2::array::Utf8Array; /// # fn main() { -/// let data = vec![Some("hello"), None, Some("hello2")]; -/// let array = Utf8Array::::from_iter(data); -/// assert_eq!(array.value(0), "hello"); +/// let array = Utf8Array::::from([Some("hi"), None, Some("there")]); +/// assert_eq!(array.value(0), "hi"); +/// assert_eq!(array.values().as_slice(), b"hithere".as_ref()); +/// assert_eq!(array.offsets().as_slice(), &[0, 2, 2, 2 + 5]); /// # } /// ``` #[derive(Debug, Clone)] diff --git a/src/bitmap/immutable.rs b/src/bitmap/immutable.rs index dccc52bdd69..5011e4864a8 100644 --- a/src/bitmap/immutable.rs +++ b/src/bitmap/immutable.rs @@ -85,7 +85,7 @@ impl Bitmap { Bitmap::from_bytes(buffer.into(), length) } - /// Creates a new [`Bitmap`] from [`Bytes`] and a length. + /// Creates a new [`Bitmap`] from a slice and length. /// # Panic /// Panics iff `length <= bytes.len() * 8` #[inline] diff --git a/src/buffer/mutable.rs b/src/buffer/mutable.rs index 658bce7a82b..43439161bf3 100644 --- a/src/buffer/mutable.rs +++ b/src/buffer/mutable.rs @@ -462,7 +462,7 @@ impl MutableBuffer { /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length or errors /// if any of the items of the iterator is an error. /// Prefer this to `collect` whenever possible, as it is faster ~60% faster. - /// The only difference between this and [`try_from_trusted_len_iter`] is that this works + /// The only difference between this and [`Self::try_from_trusted_len_iter`] is that this works /// on any iterator, while `try_from_trusted_len_iter` requires the iterator to implement the trait /// [`TrustedLen`], which not every iterator currently implements due to limitations of the Rust compiler. 
/// # Safety diff --git a/src/compute/concat.rs b/src/compute/concat.rs index ca8919fcfe3..665818abc96 100644 --- a/src/compute/concat.rs +++ b/src/compute/concat.rs @@ -33,7 +33,7 @@ use crate::array::{growable::make_growable, Array}; use crate::error::{ArrowError, Result}; -/// Concatenate multiple [Array] of the same type into a single [ArrayRef]. +/// Concatenate multiple [Array] of the same type into a single [`Array`]. pub fn concatenate(arrays: &[&dyn Array]) -> Result> { if arrays.is_empty() { return Err(ArrowError::InvalidArgumentError( diff --git a/src/compute/merge_sort/mod.rs b/src/compute/merge_sort/mod.rs index 3f4736a24aa..bf0b97734e8 100644 --- a/src/compute/merge_sort/mod.rs +++ b/src/compute/merge_sort/mod.rs @@ -85,7 +85,7 @@ pub type MergeSlice = (usize, usize, usize); /// # Panic /// This function panics if: /// * `max(slices[i].0) >= arrays.len()`, as it indicates that the slices point to an array out of bounds from `arrays`. -/// * the arrays do not have the same [DataType] (as it makes no sense to take together from them) +/// * the arrays do not have the same [`crate::datatypes::DataType`] (as it makes no sense to take together from them) pub fn take_arrays>( arrays: &[&dyn Array], slices: I, @@ -118,12 +118,12 @@ pub fn take_arrays>( growable.as_box() } -/// Combines two sorted [Array]s of the same [DataType] into a single sorted array. +/// Combines two sorted [Array]s of the same [`crate::datatypes::DataType`] into a single sorted array. /// If the arrays are not sorted (which this function does not check), the result is wrong. 
/// # Error /// This function errors when: -/// * the arrays have a different [DataType] -/// * the arrays have a [DataType] that has no order relationship +/// * the arrays have a different [`crate::datatypes::DataType`] +/// * the arrays have a [`crate::datatypes::DataType`] that has no order relationship /// # Example /// ```rust /// use arrow2::array::Int32Array; @@ -179,7 +179,8 @@ pub fn merge_sort( /// ``` /// # Error /// This function errors if the arrays `a0i` are not pairwise sortable. This happens when either -/// they have not the same [DataType] or when their [DataType] does not correspond to a sortable type. +/// they have not the same [`crate::datatypes::DataType`] or when their [`crate::datatypes::DataType`] +/// does not correspond to a sortable type. /// # Panic /// This function panics if: /// * `pairs` has no elements diff --git a/src/compute/nullif.rs b/src/compute/nullif.rs index 1b41b9e4782..a9476b70e89 100644 --- a/src/compute/nullif.rs +++ b/src/compute/nullif.rs @@ -51,7 +51,7 @@ pub fn nullif_primitive( )) } -/// Returns whether [`nulliff`] is implemented for the datatypes. +/// Returns whether [`nullif`] is implemented for the datatypes. pub fn can_nullif(lhs: &DataType, rhs: &DataType) -> bool { if lhs != rhs { return false; diff --git a/src/io/json/read/infer_schema.rs b/src/io/json/read/infer_schema.rs index 0ae6a8c6681..da5ce613d92 100644 --- a/src/io/json/read/infer_schema.rs +++ b/src/io/json/read/infer_schema.rs @@ -338,7 +338,7 @@ where /// If `max_read_records` is not set, the whole file is read to infer its field types. /// /// Contrary to [`infer_json_schema`], this function will seek back to the start of the `reader`. -/// That way, the `reader` can be used immediately afterwards to create a [`Reader`]. +/// That way, the `reader` can be used immediately afterwards. 
/// /// # Examples /// ``` diff --git a/src/types/mod.rs b/src/types/mod.rs index b9f630f0055..46d12540887 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -6,7 +6,7 @@ //! Another important trait is [`BitChunk`], describing types that can be used to //! represent chunks of bits (e.g. `u8`, `u16`), and [`BitChunkIter`], that can be used to //! iterate over bitmaps in [`BitChunk`]s. -//! Finally, this module also contains traits used to compile code optimized for SIMD instructions at [`simd`]. +//! Finally, this module also contains traits used to compile code optimized for SIMD instructions at [`mod@simd`]. use std::convert::TryFrom; mod bit_chunk;