Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved documentation (#306)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Aug 21, 2021
1 parent 4d227fa commit 83256a8
Show file tree
Hide file tree
Showing 18 changed files with 84 additions and 56 deletions.
2 changes: 1 addition & 1 deletion src/array/binary/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ use crate::{

use super::BinaryArray;

/// Iterator over slices of `&[u8]`.
#[derive(Debug, Clone)]
pub struct BinaryValueIter<'a, O: Offset> {
// The array this iterator borrows from.
array: &'a BinaryArray<O>,
// Cursor into `array`; `new` initializes it to 0.
// NOTE(review): presumably the index of the next value to yield - confirm against the `Iterator` impl.
index: usize,
}

impl<'a, O: Offset> BinaryValueIter<'a, O> {
#[inline]
pub fn new(array: &'a BinaryArray<O>) -> Self {
Self { array, index: 0 }
}
Expand Down
32 changes: 28 additions & 4 deletions src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ mod from;
mod mutable;
pub use mutable::*;

/// A [`BinaryArray`] is a nullable array of bytes - the Arrow equivalent of `Vec<Option<Vec<u8>>>`.
#[derive(Debug, Clone)]
pub struct BinaryArray<O: Offset> {
data_type: DataType,
Expand All @@ -21,11 +22,14 @@ pub struct BinaryArray<O: Offset> {
offset: usize,
}

// constructors
impl<O: Offset> BinaryArray<O> {
/// Creates an empty [`BinaryArray`], i.e. whose `.len` is zero.
pub fn new_empty() -> Self {
Self::from_data(Buffer::from(&[O::zero()]), Buffer::new(), None)
}

/// Creates a null [`BinaryArray`], i.e. whose `.null_count() == .len()`.
#[inline]
pub fn new_null(length: usize) -> Self {
Self::from_data(
Expand All @@ -35,9 +39,17 @@ impl<O: Offset> BinaryArray<O> {
)
}

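/// Creates a new [`BinaryArray`] from lower-level parts.
/// # Panics
/// This function panics iff any of the following requirements is violated:
/// * The length of the offset buffer must be at least 1
/// * The length of the values must be equal to the last offset value
/// * The length of the validity, when present, must be equal to `offsets.len() - 1`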
pub fn from_data(offsets: Buffer<O>, values: Buffer<u8>, validity: Option<Bitmap>) -> Self {
check_offsets(&offsets, values.len());

if let Some(validity) = &validity {
assert_eq!(offsets.len() - 1, validity.len());
}

Self {
data_type: if O::is_large() {
DataType::LargeBinary
Expand All @@ -51,6 +63,11 @@ impl<O: Offset> BinaryArray<O> {
}
}

/// Creates a new [`BinaryArray`] by slicing this [`BinaryArray`].
/// # Implementation
/// This function is `O(1)`: all data will be shared between both arrays.
/// # Panics
/// iff `offset + length > self.len()`.
pub fn slice(&self, offset: usize, length: usize) -> Self {
let validity = self.validity.clone().map(|x| x.slice(offset, length));
let offsets = self.offsets.clone().slice(offset, length + 1);
Expand All @@ -62,19 +79,24 @@ impl<O: Offset> BinaryArray<O> {
offset: self.offset + offset,
}
}
}

/// Returns the element at index `i` as &str
// accessors
impl<O: Offset> BinaryArray<O> {
/// Returns the element at index `i`
/// # Panics
/// iff `i >= self.len()`
pub fn value(&self, i: usize) -> &[u8] {
let offsets = self.offsets.as_slice();
let offset = offsets[i];
let offset_1 = offsets[i + 1];
let length = (offset_1 - offset).to_usize();
let offset = offset.to_usize();

&self.values.as_slice()[offset..offset + length]
&self.values[offset..offset + length]
}

/// Returns the element at index `i` as &str
/// Returns the element at index `i`
/// # Safety
/// Assumes that `i < self.len()`.
pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
Expand All @@ -83,14 +105,16 @@ impl<O: Offset> BinaryArray<O> {
let length = (offset_1 - offset).to_usize();
let offset = offset.to_usize();

std::slice::from_raw_parts(self.values.as_ptr().add(offset), length)
&self.values[offset..offset + length]
}

/// Returns the offsets that slice `.values()` to return valid values.
#[inline]
pub fn offsets(&self) -> &Buffer<O> {
&self.offsets
}

/// Returns all values in this array. Use `.offsets()` to slice them.
#[inline]
pub fn values(&self) -> &Buffer<u8> {
&self.values
Expand Down
7 changes: 7 additions & 0 deletions src/array/boolean/from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ use crate::trusted_len::TrustedLen;

use super::{BooleanArray, MutableBooleanArray};

impl<P: AsRef<[Option<bool>]>> From<P> for BooleanArray {
    /// Builds a [`BooleanArray`] from anything that can be viewed as a slice of `Option<bool>`.
    ///
    /// The slice is first turned into a [`MutableBooleanArray`], which is then
    /// converted into the immutable array.
    fn from(slice: P) -> Self {
        let mutable = MutableBooleanArray::from(slice);
        mutable.into()
    }
}

impl BooleanArray {
/// Creates a new [`BooleanArray`] from a [`TrustedLen`] of `bool`.
#[inline]
Expand Down
8 changes: 4 additions & 4 deletions src/array/boolean/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ impl<'a> IntoIterator for &'a BooleanArray {
}

impl<'a> BooleanArray {
/// constructs a new iterator
/// Returns an iterator over the optional values of this [`BooleanArray`].
#[inline]
pub fn iter(&'a self) -> ZipValidity<'a, bool, BitmapIter<'a>> {
zip_validity(
Expand All @@ -23,7 +23,7 @@ impl<'a> BooleanArray {
)
}

/// Returns an iterator of `bool`
/// Returns an iterator over the values of this [`BooleanArray`]
#[inline]
pub fn values_iter(&'a self) -> BitmapIter<'a> {
self.values().iter()
Expand All @@ -41,7 +41,7 @@ impl<'a> IntoIterator for &'a MutableBooleanArray {
}

impl<'a> MutableBooleanArray {
/// Returns an iterator over `Option<bool>`
/// Returns an iterator over the optional values of this [`MutableBooleanArray`].
#[inline]
pub fn iter(&'a self) -> ZipValidity<'a, bool, BitmapIter<'a>> {
zip_validity(
Expand All @@ -50,7 +50,7 @@ impl<'a> MutableBooleanArray {
)
}

/// Returns an iterator of `bool`
/// Returns an iterator over the values of this [`MutableBooleanArray`]
#[inline]
pub fn values_iter(&'a self) -> BitmapIter<'a> {
self.values().iter()
Expand Down
26 changes: 8 additions & 18 deletions src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@ mod mutable;
pub use iterator::*;
pub use mutable::*;

/// A [`BooleanArray`] is arrow's equivalent to `Vec<Option<bool>>`, i.e.
/// an array designed for highly performant operations on optionally nullable booleans.
/// The size of this struct is `O(1)` as all data is stored behind an `Arc`.
/// Arrow's equivalent of an immutable `Vec<Option<bool>>`, but with `1/16` of its size.
/// Cloning and slicing this struct is `O(1)`.
#[derive(Debug, Clone)]
pub struct BooleanArray {
data_type: DataType,
values: Bitmap,
validity: Option<Bitmap>,
offset: usize,
Expand Down Expand Up @@ -45,7 +43,6 @@ impl BooleanArray {
assert_eq!(values.len(), validity.len());
}
Self {
data_type: DataType::Boolean,
values,
validity,
offset: 0,
Expand All @@ -54,36 +51,36 @@ impl BooleanArray {

/// Returns a slice of this [`BooleanArray`].
/// # Implementation
/// This operation is `O(1)` as it amounts to essentially increase two ref counts.
/// This operation is `O(1)` as it amounts to increasing two ref counts.
/// # Panic
/// This function panics iff `offset + length > self.len()`.
#[inline]
pub fn slice(&self, offset: usize, length: usize) -> Self {
let validity = self.validity.clone().map(|x| x.slice(offset, length));
Self {
data_type: self.data_type.clone(),
values: self.values.clone().slice(offset, length),
validity,
offset: self.offset + offset,
}
}

/// Returns the element at index `i` as bool
/// Returns the value at index `i`
/// # Panic
/// This function panics iff `i >= self.len()`.
#[inline]
pub fn value(&self, i: usize) -> bool {
self.values.get_bit(i)
}

/// Returns the element at index `i` as bool
///
/// # Safety
/// Caller must be sure that `i < self.len()`
#[inline]
pub unsafe fn value_unchecked(&self, i: usize) -> bool {
self.values.get_bit_unchecked(i)
}

/// Returns the values bitmap of this [`BooleanArray`].
/// Returns the values of this [`BooleanArray`].
#[inline]
pub fn values(&self) -> &Bitmap {
&self.values
Expand All @@ -103,7 +100,7 @@ impl Array for BooleanArray {

#[inline]
fn data_type(&self) -> &DataType {
&self.data_type
&DataType::Boolean
}

#[inline]
Expand All @@ -122,10 +119,3 @@ impl std::fmt::Display for BooleanArray {
display_fmt(self.iter(), "BooleanArray", f, false)
}
}

impl<P: AsRef<[Option<bool>]>> From<P> for BooleanArray {
/// Creates a new [`BooleanArray`] out of a slice of Optional `bool`.
fn from(slice: P) -> Self {
MutableBooleanArray::from(slice).into()
}
}
3 changes: 2 additions & 1 deletion src/array/boolean/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ use crate::{

use super::BooleanArray;

/// The mutable version of [`BooleanArray`]. See [`MutableArray`] for more details.
/// Arrow's equivalent of `Vec<Option<bool>>`, but with `1/16` of its size.
/// Converting a [`MutableBooleanArray`] into a [`BooleanArray`] is `O(1)`.
#[derive(Debug)]
pub struct MutableBooleanArray {
values: MutableBitmap,
Expand Down
2 changes: 1 addition & 1 deletion src/array/growable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub trait Growable<'a> {
/// This function panics if the range is out of bounds, i.e. if `start + len > array.len()`.
fn extend(&mut self, index: usize, start: usize, len: usize);

/// Extends this [`GrowableArray`] with null elements, disregarding the bound arrays
/// Extends this [`Growable`] with null elements, disregarding the bound arrays
fn extend_validity(&mut self, additional: usize);

/// Converts itself to an `Arc<dyn Array>`, thereby finishing the mutation.
Expand Down
12 changes: 4 additions & 8 deletions src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
//! This module contains arrays: fixed-length and immutable containers with optional values
//! Fixed-length and immutable containers with optional values
//! that are laid out in memory according to the Arrow specification.
//! Each array type has its own `struct`. The following are the main array types:
//!
//! * [`PrimitiveArray`], an array of values with a fixed length such as integers, floats, etc.
//! * [`BooleanArray`], an array of boolean values (stored as a bitmap)
//! * [`Utf8Array`], an array of utf8 values
//! * [`BinaryArray`], an array of binary values
//! * [`ListArray`], an array of arrays (e.g. `[[1, 2], None, [], [None]]`)
//! * [`StructArray`], an array of arrays identified by a string (e.g. `{"a": [1, 2], "b": [true, false]}`)
//!
//! This module contains constructors and accessors to operate on the arrays.
//! All the arrays implement the trait [`Array`] and are often trait objects.
//! Every array has a [`DataType`], which you can access with [`Array::data_type`].
//! This can be used to `downcast_ref` a `&dyn Array` to a concrete struct.
//! Arrays can share memory via [`crate::buffer::Buffer`] and thus cloning and slicing is `O(1)`.
//! All arrays implement the trait [`Array`] and are often trait objects that can be downcast
//! to a concrete struct based on the [`DataType`] available from [`Array::data_type`].
//! Arrays share memory via [`crate::buffer::Buffer`] and thus cloning and slicing them is `O(1)`.
//!
//! This module also contains the mutable counterparts of arrays, that are neither clonable nor slicable, but that
//! can be operated in-place, such as [`MutablePrimitiveArray`] and [`MutableUtf8Array`].
Expand Down
9 changes: 9 additions & 0 deletions src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ pub use mutable::*;
/// an array designed for highly performant operations on optionally nullable slots,
/// backed by a physical type of a physical byte-width, such as `i32` or `f64`.
/// The size of this struct is `O(1)` as all data is stored behind an [`std::sync::Arc`].
/// # Example
/// ```
/// use arrow2::array::PrimitiveArray;
/// # fn main() {
/// let array = PrimitiveArray::<i32>::from([Some(1), None, Some(2)]);
/// assert_eq!(array.value(0), 1);
/// assert_eq!(array.values().as_slice(), &[1, 0, 2]);
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct PrimitiveArray<T: NativeType> {
data_type: DataType,
Expand Down
3 changes: 2 additions & 1 deletion src/array/primitive/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ use crate::{

use super::PrimitiveArray;

/// The mutable version of [`PrimitiveArray`]. See [`MutableArray`] for more details.
/// Arrow's equivalent of `Vec<Option<T>>` where `T` is byte-sized (e.g. `i32`).
/// Converting a [`MutablePrimitiveArray`] into a [`PrimitiveArray`] is `O(1)`.
#[derive(Debug)]
pub struct MutablePrimitiveArray<T: NativeType> {
data_type: DataType,
Expand Down
13 changes: 6 additions & 7 deletions src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,16 @@ mod mutable;
pub use iterator::*;
pub use mutable::*;

/// A [`Utf8Array`] is arrow's equivalent of `Vec<Option<String>>`, i.e.
/// an array designed for highly performant operations on optionally nullable strings.
/// The size of this struct is `O(1)` as all data is stored behind an `Arc`.
/// A [`Utf8Array`] is arrow's equivalent of an immutable `Vec<Option<String>>`.
/// Cloning and slicing this struct is `O(1)`.
/// # Example
/// ```
/// use std::iter::FromIterator;
/// use arrow2::array::Utf8Array;
/// # fn main() {
/// let data = vec![Some("hello"), None, Some("hello2")];
/// let array = Utf8Array::<i32>::from_iter(data);
/// assert_eq!(array.value(0), "hello");
/// let array = Utf8Array::<i32>::from([Some("hi"), None, Some("there")]);
/// assert_eq!(array.value(0), "hi");
/// assert_eq!(array.values().as_slice(), b"hithere".as_ref());
/// assert_eq!(array.offsets().as_slice(), &[0, 2, 2, 2 + 5]);
/// # }
/// ```
#[derive(Debug, Clone)]
Expand Down
2 changes: 1 addition & 1 deletion src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ impl Bitmap {
Bitmap::from_bytes(buffer.into(), length)
}

/// Creates a new [`Bitmap`] from [`Bytes`] and a length.
/// Creates a new [`Bitmap`] from a slice and length.
/// # Panic
/// Panics iff `length > bytes.len() * 8`
#[inline]
Expand Down
2 changes: 1 addition & 1 deletion src/buffer/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ impl<T: NativeType> MutableBuffer<T> {
/// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length or errors
/// if any of the items of the iterator is an error.
/// Prefer this to `collect` whenever possible, as it is ~60% faster.
/// The only difference between this and [`try_from_trusted_len_iter`] is that this works
/// The only difference between this and [`Self::try_from_trusted_len_iter`] is that this works
/// on any iterator, while `try_from_trusted_len_iter` requires the iterator to implement the trait
/// [`TrustedLen`], which not every iterator currently implements due to limitations of the Rust compiler.
/// # Safety
Expand Down
2 changes: 1 addition & 1 deletion src/compute/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
use crate::array::{growable::make_growable, Array};
use crate::error::{ArrowError, Result};

/// Concatenate multiple [Array] of the same type into a single [ArrayRef].
/// Concatenates multiple [`Array`]s of the same type into a single [`Array`].
pub fn concatenate(arrays: &[&dyn Array]) -> Result<Box<dyn Array>> {
if arrays.is_empty() {
return Err(ArrowError::InvalidArgumentError(
Expand Down
Loading

0 comments on commit 83256a8

Please sign in to comment.