Skip to content

Commit

Permalink
random access to chunkedarray, without runtime downcasting
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 5, 2020
1 parent 3854002 commit b89d917
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 16 deletions.
4 changes: 4 additions & 0 deletions polars/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ impl<T> ChunkedArray<T> {
pub fn append_array(&mut self, other: ArrayRef) -> Result<()> {
if other.data_type() == self.field.data_type() {
self.chunks.push(other);
self.chunk_id = create_chunk_id(&self.chunks);
Ok(())
} else {
Err(PolarsError::DataTypeMisMatch)
Expand Down Expand Up @@ -336,6 +337,9 @@ impl<T> ChunkedArray<T> {
/// Get the index of the chunk and the index of the value in that chunk
#[inline]
pub(crate) fn index_to_chunked_index(&self, index: usize) -> (usize, usize) {
if self.chunk_id().len() == 1 {
return (0, index);
}
let mut index_remainder = index;
let mut current_chunk_idx = 0;

Expand Down
21 changes: 21 additions & 0 deletions polars/src/chunked_array/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,27 @@ use std::cmp::Ordering;
use std::marker::Sized;
use std::ops::{Add, Div};

/// Random access
///
/// Index-based lookup of a single element. Implementors pick the element
/// type through the associated `Item`.
pub trait TakeRandom {
/// The value type handed back by a lookup.
type Item;

/// Get a nullable value by index.
///
/// The null case is expressed through the `Option` return type.
fn get(&self, index: usize) -> Option<Self::Item>;

/// Get a value by index and ignore the null bit.
///
/// # Safety
///
/// NOTE(review): the exact contract is not spelled out in this trait.
/// Presumably `index` must be in bounds, and the value returned for a
/// null slot must not be relied upon -- confirm against the implementors.
unsafe fn get_unchecked(&self, index: usize) -> Self::Item;
}
// Utility trait because associated type needs a lifetime
//
// The methods take `self` by value rather than `&self`, so the trait can be
// implemented on a reference type (this file implements it for
// `&'a Utf8Chunked` with `Item = &'a str`) and `Item` can borrow for `'a`.
pub trait TakeRandomUtf8 {
/// The value type handed back by a lookup.
type Item;

/// Get a nullable value by index.
///
/// The null case is expressed through the `Option` return type.
fn get(self, index: usize) -> Option<Self::Item>;

/// Get a value by index and ignore the null bit.
///
/// # Safety
///
/// NOTE(review): the exact contract is not spelled out in this trait.
/// Presumably `index` must be in bounds, and the value returned for a
/// null slot must not be relied upon -- confirm against the implementors.
unsafe fn get_unchecked(self, index: usize) -> Self::Item;
}

/// Fast access by index.
pub trait ChunkTake {
/// Take values from ChunkedArray by index.
Expand Down
103 changes: 90 additions & 13 deletions polars/src/chunked_array/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,77 @@ use crate::chunked_array::builder::{
get_large_list_builder, PrimitiveChunkedBuilder, Utf8ChunkedBuilder,
};
use crate::prelude::*;
use arrow::array::{Array, BooleanArray, LargeListArray, PrimitiveArray, StringArray};
use arrow::array::{
Array, ArrayRef, BooleanArray, LargeListArray, PrimitiveArray, PrimitiveArrayOps, StringArray,
};
use std::sync::Arc;

// Expands to the body of a *safe* `get(index)` method.
//
// Arguments:
// * `$self`       - the receiver; must expose `index_to_chunked_index` and a
//                   `chunks` collection of `ArrayRef`s.
// * `$index`      - the flat index being looked up.
// * `$array_type` - the concrete array type every chunk is expected to hold.
//
// Evaluates to `Some(value)` when the slot is valid and `None` when it is null.
// Panics if the computed chunk index is out of range.
macro_rules! impl_take_random_get {
    ($self:ident, $index:ident, $array_type:ty) => {{
        let (chunk_idx, idx) = $self.index_to_chunked_index($index);
        // Bounds-checked on purpose: this expands inside safe functions and the
        // chunk index is derived from a caller-supplied index, so an unchecked
        // lookup here would let safe code reach undefined behaviour. The
        // unchecked fast path lives in `impl_take_random_get_unchecked`.
        let chunk = &$self.chunks[chunk_idx];
        // SAFETY: reinterprets the type-erased `ArrayRef` as an
        // `Arc<$array_type>` to skip the runtime downcast. This is only sound
        // if every chunk of `$self` really holds a `$array_type`.
        // NOTE(review): it also relies on the data pointer being the first
        // word of the fat `Arc<dyn Array>`, which the language does not
        // formally guarantee -- confirm this trade-off is acceptable.
        let arr = unsafe { &*(chunk as *const ArrayRef as *const Arc<$array_type>) };
        if arr.is_valid(idx) {
            Some(arr.value(idx))
        } else {
            None
        }
    }};
}

// Expands to the body of an `unsafe fn get_unchecked(index)` method.
//
// Must be expanded in an unsafe context: the chunk lookup is unchecked and the
// type-erased `ArrayRef` is reinterpreted as `Arc<$array_type>` without a
// runtime downcast. The validity (null) bit is not consulted; the raw value at
// the computed position is returned as-is.
macro_rules! impl_take_random_get_unchecked {
    ($self:ident, $index:ident, $array_type:ty) => {{
        let (chunk_pos, offset) = $self.index_to_chunked_index($index);
        let erased = $self.chunks.get_unchecked(chunk_pos);
        let typed = &*(erased as *const ArrayRef as *const Arc<$array_type>);
        typed.value(offset)
    }};
}

/// Random access for chunked arrays of primitive values.
///
/// Each lookup resolves the flat index to a chunk and treats that chunk as a
/// `PrimitiveArray<T>`.
impl<T: ArrowPrimitiveType> TakeRandom for ChunkedArray<T> {
    type Item = T::Native;

    /// Returns the value at `index`, or `None` when that slot is null.
    fn get(&self, index: usize) -> Option<T::Native> {
        impl_take_random_get!(self, index, PrimitiveArray<T>)
    }

    /// Returns the value at `index` without consulting the validity bit.
    ///
    /// # Safety
    ///
    /// The chunk lookup is unchecked. NOTE(review): presumably `index` must be
    /// within the array's length -- confirm against `index_to_chunked_index`.
    unsafe fn get_unchecked(&self, index: usize) -> T::Native {
        impl_take_random_get_unchecked!(self, index, PrimitiveArray<T>)
    }
}

/// Random access for string chunked arrays.
///
/// Implemented on the reference type so the returned `&str` can borrow from
/// the array for `'a`. Each chunk is treated as a `StringArray`.
impl<'a> TakeRandomUtf8 for &'a Utf8Chunked {
    type Item = &'a str;

    /// Returns the string at `index`, or `None` when that slot is null.
    fn get(self, index: usize) -> Option<&'a str> {
        impl_take_random_get!(self, index, StringArray)
    }

    /// Returns the string at `index` without consulting the validity bit.
    ///
    /// # Safety
    ///
    /// The chunk lookup is unchecked. NOTE(review): presumably `index` must be
    /// within the array's length -- confirm against `index_to_chunked_index`.
    unsafe fn get_unchecked(self, index: usize) -> &'a str {
        impl_take_random_get_unchecked!(self, index, StringArray)
    }
}

/// Random access for list chunked arrays.
///
/// Each element is itself an array; it is returned as a `Series` built from
/// this array's name and the element's values.
impl TakeRandom for LargeListChunked {
    type Item = Series;

    /// Returns the list at `index` as a `Series`, or `None` when that slot is null.
    fn get(&self, index: usize) -> Option<Self::Item> {
        match impl_take_random_get!(self, index, LargeListArray) {
            Some(values) => Some((self.name(), values).into()),
            None => None,
        }
    }

    /// Returns the list at `index` as a `Series` without consulting the validity bit.
    ///
    /// # Safety
    ///
    /// The chunk lookup is unchecked. NOTE(review): presumably `index` must be
    /// within the array's length -- confirm against `index_to_chunked_index`.
    unsafe fn get_unchecked(&self, index: usize) -> Self::Item {
        let values = impl_take_random_get_unchecked!(self, index, LargeListArray);
        (self.name(), values).into()
    }
}

macro_rules! impl_take {
($self:ident, $indices:ident, $capacity:ident, $builder:ident) => {{
Expand Down Expand Up @@ -325,18 +395,7 @@ impl AsTakeIndex for [u32] {
}
}

/// Fast indexing in a `ChunkedArray` by doing only a one time downcast up front.
///
/// Implementors pick the element type through the associated `Item`.
pub trait TakeRandom {
/// The value type handed back by a lookup.
type Item;

/// Get a nullable value by index.
///
/// The null case is expressed through the `Option` return type.
fn get(&self, index: usize) -> Option<Self::Item>;

/// Get a value by index and ignore the null bit.
///
/// # Safety
///
/// NOTE(review): the exact contract is not spelled out in this trait.
/// Presumably `index` must be in bounds, and the value returned for a
/// null slot must not be relied upon -- confirm against the implementors.
unsafe fn get_unchecked(&self, index: usize) -> Self::Item;
}

/// Create a type that implements `TakeRandom`.
/// Create a type that implements a faster `TakeRandom`.
pub trait IntoTakeRandom<'a> {
type Item;
type TakeRandom;
Expand Down Expand Up @@ -657,3 +716,21 @@ impl<'a> TakeRandom for ListTakeRandomSingleChunk<'a> {
(self.name, self.arr.value(index)).into()
}
}

#[cfg(test)]
mod test {
    use crate::prelude::*;

    /// Every position of a small integer array and a small string array must
    /// come back from `get` as `Some(original value)`.
    #[test]
    fn test_take_random() {
        let ints = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
        for (idx, want) in [1, 2, 3].iter().enumerate() {
            assert_eq!(ints.get(idx), Some(*want));
        }

        let strings = Utf8Chunked::new_from_slice("a", &["a", "b", "c"]);
        for (idx, want) in ["a", "b", "c"].iter().enumerate() {
            assert_eq!(strings.get(idx), Some(*want));
        }
    }
}
1 change: 1 addition & 0 deletions polars/src/doc/changelog/v0_5.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
//! * `Groupby` operation can be done on multiple keys.
//! * `Groupby` `first` operation.
//! * `Pivot` operation.
//! * Random access to `ChunkedArray` types via `.get` and `.get_unchecked`.
//!
2 changes: 1 addition & 1 deletion polars/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ impl Debug for Series {
write![f, "Series: '{}' [{}]\n[\n", $name, $dtype]?;

for i in 0..limit {
let v = $a.get(i);
let v = $a.get_any(i);
write!(f, "\t{}\n", v)?;
}

Expand Down
4 changes: 2 additions & 2 deletions polars/src/prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ pub use crate::{
ops::{
ChunkAgg, ChunkApply, ChunkCast, ChunkCompare, ChunkFillNone, ChunkFilter, ChunkFull,
ChunkReverse, ChunkSet, ChunkShift, ChunkSort, ChunkTake, ChunkUnique,
FillNoneStrategy,
FillNoneStrategy, TakeRandom, TakeRandomUtf8,
},
take::{AsTakeIndex, IntoTakeRandom, NumTakeRandomChunked, NumTakeRandomCont, TakeRandom},
take::{AsTakeIndex, IntoTakeRandom, NumTakeRandomChunked, NumTakeRandomCont},
ChunkedArray, Downcast,
},
datatypes,
Expand Down

0 comments on commit b89d917

Please sign in to comment.