Skip to content

Commit

Permalink
ChunkSet trait
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 30, 2020
1 parent 0f380f1 commit 41b65d2
Show file tree
Hide file tree
Showing 7 changed files with 219 additions and 18 deletions.
2 changes: 2 additions & 0 deletions polars/src/chunked_array/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ impl<T: ArrowPrimitiveType> DerefMut for PrimitiveChunkedBuilder<T> {
}
}

/// Builder for boolean arrays: `PrimitiveChunkedBuilder` specialised to `BooleanType`.
pub type BooleanChunkedBuilder = PrimitiveChunkedBuilder<BooleanType>;

pub struct Utf8ChunkedBuilder {
pub builder: StringBuilder,
capacity: usize,
Expand Down
1 change: 1 addition & 0 deletions polars/src/chunked_array/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ impl<'a> ExactSizeIterator for Utf8IterManyChunk<'a> {}
impl<'a> ExactSizeIterator for Utf8IterManyChunkNullCheck<'a> {}

/// Trait for ChunkedArrays that don't have null values.
/// TODO: implement for faster paths
pub trait IntoNoNullIterator {
type Item;
type IntoIter: Iterator<Item = Self::Item>;
Expand Down
1 change: 1 addition & 0 deletions polars/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ pub mod cast;
pub mod chunkops;
pub mod comparison;
pub mod iterator;
pub mod set;
pub mod take;
#[cfg(feature = "temporal")]
pub mod temporal;
Expand Down
28 changes: 28 additions & 0 deletions polars/src/chunked_array/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,34 @@ use num::{Num, NumCast};
use std::cmp::Ordering;
use std::ops::{Add, Div};

/// Set values in a `ChunkedArray` in place.
///
/// `V` is the type of the replacement value. Every method takes it as `Option<V>`,
/// where `None` writes a null at the selected positions.
pub trait ChunkSet<V> {
/// Set the values at indexes `idx` to some optional value `Option<V>`.
///
/// On success a mutable reference to `self` is returned.
///
/// # Errors
///
/// Implementations are expected to return an error (e.g. `PolarsError::OutOfBounds`)
/// when an index does not refer to an element of the array.
///
/// # Example
///
/// ```rust
/// # use polars::prelude::*;
/// let mut ca = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
/// ca.set_at_idx(&[0, 1], Some(10)).unwrap();
///
/// assert_eq!(Vec::from(&ca), &[Some(10), Some(10), Some(3)]);
/// ```
fn set_at_idx<T: AsTakeIndex>(&mut self, idx: &T, opt_value: Option<V>) -> Result<&mut Self>;
/// Set the values where the mask evaluates to `true` to some optional value `Option<V>`.
///
/// Positions where the mask is `false` keep their current value. In the implementations
/// that accompany this trait, a null mask entry is treated like `false`.
///
/// # Errors
///
/// Implementations are expected to return an error (e.g. `PolarsError::ShapeMisMatch`)
/// when `mask` and `self` differ in length.
///
/// # Example
///
/// ```rust
/// # use polars::prelude::*;
/// let mut ca = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
/// let mask = BooleanChunked::new_from_slice("mask", &[false, true, false]);
/// ca.set(&mask, Some(5)).unwrap();
/// assert_eq!(Vec::from(&ca), &[Some(1), Some(5), Some(3)]);
/// ```
fn set(&mut self, mask: &BooleanChunked, opt_value: Option<V>) -> Result<&mut Self>;
}

/// Cast `ChunkedArray<T>` to `ChunkedArray<N>`
pub trait ChunkCast {
/// Cast `ChunkedArray<T>` to `ChunkedArray<N>`
Expand Down
175 changes: 175 additions & 0 deletions polars/src/chunked_array/set.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
use crate::prelude::*;

/// `ChunkSet` for numeric arrays. Both methods rebuild the data and then swap the
/// freshly built chunks into `self`.
impl<T> ChunkSet<T::Native> for ChunkedArray<T>
where
    T: PolarsNumericType,
    T::Native: Copy,
{
    /// Replace the values at the positions yielded by `idx` with `value`
    /// (`None` writes a null).
    ///
    /// Indices may be given in any order and may contain duplicates.
    ///
    /// # Errors
    ///
    /// Returns `PolarsError::OutOfBounds` if any index is `>= self.len()`. All indices
    /// are validated before anything is written, so `self` is left unmodified on error.
    fn set_at_idx<I: AsTakeIndex>(
        &mut self,
        idx: &I,
        value: Option<T::Native>,
    ) -> Result<&mut Self> {
        // TODO: implement fast path
        let len = self.len();

        // Mark every targeted position up front. This validates all indices before the
        // array is touched and makes the result independent of the order of `idx`.
        let mut is_target = vec![false; len];
        for i in idx.as_take_iter() {
            match is_target.get_mut(i) {
                Some(flag) => *flag = true,
                // `i == len` is out of bounds too: valid positions are `0..len`.
                None => return Err(PolarsError::OutOfBounds),
            }
        }

        let mut builder = PrimitiveChunkedBuilder::<T>::new(self.name(), len);
        for (opt_val, overwrite) in self.into_iter().zip(is_target) {
            if overwrite {
                builder.append_option(value);
            } else {
                builder.append_option(opt_val);
            }
        }

        let ca = builder.finish();
        // NOTE(review): only `chunks` is replaced here; if `ChunkedArray` caches other
        // chunk-derived metadata, confirm it does not need refreshing as well.
        self.chunks = ca.chunks;
        Ok(self)
    }

    /// Replace the values where `mask` is `true` with `value` (`None` writes a null).
    ///
    /// Positions where the mask is `false` or null keep their current value.
    ///
    /// # Errors
    ///
    /// Returns `PolarsError::ShapeMisMatch` if `mask` and `self` differ in length.
    fn set(&mut self, mask: &BooleanChunked, value: Option<T::Native>) -> Result<&mut Self> {
        if self.len() != mask.len() {
            return Err(PolarsError::ShapeMisMatch);
        }

        // TODO: could make faster by also checking the mask for a fast path.
        let ca: ChunkedArray<T> = match self.cont_slice() {
            // Fast path: iterate the raw values directly.
            // NOTE(review): presumably `cont_slice` only succeeds for contiguous,
            // null-free data, which is why every kept value is wrapped in `Some` — confirm.
            Ok(slice) => slice
                .iter()
                .zip(mask)
                .map(|(&val, opt_mask)| match opt_mask {
                    Some(true) => value,
                    // `false` and null mask entries keep the existing value
                    _ => Some(val),
                })
                .collect(),
            // Slower path: go through the optional-value iterator.
            Err(_) => self
                .into_iter()
                .zip(mask)
                .map(|(opt_val, opt_mask)| match opt_mask {
                    Some(true) => value,
                    // `false` and null mask entries keep the existing value
                    _ => opt_val,
                })
                .collect(),
        };

        // NOTE(review): only `chunks` is replaced here; if `ChunkedArray` caches other
        // chunk-derived metadata, confirm it does not need refreshing as well.
        self.chunks = ca.chunks;
        Ok(self)
    }
}

/// Implements `ChunkSet<$value_type>` for the non-numeric array type `$ca_type`,
/// using `$builder` to rebuild the data.
///
/// The generated methods follow the same contract as the numeric implementation:
/// `set_at_idx` accepts indices in any order (duplicates allowed) and fails with
/// `PolarsError::OutOfBounds` without modifying `self` when an index is `>= len`;
/// `set` fails with `PolarsError::ShapeMisMatch` when the mask length differs and
/// treats null mask entries like `false`.
macro_rules! impl_chunkset {
    ($value_type:ty, $ca_type:ident, $builder:ident) => {
        impl ChunkSet<$value_type> for $ca_type {
            fn set_at_idx<I: AsTakeIndex>(
                &mut self,
                idx: &I,
                value: Option<$value_type>,
            ) -> Result<&mut Self> {
                let len = self.len();

                // Mark every targeted position up front. This validates all indices
                // before the array is touched and makes the result independent of the
                // order of `idx`.
                let mut is_target = vec![false; len];
                for i in idx.as_take_iter() {
                    match is_target.get_mut(i) {
                        Some(flag) => *flag = true,
                        // `i == len` is out of bounds too: valid positions are `0..len`.
                        None => return Err(PolarsError::OutOfBounds),
                    }
                }

                let mut builder = $builder::new(self.name(), len);
                for (opt_val, overwrite) in self.into_iter().zip(is_target) {
                    if overwrite {
                        builder.append_option(value);
                    } else {
                        builder.append_option(opt_val);
                    }
                }

                let ca = builder.finish();
                // NOTE(review): only `chunks` is replaced here; if the array caches other
                // chunk-derived metadata, confirm it does not need refreshing as well.
                self.chunks = ca.chunks;
                Ok(self)
            }

            fn set(
                &mut self,
                mask: &BooleanChunked,
                value: Option<$value_type>,
            ) -> Result<&mut Self> {
                if self.len() != mask.len() {
                    return Err(PolarsError::ShapeMisMatch);
                }

                let ca: $ca_type = self
                    .into_iter()
                    .zip(mask)
                    .map(|(opt_val, opt_mask)| match opt_mask {
                        Some(true) => value,
                        // `false` and null mask entries keep the existing value
                        _ => opt_val,
                    })
                    .collect();

                // NOTE(review): only `chunks` is replaced here; if the array caches other
                // chunk-derived metadata, confirm it does not need refreshing as well.
                self.chunks = ca.chunks;
                Ok(self)
            }
        }
    };
}

impl_chunkset!(&str, Utf8Chunked, Utf8ChunkedBuilder);
impl_chunkset!(bool, BooleanChunked, BooleanChunkedBuilder);

#[cfg(test)]
mod test {
    use crate::prelude::*;

    /// Exercises `set` and `set_at_idx` on numeric, boolean and utf8 arrays.
    #[test]
    fn test_set() {
        // numeric array, mask without nulls
        let mut numbers = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
        let plain_mask = BooleanChunked::new_from_slice("mask", &[false, true, false]);
        numbers.set(&plain_mask, Some(5)).unwrap();
        assert_eq!(Vec::from(&numbers), &[Some(1), Some(5), Some(3)]);

        // numeric array, mask with nulls: null entries must leave values untouched
        let mut numbers = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
        let nullable_mask =
            BooleanChunked::new_from_opt_slice("mask", &[None, Some(true), None]);
        numbers.set(&nullable_mask, Some(5)).unwrap();
        assert_eq!(Vec::from(&numbers), &[Some(1), Some(5), Some(3)]);

        // setting by index continues from the state left by the previous step
        numbers.set_at_idx(&[0, 1], Some(10)).unwrap();
        assert_eq!(Vec::from(&numbers), &[Some(10), Some(10), Some(3)]);

        // check that out of bounds error doesn't modify original
        let out_of_bounds = numbers.set_at_idx(&[0, 10], Some(0));
        assert!(out_of_bounds.is_err());
        assert_eq!(Vec::from(&numbers), &[Some(10), Some(10), Some(3)]);

        // test booleans
        let mut flags = BooleanChunked::new_from_slice("a", &[true, true, true]);
        let flag_mask = BooleanChunked::new_from_slice("mask", &[false, true, false]);
        flags.set(&flag_mask, None).unwrap();
        assert_eq!(Vec::from(&flags), &[Some(true), None, Some(true)]);

        // test utf8
        let mut words = Utf8Chunked::new_from_slice("a", &["foo", "foo", "foo"]);
        let word_mask = BooleanChunked::new_from_slice("mask", &[false, true, false]);
        words.set(&word_mask, Some("bar")).unwrap();
        assert_eq!(
            Vec::from(&words),
            &[Some("foo"), Some("bar"), Some("foo")]
        );
    }
}
18 changes: 6 additions & 12 deletions polars/src/chunked_array/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,21 +338,15 @@ impl AsTakeIndex for &UInt32Chunked {
}
}

/// Lets a plain `[usize]` slice be used as a take index.
impl AsTakeIndex for [usize] {
/// Iterate over the indices stored in the slice, yielding each one by value.
fn as_take_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
Box::new(self.iter().copied())
}
/// Number of indices in the slice.
fn take_index_len(&self) -> usize {
self.len()
}
}

impl AsTakeIndex for Vec<usize> {
impl<T> AsTakeIndex for T
where
T: AsRef<[usize]>,
{
fn as_take_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
Box::new(self.iter().copied())
Box::new(self.as_ref().iter().copied())
}
fn take_index_len(&self) -> usize {
self.len()
self.as_ref().len()
}
}

Expand Down
12 changes: 6 additions & 6 deletions polars/src/prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@ pub use crate::{
chunked_array::{
arithmetic::Pow,
builder::{
AlignedAlloc, AlignedVec, LargListBuilderTrait, LargeListPrimitiveChunkedBuilder,
LargeListUtf8ChunkedBuilder, NewChunkedArray, PrimitiveChunkedBuilder,
Utf8ChunkedBuilder,
AlignedAlloc, AlignedVec, BooleanChunkedBuilder, LargListBuilderTrait,
LargeListPrimitiveChunkedBuilder, LargeListUtf8ChunkedBuilder, NewChunkedArray,
PrimitiveChunkedBuilder, Utf8ChunkedBuilder,
},
chunkops::ChunkOps,
comparison::NumComp,
iterator::{IntoNoNullIterator, NumericChunkIterDispatch},
ops::{
ChunkAgg, ChunkApply, ChunkCast, ChunkCompare, ChunkFillNone, ChunkFilter, ChunkFull,
ChunkReverse, ChunkShift, ChunkSort, ChunkUnique, FillNoneStrategy,
ChunkReverse, ChunkSet, ChunkShift, ChunkSort, ChunkUnique,
FillNoneStrategy,
},
take::{
AsTakeIndex, IntoTakeRandom, NumTakeRandomChunked, NumTakeRandomCont, Take, TakeRandom,
Expand Down Expand Up @@ -50,11 +51,10 @@ pub(crate) fn create_df() -> DataFrame {
#[cfg(feature = "parquet_ser")]
pub use crate::frame::ser::parquet::ParquetReader;


#[macro_export]
macro_rules! as_result {
($block:block) => {{
let res: Result<_> = $block;
res
}}
}};
}

0 comments on commit 41b65d2

Please sign in to comment.