Skip to content

Commit

Permalink
ChunkSet: added methods to apply closures
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 30, 2020
1 parent 41b65d2 commit d0be04c
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 89 deletions.
61 changes: 51 additions & 10 deletions polars/src/chunked_array/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,34 +6,75 @@ use arrow::compute;
use itertools::Itertools;
use num::{Num, NumCast};
use std::cmp::Ordering;
use std::marker::Sized;
use std::ops::{Add, Div};

/// Set values in a `ChunkedArray` in place.
pub trait ChunkSet<V> {
/// Create a `ChunkedArray` with new values by index or by boolean mask.
/// Note that these operations clone data. This is however the only way we can modify at mask or
/// index level as the underlying Arrow arrays are immutable.
pub trait ChunkSet<'a, V> {
/// Set the values at indexes `idx` to some optional value `Option<V>`.
///
/// # Example
///
/// ```rust
/// # use polars::prelude::*;
/// let mut ca = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
/// ca.set_at_idx(&[0, 1], Some(10)).unwrap();
/// let ca = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
/// let new = ca.set_at_idx(&[0, 1], Some(10)).unwrap();
///
/// assert_eq!(Vec::from(&ca), &[Some(10), Some(10), Some(3)]);
/// assert_eq!(Vec::from(&new), &[Some(10), Some(10), Some(3)]);
/// ```
fn set_at_idx<T: AsTakeIndex>(&mut self, idx: &T, opt_value: Option<V>) -> Result<&mut Self>;
fn set_at_idx<T: AsTakeIndex>(&'a self, idx: &T, opt_value: Option<V>) -> Result<Self>
where
Self: Sized;

/// Set the values at indexes `idx` by applying a closure to these values.
///
/// # Example
///
/// ```rust
/// # use polars::prelude::*;
/// let ca = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
/// let new = ca.set_at_idx_with(&[0, 1], |opt_v| opt_v.map(|v| v - 5)).unwrap();
///
/// assert_eq!(Vec::from(&new), &[Some(-4), Some(-3), Some(3)]);
/// ```
fn set_at_idx_with<T: AsTakeIndex, F>(&'a self, idx: &T, f: F) -> Result<Self>
where
Self: Sized,
F: Fn(Option<V>) -> Option<V>;
/// Set the values where the mask evaluates to `true` to some optional value `Option<V>`.
///
/// # Example
///
/// ```rust
/// # use polars::prelude::*;
/// let mut ca = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
/// let ca = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
/// let mask = BooleanChunked::new_from_slice("mask", &[false, true, false]);
/// ca.set(&mask, Some(5)).unwrap();
/// assert_eq!(Vec::from(&ca), &[Some(1), Some(5), Some(3)]);
/// let new = ca.set(&mask, Some(5)).unwrap();
/// assert_eq!(Vec::from(&new), &[Some(1), Some(5), Some(3)]);
/// ```
fn set(&mut self, mask: &BooleanChunked, opt_value: Option<V>) -> Result<&mut Self>;
fn set(&'a self, mask: &BooleanChunked, opt_value: Option<V>) -> Result<Self>
where
Self: Sized;

/// Set the values where the mask evaluates to `true` by applying a closure to these values.
///
/// # Example
///
/// ```rust
/// # use polars::prelude::*;
/// let ca = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
/// let mask = BooleanChunked::new_from_slice("mask", &[false, true, false]);
/// let new = ca.set_with(&mask, |opt_v| opt_v.map(
/// |v| v * 2
/// )).unwrap();
/// assert_eq!(Vec::from(&new), &[Some(1), Some(4), Some(3)]);
/// ```
fn set_with<F>(&'a self, mask: &BooleanChunked, f: F) -> Result<Self>
where
Self: Sized,
F: Fn(Option<V>) -> Option<V>;
}

/// Cast `ChunkedArray<T>` to `ChunkedArray<N>`
Expand Down
173 changes: 94 additions & 79 deletions polars/src/chunked_array/set.rs
Original file line number Diff line number Diff line change
@@ -1,58 +1,35 @@
use crate::prelude::*;

impl<T> ChunkSet<T::Native> for ChunkedArray<T>
impl<'a, T> ChunkSet<'a, T::Native> for ChunkedArray<T>
where
T: PolarsNumericType,
T::Native: Copy,
{
fn set_at_idx<I: AsTakeIndex>(
&mut self,
idx: &I,
value: Option<T::Native>,
) -> Result<&mut Self> {
// TODO: implement fast path
let mut idx_iter = idx.as_take_iter();
let mut ca_iter = self.into_iter().enumerate();

let mut builder = PrimitiveChunkedBuilder::<T>::new(self.name(), self.len());

while let Some(current_idx) = idx_iter.next() {
if current_idx > self.len() {
return Err(PolarsError::OutOfBounds);
}
while let Some((cnt_idx, opt_val)) = ca_iter.next() {
if cnt_idx == current_idx {
builder.append_option(value);
break;
} else {
builder.append_option(opt_val);
}
}
}
// the last idx is probably not the last value so we finish the iterator
while let Some((_, opt_val)) = ca_iter.next() {
builder.append_option(opt_val);
}
/// Return a new array with the values at the positions in `idx` replaced by `value`.
///
/// Delegates to `set_at_idx_with` using a closure that ignores the current
/// value and always yields `value`.
fn set_at_idx<I: AsTakeIndex>(&'a self, idx: &I, value: Option<T::Native>) -> Result<Self> {
self.set_at_idx_with(idx, |_| value)
}

let ca = builder.finish();
self.chunks = ca.chunks;
Ok(self)
/// Return a new array with `value` written wherever `mask` is `true`.
///
/// Delegates to `set_with` using a closure that ignores the current value
/// and always yields `value`.
fn set(&'a self, mask: &BooleanChunked, value: Option<T::Native>) -> Result<Self> {
self.set_with(mask, |_| value)
}

fn set(&mut self, mask: &BooleanChunked, value: Option<T::Native>) -> Result<&mut Self> {
fn set_with<F>(&'a self, mask: &BooleanChunked, f: F) -> Result<Self>
where
F: Fn(Option<T::Native>) -> Option<T::Native>,
{
if self.len() != mask.len() {
return Err(PolarsError::ShapeMisMatch);
}

// TODO: could make faster by also checking the mask for a fast path.
let ca: ChunkedArray<T> = match self.cont_slice() {
let mut ca: ChunkedArray<T> = match self.cont_slice() {
// fast path
Ok(slice) => slice
.iter()
.zip(mask)
.map(|(&val, opt_mask)| match opt_mask {
None => Some(val),
Some(true) => value,
Some(true) => f(Some(val)),
Some(false) => Some(val),
})
.collect(),
Expand All @@ -62,25 +39,77 @@ where
.zip(mask)
.map(|(opt_val, opt_mask)| match opt_mask {
None => opt_val,
Some(true) => value,
Some(false) => opt_val,
Some(true) => f(opt_val),
// A `false` mask entry must leave the value untouched; the closure is only
// applied where the mask is `true` (same as the fast path above).
Some(false) => opt_val,
})
.collect(),
};

self.chunks = ca.chunks;
Ok(self)
ca.rename(self.name());
Ok(ca)
}

/// Build a new array in which the values at the positions in `idx` are
/// replaced by `f(value)`; every other value is copied over unchanged.
///
/// The array is walked exactly once while `idx` is consumed front to back,
/// so the indexes are expected in ascending order: an index that is smaller
/// than one already processed is never matched and is silently left as is.
///
/// # Errors
///
/// Returns `PolarsError::OutOfBounds` if any index is `>= self.len()`.
fn set_at_idx_with<I: AsTakeIndex, F>(&'a self, idx: &I, f: F) -> Result<Self>
where
    F: Fn(Option<T::Native>) -> Option<T::Native>,
{
    // TODO: implement fast path
    let len = self.len();
    let mut ca_iter = self.into_iter().enumerate();
    let mut builder = PrimitiveChunkedBuilder::<T>::new(self.name(), len);

    for current_idx in idx.as_take_iter() {
        // Valid positions are `0..len`, so `current_idx == len` is already out of bounds.
        if current_idx >= len {
            return Err(PolarsError::OutOfBounds);
        }
        // Copy values through until the target position is reached, then transform it.
        for (cnt_idx, opt_val) in ca_iter.by_ref() {
            if cnt_idx == current_idx {
                builder.append_option(f(opt_val));
                break;
            }
            builder.append_option(opt_val);
        }
    }
    // The last index is usually not the last value, so copy the remaining tail.
    for (_, opt_val) in ca_iter {
        builder.append_option(opt_val);
    }

    Ok(builder.finish())
}
}

macro_rules! impl_chunkset {
($value_type:ty, $ca_type:ident, $builder:ident) => {
impl ChunkSet<$value_type> for $ca_type {
fn set_at_idx<I: AsTakeIndex>(
&mut self,
idx: &I,
value: Option<$value_type>,
) -> Result<&mut Self> {
impl<'a> ChunkSet<'a, $value_type> for $ca_type {
/// Return a new array in which every value whose mask entry is `Some(true)`
/// has been replaced by `f(value)`. Values whose mask entry is `Some(false)`
/// or null are kept as they are.
///
/// Fails with `PolarsError::ShapeMisMatch` when `mask` and `self` differ in length.
fn set_with<F>(&'a self, mask: &BooleanChunked, f: F) -> Result<Self>
where
    F: Fn(Option<$value_type>) -> Option<$value_type>,
{
    if mask.len() != self.len() {
        return Err(PolarsError::ShapeMisMatch);
    }

    let mut out: $ca_type = self
        .into_iter()
        .zip(mask)
        .map(|(current, mask_entry)| match mask_entry {
            // Only an explicit `true` triggers the closure.
            Some(true) => f(current),
            Some(false) | None => current,
        })
        .collect();

    // Give the result the same name as `self`.
    out.rename(self.name());
    Ok(out)
}
fn set_at_idx_with<I: AsTakeIndex, F>(&'a self, idx: &I, f: F) -> Result<Self>
where
F: Fn(Option<$value_type>) -> Option<$value_type>,
{
let mut idx_iter = idx.as_take_iter();
let mut ca_iter = self.into_iter().enumerate();

Expand All @@ -92,7 +121,7 @@ macro_rules! impl_chunkset {
}
while let Some((cnt_idx, opt_val)) = ca_iter.next() {
if cnt_idx == current_idx {
builder.append_option(value);
builder.append_option(f(opt_val));
break;
} else {
builder.append_option(opt_val);
Expand All @@ -105,37 +134,25 @@ macro_rules! impl_chunkset {
}

let ca = builder.finish();
self.chunks = ca.chunks;
Ok(self)
Ok(ca)
}

fn set(
&mut self,
mask: &BooleanChunked,
fn set_at_idx<I: AsTakeIndex>(
&'a self,
idx: &I,
value: Option<$value_type>,
) -> Result<&mut Self> {
if self.len() != mask.len() {
return Err(PolarsError::ShapeMisMatch);
}

let ca: $ca_type = self
.into_iter()
.zip(mask)
.map(|(opt_val, opt_mask)| match opt_mask {
None => opt_val,
Some(true) => value,
Some(false) => opt_val,
})
.collect();
) -> Result<Self> {
self.set_at_idx_with(idx, |_| value)
}

self.chunks = ca.chunks;
Ok(self)
/// Return a new array with `value` written wherever `mask` is `true`.
///
/// Delegates to `set_with` using a closure that ignores the current value
/// and always yields `value`.
fn set(&'a self, mask: &BooleanChunked, value: Option<$value_type>) -> Result<Self> {
self.set_with(mask, |_| value)
}
}
};
}

impl_chunkset!(&str, Utf8Chunked, Utf8ChunkedBuilder);
impl_chunkset!(&'a str, Utf8Chunked, Utf8ChunkedBuilder);
impl_chunkset!(bool, BooleanChunked, BooleanChunkedBuilder);

#[cfg(test)]
Expand All @@ -144,32 +161,30 @@ mod test {

#[test]
fn test_set() {
let mut ca = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
let ca = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
let mask = BooleanChunked::new_from_slice("mask", &[false, true, false]);
ca.set(&mask, Some(5)).unwrap();
let ca = ca.set(&mask, Some(5)).unwrap();
assert_eq!(Vec::from(&ca), &[Some(1), Some(5), Some(3)]);
let mut ca = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
let ca = Int32Chunked::new_from_slice("a", &[1, 2, 3]);
let mask = BooleanChunked::new_from_opt_slice("mask", &[None, Some(true), None]);
ca.set(&mask, Some(5)).unwrap();
let ca = ca.set(&mask, Some(5)).unwrap();
assert_eq!(Vec::from(&ca), &[Some(1), Some(5), Some(3)]);

ca.set_at_idx(&[0, 1], Some(10)).unwrap();
let ca = ca.set_at_idx(&[0, 1], Some(10)).unwrap();
assert_eq!(Vec::from(&ca), &[Some(10), Some(10), Some(3)]);

// check that out of bounds error doesn't modify original
assert!(ca.set_at_idx(&[0, 10], Some(0)).is_err());
assert_eq!(Vec::from(&ca), &[Some(10), Some(10), Some(3)]);

// test booleans
let mut ca = BooleanChunked::new_from_slice("a", &[true, true, true]);
let ca = BooleanChunked::new_from_slice("a", &[true, true, true]);
let mask = BooleanChunked::new_from_slice("mask", &[false, true, false]);
ca.set(&mask, None).unwrap();
let ca = ca.set(&mask, None).unwrap();
assert_eq!(Vec::from(&ca), &[Some(true), None, Some(true)]);

// test utf8
let mut ca = Utf8Chunked::new_from_slice("a", &["foo", "foo", "foo"]);
let ca = Utf8Chunked::new_from_slice("a", &["foo", "foo", "foo"]);
let mask = BooleanChunked::new_from_slice("mask", &[false, true, false]);
ca.set(&mask, Some("bar")).unwrap();
let ca = ca.set(&mask, Some("bar")).unwrap();
assert_eq!(Vec::from(&ca), &[Some("foo"), Some("bar"), Some("foo")]);
}
}

0 comments on commit d0be04c

Please sign in to comment.