Skip to content

Commit

Permalink
Python apply -> list (#3406)
Browse files Browse the repository at this point in the history
If we cannot create a list builder, we fall back
on AnyValues, which can handle arbitrary nesting.
  • Loading branch information
ritchie46 committed May 16, 2022
1 parent 1135178 commit 91e089b
Show file tree
Hide file tree
Showing 14 changed files with 131 additions and 84 deletions.
82 changes: 52 additions & 30 deletions polars/polars-core/src/chunked_array/builder/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,38 +333,60 @@ pub fn get_list_builder(
value_capacity: usize,
list_capacity: usize,
name: &str,
) -> Box<dyn ListBuilderTrait> {
) -> Result<Box<dyn ListBuilderTrait>> {
let physical_type = dt.to_physical();

macro_rules! get_primitive_builder {
($type:ty) => {{
let builder = ListPrimitiveChunkedBuilder::<$type>::new(
&name,
list_capacity,
value_capacity,
dt.clone(),
);
Box::new(builder)
}};
}
macro_rules! get_bool_builder {
() => {{
let builder = ListBooleanChunkedBuilder::new(&name, list_capacity, value_capacity);
Box::new(builder)
}};
}
macro_rules! get_utf8_builder {
() => {{
let builder = ListUtf8ChunkedBuilder::new(&name, list_capacity, 5 * value_capacity);
Box::new(builder)
}};
}
match_dtype_to_physical_apply_macro!(
physical_type,
get_primitive_builder,
get_utf8_builder,
get_bool_builder
)
let _err = || -> Result<Box<dyn ListBuilderTrait>> {
Err(PolarsError::ComputeError(
format!(
"list builder not supported for this dtype: {}",
&physical_type
)
.into(),
))
};

match &physical_type {
#[cfg(feature = "object")]
DataType::Object(_) => _err(),
#[cfg(feature = "dtype-struct")]
DataType::Struct(_) => _err(),
#[cfg(feature = "dtype-categorical")]
DataType::Categorical(_) => _err(),
_ => {
macro_rules! get_primitive_builder {
($type:ty) => {{
let builder = ListPrimitiveChunkedBuilder::<$type>::new(
&name,
list_capacity,
value_capacity,
dt.clone(),
);
Box::new(builder)
}};
}
macro_rules! get_bool_builder {
() => {{
let builder =
ListBooleanChunkedBuilder::new(&name, list_capacity, value_capacity);
Box::new(builder)
}};
}
macro_rules! get_utf8_builder {
() => {{
let builder =
ListUtf8ChunkedBuilder::new(&name, list_capacity, 5 * value_capacity);
Box::new(builder)
}};
}
Ok(match_dtype_to_physical_apply_macro!(
physical_type,
get_primitive_builder,
get_utf8_builder,
get_bool_builder
))
}
}
}

pub struct AnonymousListBuilder<'a> {
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/list/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ mod test {

#[test]
fn test_iter_list() {
let mut builder = get_list_builder(&DataType::Int32, 10, 10, "");
let mut builder = get_list_builder(&DataType::Int32, 10, 10, "").unwrap();
builder.append_series(&Series::new("", &[1, 2, 3]));
builder.append_series(&Series::new("", &[3, 2, 1]));
builder.append_series(&Series::new("", &[1, 1]));
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/chunked_array/list/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ impl ListChunked {
self.get_values_size() + vals_size_other + 1,
length,
self.name(),
);
)?;
self.into_iter().for_each(|opt_s| {
let opt_s = opt_s.map(|mut s| {
for append in &to_append {
Expand Down Expand Up @@ -258,7 +258,7 @@ impl ListChunked {
self.get_values_size() + vals_size_other + 1,
length,
self.name(),
);
)?;

for _ in 0..self.len() {
let mut acc = match first_iter.next().unwrap() {
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ impl ChunkAggSeries for Utf8Chunked {

macro_rules! one_null_list {
($self:ident, $dtype: expr) => {{
let mut builder = get_list_builder(&$dtype, 0, 1, $self.name());
let mut builder = get_list_builder(&$dtype, 0, 1, $self.name()).unwrap();
builder.append_opt_series(None);
builder.finish().into_series()
}};
Expand Down
12 changes: 6 additions & 6 deletions polars/polars-core/src/chunked_array/ops/explode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ mod test {

#[test]
fn test_explode_list() -> Result<()> {
let mut builder = get_list_builder(&DataType::Int32, 5, 5, "a");
let mut builder = get_list_builder(&DataType::Int32, 5, 5, "a")?;

builder.append_series(&Series::new("", &[1, 2, 3, 3]));
builder.append_series(&Series::new("", &[1]));
Expand Down Expand Up @@ -412,7 +412,7 @@ mod test {

#[test]
fn test_explode_empty_list() -> Result<()> {
let mut builder = get_list_builder(&DataType::Int32, 1, 1, "a");
let mut builder = get_list_builder(&DataType::Int32, 1, 1, "a")?;

let vals: [i32; 0] = [];

Expand All @@ -430,7 +430,7 @@ mod test {
#[test]
fn test_explode_empty_list_slot() -> Result<()> {
// primitive
let mut builder = get_list_builder(&DataType::Int32, 5, 5, "a");
let mut builder = get_list_builder(&DataType::Int32, 5, 5, "a")?;
builder.append_series(&Series::new("", &[1i32, 2]));
builder.append_series(&Int32Chunked::from_slice("", &[]).into_series());
builder.append_series(&Series::new("", &[3i32]));
Expand All @@ -443,7 +443,7 @@ mod test {
);

// more primitive
let mut builder = get_list_builder(&DataType::Int32, 5, 5, "a");
let mut builder = get_list_builder(&DataType::Int32, 5, 5, "a")?;
builder.append_series(&Series::new("", &[1i32]));
builder.append_series(&Int32Chunked::from_slice("", &[]).into_series());
builder.append_series(&Series::new("", &[2i32]));
Expand All @@ -458,7 +458,7 @@ mod test {
);

// utf8
let mut builder = get_list_builder(&DataType::Utf8, 5, 5, "a");
let mut builder = get_list_builder(&DataType::Utf8, 5, 5, "a")?;
builder.append_series(&Series::new("", &["abc"]));
builder.append_series(
&<Utf8Chunked as NewChunkedArray<Utf8Type, &str>>::from_slice("", &[]).into_series(),
Expand All @@ -480,7 +480,7 @@ mod test {
);

// boolean
let mut builder = get_list_builder(&DataType::Boolean, 5, 5, "a");
let mut builder = get_list_builder(&DataType::Boolean, 5, 5, "a")?;
builder.append_series(&Series::new("", &[true]));
builder.append_series(&BooleanChunked::from_slice("", &[]).into_series());
builder.append_series(&Series::new("", &[false]));
Expand Down
3 changes: 2 additions & 1 deletion polars/polars-core/src/chunked_array/ops/full.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ impl ChunkFullNull for Utf8Chunked {

impl ChunkFull<&Series> for ListChunked {
fn full(name: &str, value: &Series, length: usize) -> ListChunked {
let mut builder = get_list_builder(value.dtype(), value.len() * length, length, name);
let mut builder =
get_list_builder(value.dtype(), value.len() * length, length, name).unwrap();
for _ in 0..length {
builder.append_series(value)
}
Expand Down
3 changes: 2 additions & 1 deletion polars/polars-core/src/chunked_array/upstream_traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ where
None => return ListChunked::full_null("", 0),
};
// We don't know the needed capacity. We arbitrarily choose an average of 5 elements per series.
let mut builder = get_list_builder(v.borrow().dtype(), capacity * 5, capacity, "collected");
let mut builder =
get_list_builder(v.borrow().dtype(), capacity * 5, capacity, "collected").unwrap();

builder.append_series(v.borrow());
for s in it {
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/named_from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ impl<T: AsRef<[Series]>> NamedFrom<T, ListType> for Series {
} else {
let values_cap = series_slice.iter().fold(0, |acc, s| acc + s.len());

let mut builder = get_list_builder(dt, values_cap, list_cap, name);
let mut builder = get_list_builder(dt, values_cap, list_cap, name).unwrap();
for series in series_slice {
builder.append_series(series)
}
Expand All @@ -154,7 +154,7 @@ impl<T: AsRef<[Option<Series>]>> NamedFrom<T, [Option<Series>]> for Series {
.expect("cannot create List Series from a slice of nulls")
.dtype();

let mut builder = get_list_builder(dt, values_cap, series_slice.len(), name);
let mut builder = get_list_builder(dt, values_cap, series_slice.len(), name).unwrap();
for series in series_slice {
builder.append_opt_series(series.as_ref())
}
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/series/ops/to_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ impl Series {
}

let mut builder =
get_list_builder(s_ref.dtype(), s_ref.len(), rows as usize, self.name());
get_list_builder(s_ref.dtype(), s_ref.len(), rows as usize, self.name())?;

let mut offset = 0i64;
for _ in 0..rows {
Expand All @@ -124,7 +124,7 @@ mod test {
fn test_to_list() -> Result<()> {
let s = Series::new("a", &[1, 2, 3]);

let mut builder = get_list_builder(s.dtype(), s.len(), 1, s.name());
let mut builder = get_list_builder(s.dtype(), s.len(), 1, s.name())?;
builder.append_series(&s);
let expected = builder.finish();

Expand Down
3 changes: 1 addition & 2 deletions polars/polars-lazy/src/tests/queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,6 @@ fn test_lazy_query_5() {
.agg([col("day").head(Some(2))])
.collect()
.unwrap();
dbg!(&out);
let s = out
.select_at_idx(1)
.unwrap()
Expand Down Expand Up @@ -1566,7 +1565,7 @@ fn test_sort_by_suffix() -> Result<()> {
#[test]
fn test_list_in_select_context() -> Result<()> {
let s = Series::new("a", &[1, 2, 3]);
let mut builder = get_list_builder(s.dtype(), s.len(), 1, s.name());
let mut builder = get_list_builder(s.dtype(), s.len(), 1, s.name()).unwrap();
builder.append_series(&s);
let expected = builder.finish().into_series();

Expand Down
6 changes: 3 additions & 3 deletions py-polars/src/apply/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ pub fn apply_lambda_unknown<'a>(
let dt = series.dtype();
return Ok((
PySeries::new(
apply_lambda_with_list_out_type(df, py, lambda, null_count, Some(&series), dt)
apply_lambda_with_list_out_type(df, py, lambda, null_count, Some(&series), dt)?
.into_series(),
)
.into_py(py),
Expand Down Expand Up @@ -207,12 +207,12 @@ pub fn apply_lambda_with_list_out_type<'a>(
init_null_count: usize,
first_value: Option<&Series>,
dt: &DataType,
) -> ListChunked {
) -> PyResult<ListChunked> {
let columns = df.get_columns();

let skip = if first_value.is_some() { 1 } else { 0 };
if init_null_count == df.height() {
ChunkedArray::full_null("apply", df.height())
Ok(ChunkedArray::full_null("apply", df.height()))
} else {
let iter = ((init_null_count + skip)..df.height()).map(|idx| {
let iter = columns.iter().map(|s: &Series| Wrap(s.get(idx)));
Expand Down
9 changes: 5 additions & 4 deletions py-polars/src/apply/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ pub mod dataframe;
pub mod series;

use crate::prelude::ObjectValue;
use crate::{PySeries, Wrap};
use crate::{PyPolarsErr, PySeries, Wrap};
use polars::chunked_array::builder::get_list_builder;
use polars::prelude::*;
use polars_core::utils::CustomIterTools;
Expand Down Expand Up @@ -228,14 +228,15 @@ fn iterator_to_list(
first_value: Option<&Series>,
name: &str,
capacity: usize,
) -> ListChunked {
let mut builder = get_list_builder(dt, capacity * 5, capacity, name);
) -> PyResult<ListChunked> {
let mut builder =
get_list_builder(dt, capacity * 5, capacity, name).map_err(PyPolarsErr::from)?;
for _ in 0..init_null_count {
builder.append_null()
}
builder.append_opt_series(first_value);
for opt_val in it {
builder.append_opt_series(opt_val.as_ref())
}
builder.finish()
Ok(builder.finish())
}

0 comments on commit 91e089b

Please sign in to comment.