Skip to content

Commit

Permalink
impl explode for nested lists (#4028)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jul 15, 2022
1 parent 6812c87 commit 9545dd5
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 17 deletions.
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/builder/boolean.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::*;

// Builder struct holding a `MutableBooleanArray` plus the `Field` of the column being built.
pub struct BooleanChunkedBuilder {
// NOTE(review): this hunk is a diff rendered without +/- markers (1 addition, 1 deletion):
// the next two lines are the removed and the added form of the same field. The change
// widens `array_builder` from private to `pub(crate)`; the explode.rs hunk of this commit
// reaches into `builder.array_builder` directly, which presumably motivates it — confirm.
array_builder: MutableBooleanArray,
pub(crate) array_builder: MutableBooleanArray,
field: Field,
}

Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/builder/utf8.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::*;

// Builder struct holding a `MutableUtf8Array<i64>`, a capacity value and the column `Field`.
pub struct Utf8ChunkedBuilder {
// NOTE(review): this hunk is a diff rendered without +/- markers (1 addition, 1 deletion):
// the next two lines are the removed and the added form of the same field. The change
// narrows `builder` from `pub` to `pub(crate)`, so code outside this crate can no longer
// access the field — verify no external user relied on it.
pub builder: MutableUtf8Array<i64>,
pub(crate) builder: MutableUtf8Array<i64>,
pub capacity: usize,
field: Field,
}
Expand Down
71 changes: 57 additions & 14 deletions polars/polars-core/src/chunked_array/ops/explode.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::chunked_array::builder::AnonymousOwnedListBuilder;
use crate::prelude::*;
use arrow::bitmap::Bitmap;
use arrow::{array::*, bitmap::MutableBitmap, buffer::Buffer};
Expand Down Expand Up @@ -141,9 +142,13 @@ impl ExplodeByOffsets for BooleanChunked {
if o == last {
if start != last {
let vals = arr.slice(start, last - start);
let vals_ref = vals.as_any().downcast_ref::<BooleanArray>().unwrap();
for val in vals_ref {
builder.append_option(val)

if vals.null_count() == 0 {
builder
.array_builder
.extend_trusted_len_values(vals.values_iter())
} else {
builder.array_builder.extend_trusted_len(vals.into_iter());
}
}
builder.append_null();
Expand All @@ -152,16 +157,48 @@ impl ExplodeByOffsets for BooleanChunked {
last = o;
}
let vals = arr.slice(start, last - start);
let vals_ref = vals.as_any().downcast_ref::<BooleanArray>().unwrap();
for val in vals_ref {
builder.append_option(val)
if vals.null_count() == 0 {
builder
.array_builder
.extend_trusted_len_values(vals.values_iter())
} else {
builder.array_builder.extend_trusted_len(vals.into_iter());
}
builder.finish().into()
}
}
// `ExplodeByOffsets` for list columns whose elements are themselves lists.
impl ExplodeByOffsets for ListChunked {
// NOTE(review): this listing is a diff rendered without +/- markers. The next two lines
// appear to be the removed implementation (it unconditionally panicked); the second
// `fn explode_by_offsets` below is its replacement.
fn explode_by_offsets(&self, _offsets: &[i64]) -> Series {
panic!("cannot explode List of Lists")
// Rebuilds the column as a new list `Series` by walking `offsets`.
//
// Panics if `offsets` is empty (`offsets[0]` and `offsets[1..]` are indexed directly).
// Only the first chunk is read; debug builds assert that there is exactly one chunk.
fn explode_by_offsets(&self, offsets: &[i64]) -> Series {
debug_assert_eq!(self.chunks.len(), 1);
let arr = self.downcast_iter().next().unwrap();

// Capacity hint for the builder: 1.5x the number of elements in the source array.
let cap = ((arr.len() as f32) * 1.5) as usize;
let inner_type = self.inner_dtype();
let mut builder = AnonymousOwnedListBuilder::new(self.name(), cap, Some(inner_type));

// `start..last` is the pending run of elements that has not been copied to the builder yet.
let mut start = offsets[0] as usize;
let mut last = start;
for &o in &offsets[1..] {
let o = o as usize;
// Two equal consecutive offsets — presumably an empty list row; confirm against the caller.
if o == last {
// Flush the pending run (if any) before emitting the null for this row.
if start != last {
let vals = arr.slice(start, last - start);
// Wrap the slice in an unnamed ListChunked so its elements can be iterated one by one.
let ca = ListChunked::from_chunks("", vec![Box::new(vals)]);
for s in &ca {
builder.append_opt_series(s.as_ref())
}
}
builder.append_null();
start = o;
}
last = o;
}
// Flush whatever run is still pending after the last offset.
let vals = arr.slice(start, last - start);
let ca = ListChunked::from_chunks("", vec![Box::new(vals)]);
for s in &ca {
builder.append_opt_series(s.as_ref())
}
builder.finish().into()
}
}
impl ExplodeByOffsets for Utf8Chunked {
Expand All @@ -180,9 +217,12 @@ impl ExplodeByOffsets for Utf8Chunked {
if o == last {
if start != last {
let vals = arr.slice(start, last - start);
let vals_ref = vals.as_any().downcast_ref::<LargeStringArray>().unwrap();
for val in vals_ref {
builder.append_option(val)
if vals.null_count() == 0 {
builder
.builder
.extend_trusted_len_values(vals.values_iter())
} else {
builder.builder.extend_trusted_len(vals.into_iter());
}
}
builder.append_null();
Expand All @@ -191,9 +231,12 @@ impl ExplodeByOffsets for Utf8Chunked {
last = o;
}
let vals = arr.slice(start, last - start);
let vals_ref = vals.as_any().downcast_ref::<LargeStringArray>().unwrap();
for val in vals_ref {
builder.append_option(val)
if vals.null_count() == 0 {
builder
.builder
.extend_trusted_len_values(vals.values_iter())
} else {
builder.builder.extend_trusted_len(vals.into_iter());
}
builder.finish().into()
}
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/dsl/struct_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl StructNameSpace {
.unwrap_or_else(|| panic!("{} not found", name2));
fld.data_type().clone()
} else {
unreachable!()
panic!("not a struct type: {}", dtype);
}
}),
)
Expand Down
22 changes: 22 additions & 0 deletions py-polars/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,3 +532,25 @@ def test_is_in_struct() -> None:
"struct_elem": [{"a": 1, "b": 11}],
"struct_list": [[{"a": 1, "b": 11}, {"a": 2, "b": 12}, {"a": 3, "b": 13}]],
}


def test_nested_explode_4026() -> None:
    """Explode a list-of-structs column where each struct itself holds a list.

    Every struct in the single ``data`` row must become its own row, and the
    ``day`` value must be repeated for each of them.
    """
    frame = pl.DataFrame(
        {
            "data": [
                [
                    {"account_id": 10, "values": [1, 2]},
                    {"account_id": 11, "values": [10, 20]},
                ]
            ],
            "day": ["monday"],
        }
    )

    # One output row per struct; the nested "values" lists stay intact.
    expected = {
        "data": [
            {"account_id": 10, "values": [1, 2]},
            {"account_id": 11, "values": [10, 20]},
        ],
        "day": ["monday", "monday"],
    }

    exploded = frame.explode("data")
    assert exploded.to_dict(False) == expected

0 comments on commit 9545dd5

Please sign in to comment.