Skip to content

Commit

Permalink
fix struct list concat (#3435)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed May 19, 2022
1 parent e21bdef commit 58b17b0
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 2 deletions.
6 changes: 6 additions & 0 deletions polars/polars-core/src/chunked_array/logical/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ impl StructChunked {
&self.arrow_array
}

/// Rebuilds this struct's backing data from its current fields.
///
/// Passes `self.fields` to `fields_to_struct_array` and stores both values it
/// returns, replacing the previous `arrow_array` and `fields` in place.
pub fn rechunk(&mut self) {
    let (rebuilt_array, rebuilt_fields) = fields_to_struct_array(&self.fields);
    self.fields = rebuilt_fields;
    self.arrow_array = rebuilt_array;
}

/// Does not check the lengths of the fields
pub(crate) fn new_unchecked(name: &str, fields: &[Series]) -> Self {
let dtype = DataType::Struct(
Expand Down
4 changes: 3 additions & 1 deletion polars/polars-core/src/series/implementations/struct_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,9 @@ impl SeriesTrait for SeriesWrap<StructChunked> {

/// Aggregate all chunks to a contiguous array of memory.
///
/// Works on a clone of the wrapped `StructChunked` so `self` is left untouched,
/// rechunks that clone in place and returns it as a new `Series`.
fn rechunk(&self) -> Series {
    // The previous one-liner, `self.0.apply_fields(|s| s.rechunk()).into_series()`,
    // is superseded: the in-place `rechunk` on the struct is used instead so the
    // struct's own arrow array is rebuilt together with its fields.
    let mut out = self.0.clone();
    out.rechunk();
    out.into_series()
}

fn expand_at_index(&self, index: usize, length: usize) -> Series {
Expand Down
15 changes: 14 additions & 1 deletion polars/polars-ops/src/chunked_array/list/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,6 @@ pub trait ListNameSpaceImpl: AsList {
let length = ca.len();
let mut other = other.to_vec();
let dtype = ca.dtype();
dbg!(&ca, ca.dtype());
let inner_type = ca.inner_dtype();

// broadcasting path in case all unit length
Expand Down Expand Up @@ -256,6 +255,13 @@ pub trait ListNameSpaceImpl: AsList {
for append in &to_append {
s.append(append).unwrap();
}
match inner_type {
// structs don't have chunks, so we must first rechunk the underlying series
#[cfg(feature = "dtype-struct")]
DataType::Struct(_) => s = s.rechunk(),
// all other inner dtypes: nothing to do
_ => {}
}
s
});
builder.append_opt_series(opt_s.as_ref())
Expand Down Expand Up @@ -310,6 +316,13 @@ pub trait ListNameSpaceImpl: AsList {
}
}
}
match inner_type {
// structs don't have chunks, so we must first rechunk the underlying series
#[cfg(feature = "dtype-struct")]
DataType::Struct(_) => acc = acc.rechunk(),
// all other inner dtypes: nothing to do
_ => {}
}
builder.append_series(&acc);
}
Ok(builder.finish())
Expand Down
20 changes: 20 additions & 0 deletions py-polars/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,3 +361,23 @@ def test_struct_arr_methods() -> None:
assert df.select([pl.col("list_struct").arr.get(0)]).to_dict(False) == {
"list_struct": [{"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}]
}


def test_struct_concat_list() -> None:
    """Row-wise concat of two list-of-struct columns yields the joined struct lists."""
    frame = pl.DataFrame(
        {
            "list_struct1": [
                [{"a": 1, "b": 2}, {"a": 3, "b": 4}],
                [{"a": 1, "b": 2}],
            ],
            "list_struct2": [
                [{"a": 6, "b": 7}, {"a": 8, "b": 9}],
                [{"a": 6, "b": 7}],
            ],
        }
    )

    # Append the second column's lists to the first's, row by row.
    concat_expr = pl.col("list_struct1").arr.concat("list_struct2").alias("result")
    result = frame.with_columns([concat_expr])["result"]

    # Each output row holds the structs of column 1 followed by those of column 2.
    expected = [
        [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 6, "b": 7}, {"a": 8, "b": 9}],
        [{"a": 1, "b": 2}, {"a": 6, "b": 7}],
    ]
    assert result.to_list() == expected

0 comments on commit 58b17b0

Please sign in to comment.