Skip to content

Commit

Permalink
fix empty list edge case (#3621)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 8, 2022
1 parent 89f94b5 commit b27cb9c
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 6 deletions.
10 changes: 9 additions & 1 deletion polars/polars-arrow/src/array/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use arrow::datatypes::DataType;
use arrow::error::Result;

pub struct AnonymousBuilder<'a> {
pub arrays: Vec<&'a dyn Array>,
arrays: Vec<&'a dyn Array>,
offsets: Vec<i64>,
validity: Option<MutableBitmap>,
size: i64,
Expand All @@ -31,6 +31,14 @@ impl<'a> AnonymousBuilder<'a> {
self.arrays.is_empty()
}

/// Returns a borrowed view of the offsets recorded by this builder so far.
pub fn offsets(&self) -> &[i64] {
    self.offsets.as_slice()
}

pub fn take_offsets(self) -> Vec<i64> {
self.offsets
}

#[inline]
pub fn push(&mut self, arr: &'a dyn Array) {
self.size += arr.len() as i64;
Expand Down
27 changes: 22 additions & 5 deletions polars/polars-core/src/chunked_array/builder/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -532,11 +532,28 @@ impl ListBuilderTrait for AnonymousOwnedListBuilder {
fn finish(&mut self) -> ListChunked {
let slf = std::mem::take(self);
if slf.builder.is_empty() {
ListChunked::full_null_with_dtype(
&slf.name,
0,
&slf.inner_dtype.unwrap_or(DataType::Null),
)
// Not really empty: empty (null) lists were probably added, e.g. `[]`.
let real_length = slf.builder.offsets().len() - 1;
if real_length > 0 {
let dtype = slf.inner_dtype.unwrap_or(NULL_DTYPE).to_arrow();
let array = new_null_array(dtype.clone(), real_length);
let dtype = ListArray::<i64>::default_datatype(dtype);
let array = unsafe {
ListArray::new_unchecked(
dtype,
slf.builder.take_offsets().into(),
Arc::from(array),
None,
)
};
ListChunked::from_chunks(&slf.name, vec![Arc::new(array)])
} else {
ListChunked::full_null_with_dtype(
&slf.name,
0,
&slf.inner_dtype.unwrap_or(DataType::Null),
)
}
} else {
let inner_dtype = slf.inner_dtype.map(|dt| dt.to_physical().to_arrow());
let arr = slf.builder.finish(inner_dtype.as_ref()).unwrap();
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-core/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1063,6 +1063,8 @@ pub type IdxType = UInt32Type;
#[cfg(feature = "bigidx")]
pub type IdxType = UInt64Type;

/// Fallback dtype used by the anonymous list builder's `finish` when no inner
/// dtype was recorded (`inner_dtype` is `None`).
pub const NULL_DTYPE: DataType = DataType::Int32;

#[cfg(test)]
mod test {
use super::*;
Expand Down
3 changes: 3 additions & 0 deletions polars/polars-core/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,9 @@ impl Debug for Series {
self.name(),
"Series"
),
DataType::Null => {
writeln!(f, "nullarray")
}
dt => panic!("{:?} not impl", dt),
}
}
Expand Down
7 changes: 7 additions & 0 deletions py-polars/tests/test_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,3 +263,10 @@ def test_list_fill_list() -> None:
.alias("filled")
]
).to_dict(False) == {"filled": [[1, 2, 3], [5]]}


def test_empty_list_construction() -> None:
    """An empty inner list must survive construction as an empty list."""
    # A Series built from a single empty list round-trips unchanged.
    roundtripped = pl.Series([[]]).to_list()
    assert roundtripped == [[]]

    # The same holds for a row-oriented DataFrame with an empty list cell.
    rows = [{"array": [], "not_array": 1234}]
    frame = pl.DataFrame(rows, orient="row")
    assert frame.to_dict(False) == {"array": [[]], "not_array": [1234]}

0 comments on commit b27cb9c

Please sign in to comment.