Skip to content

Commit

Permalink
fix dataframe explode with empty lists (#3916)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jul 6, 2022
1 parent b1f2dc5 commit 8b8630b
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 6 deletions.
17 changes: 12 additions & 5 deletions polars/polars-core/src/chunked_array/ops/explode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ pub(crate) fn offsets_to_indexes(offsets: &[i64], capacity: usize) -> Vec<IdxSiz

let mut count = 0;
let mut last_idx = 0;
let mut previous_empty = false;
for offset in &offsets[1..] {
while count < *offset {
count += 1;
Expand All @@ -217,13 +218,19 @@ pub(crate) fn offsets_to_indexes(offsets: &[i64], capacity: usize) -> Vec<IdxSiz
// we started iterating from 1, so there is always a previous offset
// we take the pointer to the previous element and deref that to get
// the previous offset
unsafe {
let previous_offset = *(offset as *const i64).offset(-1);
if previous_offset != *offset {
last_idx += 1;
}
let previous_offset = unsafe { *(offset as *const i64).offset(-1) };

if !previous_empty && (previous_offset != *offset) {
last_idx += 1;
} else {
count += 1;
idx.push(last_idx);
last_idx += 1;
}
previous_empty = previous_offset == *offset;
}
// last appended index.
let last_idx = idx[idx.len() - 1];
for _ in 0..(capacity - count as usize) {
idx.push(last_idx);
}
Expand Down
3 changes: 2 additions & 1 deletion polars/polars-core/src/frame/explode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,9 +367,10 @@ mod test {
let out = df.explode(["foo"])?;
let expected = df![
"foo" => [Some(1), Some(2), Some(3), None, Some(1), Some(1), Some(1)],
"B" => [1, 1, 1, 2, 2, 2, 3],
"B" => [1, 1, 1, 2, 3, 3, 3],
"C" => [1, 1, 1, 1, 1, 1, 1],
]?;

assert!(out.frame_equal_missing(&expected));
Ok(())
}
Expand Down
16 changes: 16 additions & 0 deletions py-polars/tests/test_explode.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,19 @@ def test_explode_empty_df_3402() -> None:
def test_explode_empty_df_3460() -> None:
    """Exploding a column that holds one empty ``large_list<int32>`` keeps Int32.

    Regression test; the number in the test name presumably refers to
    issue 3460.
    """
    # A single row whose only value is an empty list of int32.
    empty_lists = pa.array([[]], type=pa.large_list(pa.int32()))
    exploded = pl.DataFrame({"a": empty_lists}).explode("a")
    # The inner dtype must survive the explode even with nothing to emit.
    assert exploded.dtypes == [pl.Int32]


def test_explode_empty_df_3902() -> None:
    """Exploding a list column with interleaved empty lists keeps rows aligned.

    Each empty list is expected to produce exactly one output row whose
    exploded value is null, and the sibling column must repeat its value
    once per emitted element. Regression test; the number in the test name
    presumably refers to issue 3902.
    """
    first = [1, 2, 3, 4, 5]
    second = [["a"], [], ["b", "c"], [], ["d", "f", "g"]]
    exploded = pl.DataFrame({"first": first, "second": second}).explode("second")

    # Rows 2 and 4 hold empty lists: one null row each, no index shifting.
    expected = pl.DataFrame(
        {
            "first": [1, 2, 3, 3, 4, 5, 5, 5],
            "second": ["a", None, "b", "c", None, "d", "f", "g"],
        }
    )
    assert exploded.frame_equal(expected)

0 comments on commit 8b8630b

Please sign in to comment.