Skip to content

Commit

Permalink
fix explode for sliced arrays (#4115)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jul 22, 2022
1 parent dd0cbbc commit 91f0ae5
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 9 deletions.
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/chunkops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ impl<T: PolarsDataType> ChunkedArray<T> {
.unwrap()]
}

if self.chunks().len() == 1 {
if self.chunks.len() == 1 {
self.clone()
} else {
let chunks = inner_rechunk(&self.chunks);
Expand Down
19 changes: 11 additions & 8 deletions polars/polars-core/src/chunked_array/ops/explode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,11 +250,13 @@ pub(crate) fn offsets_to_indexes(offsets: &[i64], capacity: usize) -> Vec<IdxSiz
let mut idx = Vec::with_capacity(capacity);

// `value_count` counts the taken values from the list values
// and aret the same unit as `offsets`
let mut value_count = 0;
// and are the same unit as `offsets`
// we also add the start offset as a list can be sliced
let mut value_count = offsets[0];
// `empty_count` counts the duplicate indexes taken because of empty lists
let mut empty_count = 0;
let mut empty_count = 0usize;
let mut last_idx = 0;

for offset in &offsets[1..] {
// this gets all the elements up to the current offset
while value_count < *offset {
Expand All @@ -280,7 +282,7 @@ pub(crate) fn offsets_to_indexes(offsets: &[i64], capacity: usize) -> Vec<IdxSiz
}

// take the remaining values
for _ in 0..(capacity - value_count as usize - empty_count as usize) {
for _ in 0..(capacity - (value_count - offsets[0]) as usize - empty_count) {
idx.push(last_idx);
}
idx
Expand Down Expand Up @@ -308,13 +310,14 @@ impl ChunkExplode for ListChunked {
));
}

// ensure that the value array is sliced
// as a list only slices its offsets on a slice operation
if !offsets.is_empty() {
let offset = offsets[0];
let start = offsets[0] as usize;
let len = offsets[offsets.len() - 1] as usize - start;
// safety:
// we are in bounds
values = unsafe {
values.slice_unchecked(offset as usize, offsets[offsets.len() - 1] as usize)
};
values = unsafe { values.slice_unchecked(start, len) };
}

let mut s = if ca.can_fast_explode() {
Expand Down
28 changes: 28 additions & 0 deletions py-polars/tests/test_explode.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,31 @@ def test_explode_empty_list_4107() -> None:
pl.testing.assert_frame_equal(
df.explode(["b"]), df.explode(["b"]).drop("row_nr").with_row_count()
)


def test_explode_correct_for_slice() -> None:
    """Regression test for #4115: ``explode`` must be correct on a sliced frame.

    The function carries the ``test_`` prefix so that pytest's default
    discovery collects and runs it; without the prefix the assertions below
    would never be executed.

    Two scenarios are covered:

    1. A list column sliced so that it no longer starts at the first row;
       exploding it must yield only the values of the remaining rows.
    2. A cross-joined, sorted frame with a row-count column, sliced to its
       first ten rows; exploding the list column must repeat ``row_nr`` and
       ``group`` once per list element.
    """
    # Scenario 1: the slice starts at row 2, so only [3, 3] and [4, 4] remain.
    df = pl.DataFrame({"b": [[1, 1], [2, 2], [3, 3], [4, 4]]})
    assert df.slice(2, 2).explode(["b"])["b"].to_list() == [3, 3, 4, 4]

    # Scenario 2: five groups cross-joined with five lists gives 25 rows;
    # after sorting on "group" each group holds the same five lists in order.
    groups = pl.DataFrame({"group": pl.arange(0, 5, eager=True)})
    lists = pl.DataFrame(
        {
            "b": [[1, 2, 3], [2, 3], [4], [1, 2, 3], [0]],
        }
    )
    df = groups.join(lists, how="cross").sort("group").with_row_count()

    # The first ten rows are groups 0 and 1. Each row number appears once per
    # element of its list (list lengths 3, 2, 1, 3, 1 per group).
    expected = pl.DataFrame(
        {
            "row_nr": [0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6, 6, 7, 8, 8, 8, 9],
            "group": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            "b": [1, 2, 3, 2, 3, 4, 1, 2, 3, 0, 1, 2, 3, 2, 3, 4, 1, 2, 3, 0],
        }
    )
    assert df.slice(0, 10).explode(["b"]).frame_equal(expected)

0 comments on commit 91f0ae5

Please sign in to comment.