Skip to content

Commit

Permalink
fix filter list with logical types
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 13, 2021
1 parent 82c3694 commit bfe17da
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 1 deletion.
6 changes: 5 additions & 1 deletion polars/polars-core/src/chunked_array/ops/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,11 @@ impl ChunkFilter<ListType> for ListChunked {
.zip(filter.downcast_iter())
.map(|(left, mask)| filter_fn(left, mask).unwrap().into())
.collect::<Vec<_>>();
Ok(ChunkedArray::new_from_chunks(self.name(), chunks))

// The inner type may be categorical or another logical type, so clone `self`
// to keep that state and only swap in the filtered chunks.
let mut ca = self.clone();
ca.chunks = chunks;
Ok(ca)
}
}

Expand Down
26 changes: 26 additions & 0 deletions py-polars/tests/test_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,29 @@ def test_dtype() -> None:
a = pl.Series("a", [[1, 2, 3], [2, 5], [6, 7, 8, 9]])
assert a.dtype == pl.List
assert a.inner_dtype == pl.Int64


def test_categorical() -> None:
    """Check that filtering keeps the Categorical inner dtype of a list column.

    Regression test for https://github.com/pola-rs/polars/issues/2038.
    """
    key_a = pl.Series("a", [1, 1, 1, 1, 1, 1, 1, 1])
    key_b = pl.Series("b", [8, 2, 3, 6, 3, 6, 2, 2])
    cat_c = pl.Series("c", ["a", "b", "c", "a", "b", "c", "a", "b"]).cast(
        pl.Categorical
    )
    frame = pl.DataFrame([key_a, key_b, cat_c])

    # Aggregate "c" twice per group: once as a count, once as the raw values.
    aggregations = [
        pl.col("c").count().alias("num_different_c"),
        pl.col("c").alias("c_values"),
    ]
    grouped = frame.groupby(["a", "b"]).agg(aggregations)

    # The filter step is what the regression is about.
    kept = grouped.filter(pl.col("num_different_c") >= 2)

    # Index 3 is presumably "c_values" (after the two keys and the count).
    c_values = kept.to_series(3)

    assert c_values.inner_dtype == pl.Categorical

0 comments on commit bfe17da

Please sign in to comment.