Skip to content

Commit

Permalink
fix(rust, python): properly set null anyvalue if categorical is nested
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jan 4, 2023
1 parent ddd4faa commit d41d0af
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
18 changes: 13 additions & 5 deletions polars/polars-core/src/chunked_array/ops/any_value.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use std::convert::TryFrom;

#[cfg(feature = "dtype-categorical")]
use polars_arrow::is_valid::IsValid;
#[cfg(feature = "dtype-categorical")]
use polars_utils::sync::SyncPtr;

Expand Down Expand Up @@ -133,11 +135,17 @@ impl<'a> AnyValue<'a> {
let values =
values.as_any().downcast_ref::<Utf8Array<i64>>().unwrap();
let arr = &*(keys as *const dyn Array as *const UInt32Array);
let v = arr.value_unchecked(idx);
let DataType::Categorical(Some(rev_map)) = fld.data_type() else {
unimplemented!()
};
AnyValue::Categorical(v, rev_map, SyncPtr::from_const(values))

if arr.is_valid_unchecked(idx) {
let v = arr.value_unchecked(idx);
let DataType::Categorical(Some(rev_map)) = fld.data_type() else {
unimplemented!()
};
AnyValue::Categorical(v, rev_map, SyncPtr::from_const(values))
} else {
AnyValue::Null
}

} else {
arr_to_any_value(&**arr, idx, fld.data_type())
}
Expand Down
12 changes: 12 additions & 0 deletions py-polars/tests/unit/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,15 @@ def test_categorical_max_null_5437() -> None:
.with_column(pl.col("strings").cast(pl.Categorical).alias("cats"))
.select(pl.all().max())
).to_dict(False) == {"strings": ["c"], "values": [3], "cats": [None]}


def test_categorical_in_struct_nulls() -> None:
    """Check value_counts on a Categorical column that contains nulls.

    Builds a Categorical series with three nulls, two "doctor" and one
    "waiter", runs a sorted ``value_counts`` on it, and verifies that each
    resulting struct row compares equal to the expected dict -- in
    particular that the null category comes back as ``None``.
    """
    jobs = pl.Series(
        "job", ["doctor", "waiter", None, None, None, "doctor"], pl.Categorical
    )
    frame = pl.DataFrame([jobs])
    counted = frame.select(pl.col("job").value_counts(sort=True))["job"]

    # Rows are listed in the order the sorted value_counts is expected to
    # return them; they are compared one by one, in that order.
    expected_rows = [
        {"job": None, "counts": 3},
        {"job": "doctor", "counts": 2},
        {"job": "waiter", "counts": 1},
    ]
    for position, expected in enumerate(expected_rows):
        assert counted[position] == expected

0 comments on commit d41d0af

Please sign in to comment.