Skip to content

Commit

Permalink
fix(rust, python): correct invalid type in struct anyvalue access (#5844)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 18, 2022
1 parent 36c72c8 commit db580af
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 5 deletions.
14 changes: 10 additions & 4 deletions polars/polars-core/src/chunked_array/ops/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,16 @@ impl<'a> AnyValue<'a> {
AnyValue::Struct(idx, arr, flds) => {
let idx = *idx;
unsafe {
arr.values()
.iter()
.zip(*flds)
.map(move |(arr, fld)| arr_to_any_value(&**arr, idx, fld.data_type()))
arr.values().iter().zip(*flds).map(move |(arr, fld)| {
// TODO! this is hacky. Investigate if we only should put physical types
// into structs
if let Some(arr) = arr.as_any().downcast_ref::<DictionaryArray<u32>>() {
let keys = arr.keys();
arr_to_any_value(keys, idx, fld.data_type())
} else {
arr_to_any_value(&**arr, idx, fld.data_type())
}
})
}
}
_ => unreachable!(),
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-ops/src/series/ops/various.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pub trait SeriesMethods: SeriesSealed {
let groups = s.group_tuples(multithreaded, sorted)?;
let values = unsafe { s.agg_first(&groups) };
let counts = groups.group_lengths("counts");
let cols = vec![values.into_series(), counts.into_series()];
let cols = vec![values, counts.into_series()];
let df = DataFrame::new_no_checks(cols);
if sorted {
df.sort(["counts"], true)
Expand Down
14 changes: 14 additions & 0 deletions py-polars/tests/unit/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,3 +704,17 @@ def test_struct_any_value_get_after_append() -> None:
a = a.append(b)
assert a[0] == {"a": 1, "b": 2}
assert a[1] == {"a": 2, "b": 3}


def test_struct_categorical_5843() -> None:
    """Check `value_counts` on a Categorical column (test named for 5843).

    The struct rows produced by `value_counts(sort=True)` must expose the
    category as its string label next to the count when converted to a
    plain Python dict.
    """
    # Build a single string column and cast it to Categorical.
    frame = pl.DataFrame({"foo": ["a", "b", "c", "a"]})
    frame = frame.with_column(pl.col("foo").cast(pl.Categorical))

    # Each row of the result is a struct holding the value and its count.
    counted = frame.select(pl.col("foo").value_counts(sort=True))

    expected = {
        "foo": [
            {"foo": "a", "counts": 2},
            {"foo": "b", "counts": 1},
            {"foo": "c", "counts": 1},
        ]
    }
    assert counted.to_dict(False) == expected

0 comments on commit db580af

Please sign in to comment.