Skip to content

Commit

Permalink
fix(rust, python): fix invalid dtype in chunked array after struct cast (#6093)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jan 7, 2023
1 parent 8150e71 commit 47889de
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 6 deletions.
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ impl<T: PolarsDataType> ChunkedArray<T> {

/// Returns `true` if the array consists of a single chunk and contains no null values.
// NOTE(review): this hunk is listed as "1 addition & 1 deletion" and the diff markers were
// lost in extraction; the `has_validity()` line is presumably the removed version and the
// `null_count() == 0` line the added one — confirm against the original commit.
pub fn is_optimal_aligned(&self) -> bool {
self.chunks.len() == 1 && !self.has_validity()
self.chunks.len() == 1 && self.null_count() == 0
}

/// Count the null values.
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/sort/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ where
T: PolarsNumericType,
{
sort_with_fast_path!(ca, options);
if !ca.has_validity() {
if ca.null_count() == 0 {
let mut vals = memcpy_values(ca);

sort_branch(
Expand Down
13 changes: 9 additions & 4 deletions polars/polars-core/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -694,9 +694,14 @@ impl Series {
}

/// Cast throws an error if conversion had overflows
pub fn strict_cast(&self, data_type: &DataType) -> PolarsResult<Series> {
let s = self.cast(data_type)?;
if self.null_count() != s.null_count() {
pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Series> {
let null_count = self.null_count();
let len = self.len();
if null_count == len {
return Ok(Series::full_null(self.name(), len, dtype));
}
let s = self.0.cast(dtype)?;
if null_count != s.null_count() {
let failure_mask = !self.is_null() & s.is_null();
let failures = self.filter_threaded(&failure_mask, false)?.unique()?;
Err(PolarsError::ComputeError(
Expand All @@ -705,7 +710,7 @@ impl Series {
If you were trying to cast Utf8 to Date, Time, or Datetime, \
consider using `strptime`.",
self.dtype(),
data_type,
dtype,
failures.fmt_list(),
)
.into(),
Expand Down
16 changes: 16 additions & 0 deletions py-polars/tests/unit/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,3 +730,19 @@ def test_struct_empty() -> None:
# Empty struct
df = pl.DataFrame({"a": [{}]})
assert df.to_dict(False) == {"a": [{"": None}]}


def test_struct_null_cast() -> None:
    """Strictly cast a Null-typed literal to a struct dtype and check the collected row."""
    struct_dtype = pl.Struct(
        [
            pl.Field("a", pl.Int64),
            pl.Field("b", pl.Utf8),
            pl.Field("c", pl.List(pl.Float64)),
        ]
    )
    # A single null literal of dtype Null, strictly cast to the target struct dtype.
    null_as_struct = pl.lit(None, dtype=pl.Null).cast(struct_dtype, strict=True)
    collected = pl.DataFrame().lazy().select([null_as_struct]).collect()

    # Every struct field is expected to come back as None under the unnamed column.
    expected = {"": [{"a": None, "b": None, "c": None}]}
    assert collected.to_dict(False) == expected

0 comments on commit 47889de

Please sign in to comment.