Skip to content

Commit

Permalink
Improve struct (#3468)
Browse files Browse the repository at this point in the history
* struct check field names not dtypes

* implement Series::full_null for Struct

* fix unique for struct
  • Loading branch information
ritchie46 committed May 22, 2022
1 parent 4a39576 commit 69c57d4
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 4 deletions.
4 changes: 3 additions & 1 deletion polars/polars-core/src/frame/groupby/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ impl DataFrame {
}};
}

if by.is_empty() || by[0].len() != self.height() {
// an empty `by` is always an error; the length check only applies if self.width() > 0
// so that we can still call this on a dummy dataframe where we provide the keys
if by.is_empty() || (by[0].len() != (self.height()) && (self.width() > 0)) {
return Err(PolarsError::ShapeMisMatch(
"the Series used as keys should have the same length as the DataFrame".into(),
));
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/series/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,9 @@ impl Series {
match av {
AnyValue::StructOwned(pl) => {
for (l, r) in fields.iter().zip(pl.1.iter()) {
if l != r {
if l.name() != r.name() {
return Err(PolarsError::ComputeError(
"structs orders must remain the same".into(),
"struct orders must remain the same".into(),
));
}
}
Expand Down
8 changes: 8 additions & 0 deletions polars/polars-core/src/series/ops/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ impl Series {
DataType::Time => Int64Chunked::full_null(name, size)
.into_time()
.into_series(),
#[cfg(feature = "dtype-struct")]
DataType::Struct(fields) => {
let fields = fields
.iter()
.map(|fld| Series::full_null(fld.name(), size, fld.data_type()))
.collect::<Vec<_>>();
StructChunked::new(name, &fields).unwrap().into_series()
}
DataType::Null => ChunkedArray::new_null("", size).into_series(),
_ => {
macro_rules! primitive {
Expand Down
1 change: 1 addition & 0 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1683,6 +1683,7 @@ impl_set_at_idx!(set_at_idx_i8, i8, i8, Int8);
impl_set_at_idx!(set_at_idx_i16, i16, i16, Int16);
impl_set_at_idx!(set_at_idx_i32, i32, i32, Int32);
impl_set_at_idx!(set_at_idx_i64, i64, i64, Int64);
impl_set_at_idx!(set_at_idx_bool, bool, bool, Boolean);

macro_rules! impl_get {
($name:ident, $series_variant:ident, $type:ty) => {
Expand Down
47 changes: 46 additions & 1 deletion py-polars/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,13 +415,34 @@ def test_struct_comparison() -> None:


def test_struct_order() -> None:
with pytest.raises(pl.ComputeError, match="structs orders must remain the same"):
with pytest.raises(pl.ComputeError, match="struct orders must remain the same"):
pl.DataFrame(
{
"col1": [{"a": 1, "b": 2}, {"b": 4, "a": 3}],
}
)

# null field values should not trigger the struct order error
assert (
pl.Series(
values=[
{"a": 1, "b": None},
{"a": 2, "b": 20},
],
).to_list()
== [{"a": 1, "b": None}, {"a": 2, "b": 20}]
)

assert (
pl.Series(
values=[
{"a": 1, "b": 10},
{"a": 2, "b": None},
],
).to_list()
== [{"a": 1, "b": 10}, {"a": 2, "b": None}]
)


def test_struct_schema_on_append_extend_3452() -> None:
housing1_data = [
Expand Down Expand Up @@ -464,3 +485,27 @@ def test_struct_schema_on_append_extend_3452() -> None:
match="cannot extend field with name: address to struct with field name: city, please check your schema",
):
housing1.append(housing2, append_chunks=False)


def test_struct_arr_eval() -> None:
# Checks `arr.eval` with `pl.element().first()` on a column holding a single
# list of three structs.
df = pl.DataFrame(
{"col_struct": [[{"a": 1, "b": 11}, {"a": 2, "b": 12}, {"a": 1, "b": 11}]]}
)
# Expected: the original column is unchanged and "first" holds a one-element
# list containing the first struct of the input list.
assert df.with_column(
pl.col("col_struct").arr.eval(pl.element().first()).alias("first")
).to_dict(False) == {
"col_struct": [[{"a": 1, "b": 11}, {"a": 2, "b": 12}, {"a": 1, "b": 11}]],
"first": [[{"a": 1, "b": 11}]],
}


def test_arr_unique() -> None:
# Checks `arr.unique` on a column holding a single list of structs in which
# {"a": 1, "b": 11} occurs twice.
df = pl.DataFrame(
{"col_struct": [[{"a": 1, "b": 11}, {"a": 2, "b": 12}, {"a": 1, "b": 11}]]}
)
# Expected: the original column is unchanged and "unique" holds each distinct
# struct once. NOTE(review): the assertion pins a specific output order;
# whether that order is guaranteed is not visible here -- confirm.
assert df.with_column(pl.col("col_struct").arr.unique().alias("unique")).to_dict(
False
) == {
"col_struct": [[{"a": 1, "b": 11}, {"a": 2, "b": 12}, {"a": 1, "b": 11}]],
"unique": [[{"a": 2, "b": 12}, {"a": 1, "b": 11}]],
}

0 comments on commit 69c57d4

Please sign in to comment.