From d650fd85a1b76ecb78f8fb5817309d647583e62d Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Mon, 29 Sep 2025 15:16:30 -0400 Subject: [PATCH] Fix selecting and deleting fields from zero-chunked ext-array --- src/nested_pandas/series/ext_array.py | 29 +++++--------------- tests/nested_pandas/series/test_ext_array.py | 10 +++++++ 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/src/nested_pandas/series/ext_array.py b/src/nested_pandas/series/ext_array.py index 582f0a53..86de6f94 100644 --- a/src/nested_pandas/series/ext_array.py +++ b/src/nested_pandas/series/ext_array.py @@ -1003,17 +1003,9 @@ def view_fields(self, fields: str | list[str]) -> Self: # type: ignore[name-def if not set(fields).issubset(self.field_names): raise ValueError(f"Some fields are not found, given: {fields}, available: {self.field_names}") - chunks = [] - for chunk in self.struct_array.iterchunks(): - chunk = cast(pa.StructArray, chunk) - struct_dict = {} - for field in fields: - struct_dict[field] = chunk.field(field) - struct_array = pa.StructArray.from_arrays(struct_dict.values(), struct_dict.keys()) - chunks.append(struct_array) - pa_array = pa.chunked_array(chunks) - - return type(self)(pa_array, validate=False) + new = self.copy() + new.pa_table = new.pa_table.select(fields) + return new def set_flat_field(self, field: str, value: ArrayLike, *, keep_dtype: bool = False) -> None: """Set the field from flat-array of values @@ -1171,16 +1163,9 @@ def pop_fields(self, fields: Iterable[str]): if not fields.issubset(self.field_names): raise ValueError(f"Some fields are not found, given: {fields}, available: {self.field_names}") - if len(self.field_names) - len(fields) == 0: + fields_to_keep = [field for field in self.field_names if field not in fields] + + if len(fields_to_keep) == 0: raise ValueError("Cannot delete all fields") - chunks = [] - for chunk in self.struct_array.iterchunks(): - chunk = cast(pa.StructArray, chunk) - struct_dict = {} - for pa_field in chunk.type: - if pa_field.name not in fields: - struct_dict[pa_field.name] = chunk.field(pa_field.name) - struct_array = pa.StructArray.from_arrays(struct_dict.values(), struct_dict.keys()) - chunks.append(struct_array) - self.struct_array = pa.chunked_array(chunks) + self.pa_table = self.pa_table.select(fields_to_keep) diff --git a/tests/nested_pandas/series/test_ext_array.py b/tests/nested_pandas/series/test_ext_array.py index b5ee9c7f..88afb890 100644 --- a/tests/nested_pandas/series/test_ext_array.py +++ b/tests/nested_pandas/series/test_ext_array.py @@ -1813,6 +1813,16 @@ def test_pop_fields_raises_for_some_invalid_fields(): ext_array.pop_fields(["a", "c"]) +def test_pop_fields_zero_chunks(): + """Tests that we can pop fields even when the underlying array has no chunks.""" + ext_array = NestedExtensionArray( + pa.chunked_array([], type=pa.struct({"a": pa.list_(pa.int64()), "b": pa.list_(pa.int64())})) + ) + assert ext_array.num_chunks == 0, "Test setup is invalid" + ext_array.pop_fields(["a"]) + assert ext_array._pyarrow_dtype == pa.struct({"b": pa.list_(pa.int64())}) + + def test_delete_last_field_raises(): """Tests that we raise an error when trying to delete the last field left.""" struct_array = pa.StructArray.from_arrays(