Skip to content

Commit d650fd8

Browse files
committed
Fix selecting and deleting fields from zero-chunked ext-array
1 parent cb5b36d commit d650fd8

File tree

2 files changed: 17 additions (+17) and 22 deletions (-22)

2 files changed: 17 additions (+17) and 22 deletions (-22)

src/nested_pandas/series/ext_array.py

Lines changed: 7 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1003,17 +1003,9 @@ def view_fields(self, fields: str | list[str]) -> Self: # type: ignore[name-def
10031003
if not set(fields).issubset(self.field_names):
10041004
raise ValueError(f"Some fields are not found, given: {fields}, available: {self.field_names}")
10051005

1006-
chunks = []
1007-
for chunk in self.struct_array.iterchunks():
1008-
chunk = cast(pa.StructArray, chunk)
1009-
struct_dict = {}
1010-
for field in fields:
1011-
struct_dict[field] = chunk.field(field)
1012-
struct_array = pa.StructArray.from_arrays(struct_dict.values(), struct_dict.keys())
1013-
chunks.append(struct_array)
1014-
pa_array = pa.chunked_array(chunks)
1015-
1016-
return type(self)(pa_array, validate=False)
1006+
new = self.copy()
1007+
new.pa_table = new.pa_table.select(fields)
1008+
return new
10171009

10181010
def set_flat_field(self, field: str, value: ArrayLike, *, keep_dtype: bool = False) -> None:
10191011
"""Set the field from flat-array of values
@@ -1171,16 +1163,9 @@ def pop_fields(self, fields: Iterable[str]):
11711163
if not fields.issubset(self.field_names):
11721164
raise ValueError(f"Some fields are not found, given: {fields}, available: {self.field_names}")
11731165

1174-
if len(self.field_names) - len(fields) == 0:
1166+
fields_to_keep = [field for field in self.field_names if field not in fields]
1167+
1168+
if len(fields_to_keep) == 0:
11751169
raise ValueError("Cannot delete all fields")
11761170

1177-
chunks = []
1178-
for chunk in self.struct_array.iterchunks():
1179-
chunk = cast(pa.StructArray, chunk)
1180-
struct_dict = {}
1181-
for pa_field in chunk.type:
1182-
if pa_field.name not in fields:
1183-
struct_dict[pa_field.name] = chunk.field(pa_field.name)
1184-
struct_array = pa.StructArray.from_arrays(struct_dict.values(), struct_dict.keys())
1185-
chunks.append(struct_array)
1186-
self.struct_array = pa.chunked_array(chunks)
1171+
self.pa_table = self.pa_table.select(fields_to_keep)

tests/nested_pandas/series/test_ext_array.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1813,6 +1813,16 @@ def test_pop_fields_raises_for_some_invalid_fields():
18131813
ext_array.pop_fields(["a", "c"])
18141814

18151815

1816+
def test_pop_fields_zero_chunks():
    """Check that ``pop_fields`` works on an extension array backed by zero chunks.

    The array is built from an empty chunked array whose struct type has two
    list-of-int64 fields, "a" and "b". After popping "a", the pyarrow dtype of
    the extension array must contain only "b".
    """
    list_type = pa.list_(pa.int64())
    empty_storage = pa.chunked_array([], type=pa.struct({"a": list_type, "b": list_type}))
    ext_array = NestedExtensionArray(empty_storage)
    # Make sure the scenario under test (no chunks at all) is really what was built.
    assert ext_array.num_chunks == 0, "Test setup is invalid"

    ext_array.pop_fields(["a"])

    expected_dtype = pa.struct({"b": list_type})
    assert ext_array._pyarrow_dtype == expected_dtype
1824+
1825+
18161826
def test_delete_last_field_raises():
18171827
"""Tests that we raise an error when trying to delete the last field left."""
18181828
struct_array = pa.StructArray.from_arrays(

0 commit comments

Comments
 (0)