Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 7 additions & 22 deletions src/nested_pandas/series/ext_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,17 +1003,9 @@ def view_fields(self, fields: str | list[str]) -> Self: # type: ignore[name-def
if not set(fields).issubset(self.field_names):
raise ValueError(f"Some fields are not found, given: {fields}, available: {self.field_names}")

chunks = []
for chunk in self.struct_array.iterchunks():
chunk = cast(pa.StructArray, chunk)
struct_dict = {}
for field in fields:
struct_dict[field] = chunk.field(field)
struct_array = pa.StructArray.from_arrays(struct_dict.values(), struct_dict.keys())
chunks.append(struct_array)
pa_array = pa.chunked_array(chunks)

return type(self)(pa_array, validate=False)
new = self.copy()
new.pa_table = new.pa_table.select(fields)
return new

def set_flat_field(self, field: str, value: ArrayLike, *, keep_dtype: bool = False) -> None:
"""Set the field from flat-array of values
Expand Down Expand Up @@ -1171,16 +1163,9 @@ def pop_fields(self, fields: Iterable[str]):
if not fields.issubset(self.field_names):
raise ValueError(f"Some fields are not found, given: {fields}, available: {self.field_names}")

if len(self.field_names) - len(fields) == 0:
fields_to_keep = [field for field in self.field_names if field not in fields]

if len(fields_to_keep) == 0:
raise ValueError("Cannot delete all fields")

chunks = []
for chunk in self.struct_array.iterchunks():
chunk = cast(pa.StructArray, chunk)
struct_dict = {}
for pa_field in chunk.type:
if pa_field.name not in fields:
struct_dict[pa_field.name] = chunk.field(pa_field.name)
struct_array = pa.StructArray.from_arrays(struct_dict.values(), struct_dict.keys())
chunks.append(struct_array)
self.struct_array = pa.chunked_array(chunks)
self.pa_table = self.pa_table.select(fields_to_keep)
10 changes: 10 additions & 0 deletions tests/nested_pandas/series/test_ext_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1813,6 +1813,16 @@ def test_pop_fields_raises_for_some_invalid_fields():
ext_array.pop_fields(["a", "c"])


def test_pop_fields_zero_chunks():
    """Check that ``pop_fields`` succeeds on an array backed by zero chunks."""
    # Build a chunked array that carries a two-field struct type but no chunks.
    input_type = pa.struct({"a": pa.list_(pa.int64()), "b": pa.list_(pa.int64())})
    empty_chunked = pa.chunked_array([], type=input_type)
    ext_array = NestedExtensionArray(empty_chunked)
    assert ext_array.num_chunks == 0, "Test setup is invalid"

    ext_array.pop_fields(["a"])

    # With no chunks present, the dtype is the only thing this test inspects:
    # only field "b" should remain.
    expected_type = pa.struct({"b": pa.list_(pa.int64())})
    assert ext_array._pyarrow_dtype == expected_type


def test_delete_last_field_raises():
"""Tests that we raise an error when trying to delete the last field left."""
struct_array = pa.StructArray.from_arrays(
Expand Down
Loading