Skip to content

Commit

Permalink
Backport PR #54591 on branch 2.1.x (Implement any and all for pyarrow numpy strings) (#54796)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl committed Aug 28, 2023
1 parent 901b5e6 commit 72cb101
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 1 deletion.
13 changes: 13 additions & 0 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,3 +554,16 @@ def value_counts(self, dropna: bool = True):
return Series(
result._values.to_numpy(), index=result.index, name=result.name, copy=False
)

def _reduce(
    self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
):
    """
    Reduce the array, special-casing the boolean reductions.

    For ``"any"`` and ``"all"`` a boolean mask is built from the backing
    pyarrow array and the reduction is delegated to an
    ``ArrowExtensionArray`` wrapping that mask. Every other reduction name
    is forwarded to the parent class unchanged.

    Parameters
    ----------
    name : str
        Name of the reduction to perform.
    skipna : bool, default True
        Forwarded as-is to the delegated ``_reduce`` call.
    keepdims : bool, default False
        Forwarded as-is to the delegated ``_reduce`` call.
    **kwargs
        Extra keyword arguments, forwarded as-is.

    Returns
    -------
    Whatever the delegated ``_reduce`` call returns.
    """
    if name not in ("any", "all"):
        return super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs)

    # An element counts as truthy only when it is both non-null and not the
    # empty string; the two masks are combined with pyarrow's Kleene AND.
    not_null = pc.invert(pc.is_null(self._pa_array))
    not_empty = pc.not_equal(self._pa_array, "")
    truthy = pc.and_kleene(not_null, not_empty)

    return ArrowExtensionArray(truthy)._reduce(
        name, skipna=skipna, keepdims=keepdims, **kwargs
    )
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def check_reduce(self, s, op_name, skipna):

try:
alt = s.astype("float64")
except TypeError:
except (TypeError, ValueError):
# e.g. Interval can't cast, so let's cast to object and do
# the reduction pointwise
alt = s.astype(object)
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/extension/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,12 @@ def test_fillna_no_op_returns_copy(self, data):


class TestReduce(base.BaseReduceTests):
def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
return (
ser.dtype.storage == "pyarrow_numpy" # type: ignore[union-attr]
and op_name in ("any", "all")
)

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1078,6 +1078,25 @@ def test_any_all_datetimelike(self):
assert df.any().all()
assert not df.all().any()

def test_any_all_pyarrow_string(self):
    """Check ``any``/``all`` truthiness on ``string[pyarrow_numpy]`` series."""
    # GH#54591
    pytest.importorskip("pyarrow")

    # (values, expected truthiness of .any(), expected truthiness of .all())
    scenarios = [
        (["", "a"], True, False),
        ([None, "a"], True, False),
        ([None, ""], False, False),
        (["a", "b"], True, True),
    ]
    for values, any_expected, all_expected in scenarios:
        ser = Series(values, dtype="string[pyarrow_numpy]")
        assert bool(ser.any()) == any_expected
        assert bool(ser.all()) == all_expected

def test_timedelta64_analytics(self):
# index min/max
dti = date_range("2012-1-1", periods=3, freq="D")
Expand Down

0 comments on commit 72cb101

Please sign in to comment.