Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.3.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Bug fixes
with a compiled regex and custom flags (:issue:`62240`)
- Fix :meth:`Series.str.match` and :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
- Fix comparing a :class:`StringDtype` Series with mixed objects raising an error (:issue:`60228`)
- Fix error being raised when using a numpy ufunc with a Python-backed string array (:issue:`40800`)

Improvements and fixes for Copy-on-Write
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
Any,
Literal,
Self,
cast,
)

import numpy as np
Expand Down Expand Up @@ -48,6 +49,7 @@
)

from pandas import Index
from pandas.arrays import StringArray


class NumpyExtensionArray(
Expand Down Expand Up @@ -234,6 +236,16 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
# e.g. test_np_max_nested_tuples
return result
else:
if self.dtype.type is str: # type: ignore[comparison-overlap]
Comment on lines 238 to +239
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: It would be nice if this were an `elif` so it could be un-indented one level, but not a big deal.

# StringDtype
self = cast("StringArray", self)
try:
# specify dtype to preserve storage/na_value
return type(self)(result, dtype=self.dtype)
except ValueError:
# if validation of input fails (no strings)
# -> fallback to returning raw numpy array
return result
# one return value; re-box array-like results
return type(self)(result)

Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,3 +840,30 @@ def test_string_array_view_type_error():
arr = pd.array(["a", "b", "c"], dtype="string")
with pytest.raises(TypeError, match="Cannot change data-type for string array."):
arr.view("i8")


@pytest.mark.parametrize("box", [pd.Series, pd.array])
def test_numpy_array_ufunc(dtype, box):
    """Apply custom numpy ufuncs to string data held in a Series / array.

    Two object-level ufuncs built with ``np.frompyfunc`` are exercised:
    one mapping each string to a number and one mapping each string to
    another string.
    """
    values = box(["a", "bb", "ccc"], dtype=dtype)

    # --- ufunc consuming strings (as objects) and producing numbers ---
    length_ufunc = np.frompyfunc(len, 1, 1)
    lengths = length_ufunc(values)
    if box is pd.Series:
        numeric_container = pd.Series
    else:
        numeric_container = np.array
    # TODO we should infer int64 dtype here?
    tm.assert_equal(lengths, numeric_container([1, 2, 3], dtype=object))

    # --- ufunc consuming strings and producing strings ---
    doubling_ufunc = np.frompyfunc(lambda item: item * 2, 1, 1)
    doubled = doubling_ufunc(values)

    doubled_values = ["aa", "bbbb", "cccccc"]
    if dtype.storage != "pyarrow":
        # the input container and dtype are expected to round-trip
        expected = box(doubled_values, dtype=dtype)
    elif box is pd.array:
        # TODO ArrowStringArray should also preserve the class / dtype
        expected = np.array(doubled_values, dtype=object)
    else:
        # TODO ArrowStringArray should also preserve the class / dtype
        # not specifying the dtype because the exact dtype is not yet preserved
        expected = pd.Series(doubled_values)

    tm.assert_equal(doubled, expected)
Loading