Skip to content

Commit

Permalink
Backport PR #52075 on branch 2.0.x (BUG: Arrow setitem segfaults when…
Browse files Browse the repository at this point in the history
… len > 145 000) (#52259)

* BUG: Arrow setitem segfaults when len > 145 000 (#52075)

* BUG: Arrow setitem segfaults when len > 145 000

* Add gh ref

* Address review

* Restrict to bool type

(cherry picked from commit 10000db)

* _data

---------

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
  • Loading branch information
mroeschke and phofl committed Mar 28, 2023
1 parent ab9885e commit 9fc32de
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 0 deletions.
4 changes: 4 additions & 0 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1618,6 +1618,10 @@ def _replace_with_mask(
indices = pa.array(indices, type=pa.int64())
replacements = replacements.take(indices)
return cls._if_else(mask, replacements, values)
if isinstance(values, pa.ChunkedArray) and pa.types.is_boolean(values.type):
# GH#52059 replace_with_mask segfaults for chunked array
# https://github.com/apache/arrow/issues/34634
values = values.combine_chunks()
try:
return pc.replace_with_mask(values, mask, replacements)
except pa.ArrowNotImplementedError:
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2333,3 +2333,12 @@ def test_series_from_string_array(dtype):
ser = pd.Series(arr, dtype=dtype)
expected = pd.Series(ArrowExtensionArray(arr), dtype=dtype)
tm.assert_series_equal(ser, expected)


def test_setitem_boolean_replace_with_mask_segfault():
    """Assigning through an all-False mask leaves a large boolean array unchanged.

    Regression test for GH#52059: builds a boolean ArrowExtensionArray backed
    by a ``pa.chunked_array`` of 145_000 elements, assigns ``False`` through
    a mask that selects nothing, and checks the underlying data is equal to
    an untouched copy.
    """
    # GH#52059
    size = 145_000
    all_true = np.ones((size,), dtype=np.bool_)
    arr = ArrowExtensionArray(pa.chunked_array([all_true]))
    expected = arr.copy()

    # The mask selects no positions, so the assignment must not alter the data.
    nothing_selected = np.zeros((size,), dtype=np.bool_)
    arr[nothing_selected] = False

    assert arr._data == expected._data

0 comments on commit 9fc32de

Please sign in to comment.