Skip to content

Commit

Permalink
Backport PR #51688 on branch 2.0.x (BUG: ArrowExtensionArray logical ops raising KeyError) (#51810)
Browse files: browse the repository at this point in the history

Backport PR #51688: BUG: ArrowExtensionArray logical ops raising KeyError

Co-authored-by: Luke Manley <lukemanley@gmail.com>
  • Loading branch information
meeseeksmachine and lukemanley committed Mar 7, 2023
1 parent 6808720 commit 6d5804b
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 5 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1368,6 +1368,7 @@ ExtensionArray
- Bug in :class:`Series` constructor unnecessarily overflowing for nullable unsigned integer dtypes (:issue:`38798`, :issue:`25880`)
- Bug in setting non-string value into ``StringArray`` raising ``ValueError`` instead of ``TypeError`` (:issue:`49632`)
- Bug in :meth:`DataFrame.reindex` not honoring the default ``copy=True`` keyword in case of columns with ExtensionDtype (and as a result also selecting multiple columns with getitem (``[]``) didn't correctly result in a copy) (:issue:`51197`)
- Bug in :class:`~arrays.ArrowExtensionArray` logical operations ``&`` and ``|`` raising ``KeyError`` (:issue:`51688`)

Styler
^^^^^^
Expand Down
15 changes: 10 additions & 5 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@
}

# Lookup table of callables from `pc` (presumably pyarrow.compute -- the
# import is not visible here) for the logical operators, keyed by
# operator-function name. `_evaluate_op_method` tests `op.__name__` for
# membership in this table.
# The "r"-prefixed entries are the reflected forms: each one swaps its two
# operands before delegating to the same `pc` function.
# NOTE(review): this listing looks like a rendered diff that shows both the
# pre-change keys ("and", "rand", "or", "ror") and their replacements with a
# trailing underscore -- confirm which set exists in the real file.
ARROW_LOGICAL_FUNCS = {
"and": pc.and_kleene,
"rand": lambda x, y: pc.and_kleene(y, x),
"or": pc.or_kleene,
"ror": lambda x, y: pc.or_kleene(y, x),
"and_": pc.and_kleene,
"rand_": lambda x, y: pc.and_kleene(y, x),
"or_": pc.or_kleene,
"ror_": lambda x, y: pc.or_kleene(y, x),
"xor": pc.xor,
"rxor": lambda x, y: pc.xor(y, x),
}
Expand Down Expand Up @@ -491,7 +491,12 @@ def _evaluate_op_method(self, other, op, arrow_funcs):
elif isinstance(other, (np.ndarray, list)):
result = pc_func(self._data, pa.array(other, from_pandas=True))
elif is_scalar(other):
result = pc_func(self._data, pa.scalar(other))
if isna(other) and op.__name__ in ARROW_LOGICAL_FUNCS:
# pyarrow kleene ops require null to be typed
pa_scalar = pa.scalar(None, type=self._data.type)
else:
pa_scalar = pa.scalar(other)
result = pc_func(self._data, pa_scalar)
else:
raise NotImplementedError(
f"{op.__name__} not implemented for {type(other)}"
Expand Down
144 changes: 144 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1270,6 +1270,150 @@ def test_invalid_other_comp(self, data, comparison_op):
comparison_op(data, object())


class TestLogicalOps:
    """Tests for ``|``, ``&`` and ``^`` on ``boolean[pyarrow]`` Series.

    Each test checks the operator in both operand orders (so the reflected
    implementation is exercised as well) and then verifies that the inputs
    were not modified in place.
    """

    def test_kleene_or(self):
        """``a | b`` over all nine (True, False, null) pairs: a null operand
        yields null unless the other operand is True."""
        a = pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]")
        b = pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
        result = a | b
        expected = pd.Series(
            [True, True, True, True, False, None, True, None, None],
            dtype="boolean[pyarrow]",
        )
        tm.assert_series_equal(result, expected)

        result = b | a
        tm.assert_series_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_series_equal(
            a,
            pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]"),
        )
        tm.assert_series_equal(
            b, pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
        )

    @pytest.mark.parametrize(
        "other, expected",
        [
            (None, [True, None, None]),
            (pd.NA, [True, None, None]),
            (True, [True, True, True]),
            (np.bool_(True), [True, True, True]),
            (False, [True, False, None]),
            (np.bool_(False), [True, False, None]),
        ],
    )
    def test_kleene_or_scalar(self, other, expected):
        """``Series | scalar`` and ``scalar | Series`` for null, Python bool
        and NumPy bool scalars."""
        a = pd.Series([True, False, None], dtype="boolean[pyarrow]")
        result = a | other
        expected = pd.Series(expected, dtype="boolean[pyarrow]")
        tm.assert_series_equal(result, expected)

        result = other | a
        tm.assert_series_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_series_equal(
            a, pd.Series([True, False, None], dtype="boolean[pyarrow]")
        )

    def test_kleene_and(self):
        """``a & b`` over all nine (True, False, null) pairs: a null operand
        yields null unless the other operand is False."""
        a = pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]")
        b = pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
        result = a & b
        expected = pd.Series(
            [True, False, None, False, False, False, None, False, None],
            dtype="boolean[pyarrow]",
        )
        tm.assert_series_equal(result, expected)

        result = b & a
        tm.assert_series_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_series_equal(
            a,
            pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]"),
        )
        tm.assert_series_equal(
            b, pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
        )

    @pytest.mark.parametrize(
        "other, expected",
        [
            (None, [None, False, None]),
            (pd.NA, [None, False, None]),
            (True, [True, False, None]),
            (False, [False, False, False]),
            (np.bool_(True), [True, False, None]),
            (np.bool_(False), [False, False, False]),
        ],
    )
    def test_kleene_and_scalar(self, other, expected):
        """``Series & scalar`` and ``scalar & Series`` for null, Python bool
        and NumPy bool scalars."""
        a = pd.Series([True, False, None], dtype="boolean[pyarrow]")
        result = a & other
        expected = pd.Series(expected, dtype="boolean[pyarrow]")
        tm.assert_series_equal(result, expected)

        result = other & a
        tm.assert_series_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_series_equal(
            a, pd.Series([True, False, None], dtype="boolean[pyarrow]")
        )

    def test_kleene_xor(self):
        """``a ^ b`` over all nine (True, False, null) pairs: any null
        operand yields null."""
        a = pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]")
        b = pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
        result = a ^ b
        expected = pd.Series(
            [False, True, None, True, False, None, None, None, None],
            dtype="boolean[pyarrow]",
        )
        tm.assert_series_equal(result, expected)

        result = b ^ a
        tm.assert_series_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_series_equal(
            a,
            pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]"),
        )
        tm.assert_series_equal(
            b, pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
        )

    @pytest.mark.parametrize(
        "other, expected",
        [
            (None, [None, None, None]),
            (pd.NA, [None, None, None]),
            (True, [False, True, None]),
            (np.bool_(True), [False, True, None]),
            # plain Python False, mirroring the coverage of the | and & tests
            (False, [True, False, None]),
            (np.bool_(False), [True, False, None]),
        ],
    )
    def test_kleene_xor_scalar(self, other, expected):
        """``Series ^ scalar`` and ``scalar ^ Series`` for null, Python bool
        and NumPy bool scalars."""
        a = pd.Series([True, False, None], dtype="boolean[pyarrow]")
        result = a ^ other
        expected = pd.Series(expected, dtype="boolean[pyarrow]")
        tm.assert_series_equal(result, expected)

        result = other ^ a
        tm.assert_series_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_series_equal(
            a, pd.Series([True, False, None], dtype="boolean[pyarrow]")
        )


def test_arrowdtype_construct_from_string_type_with_unsupported_parameters():
with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
ArrowDtype.construct_from_string("not_a_real_dype[s, tz=UTC][pyarrow]")
Expand Down

0 comments on commit 6d5804b

Please sign in to comment.