Skip to content

Commit

Permalink
Backport PR #51541 on branch 2.0.x (ENH: pyarrow dont raise on divisi…
Browse files Browse the repository at this point in the history
…on by zero) (#51553)

Backport PR #51541: ENH: pyarrow dont raise on division by zero

Co-authored-by: jbrockmendel <jbrockmendel@gmail.com>
  • Loading branch information
meeseeksmachine and jbrockmendel committed Feb 22, 2023
1 parent 91b18e3 commit a3701f3
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,7 @@ Other API changes
- :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
- The methods :meth:`Series.round`, :meth:`DataFrame.__invert__`, :meth:`Series.__invert__`, :meth:`DataFrame.swapaxes`, :meth:`DataFrame.first`, :meth:`DataFrame.last`, :meth:`Series.first`, :meth:`Series.last` and :meth:`DataFrame.align` will now always return new objects (:issue:`51032`)
- :class:`DataFrame` and :class:`DataFrameGroupBy` aggregations (e.g. "sum") with object-dtype columns no longer infer non-object dtypes for their results, explicitly call ``result.infer_objects(copy=False)`` on the result to obtain the old behavior (:issue:`51205`, :issue:`49603`)
- Division by zero with :class:`ArrowDtype` dtypes returns ``-inf``, ``nan``, or ``inf`` depending on the numerator, instead of raising (:issue:`51541`)
- Added :func:`pandas.api.types.is_any_real_numeric_dtype` to check for real numeric dtypes (:issue:`51152`)

.. note::
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def floordiv_compat(
) -> pa.ChunkedArray:
# Ensure int // int -> int mirroring Python/Numpy behavior
# as pc.floor(pc.divide(int, int)) -> float
result = pc.floor(pc.divide_checked(left, right))
result = pc.floor(pc.divide(left, right))
if pa.types.is_integer(left.type) and pa.types.is_integer(right.type):
result = result.cast(left.type)
return result
Expand All @@ -118,8 +118,8 @@ def floordiv_compat(
"rsub": lambda x, y: pc.subtract_checked(y, x),
"mul": pc.multiply_checked,
"rmul": lambda x, y: pc.multiply_checked(y, x),
"truediv": lambda x, y: pc.divide_checked(cast_for_truediv(x, y), y),
"rtruediv": lambda x, y: pc.divide_checked(y, cast_for_truediv(x, y)),
"truediv": lambda x, y: pc.divide(cast_for_truediv(x, y), y),
"rtruediv": lambda x, y: pc.divide(y, cast_for_truediv(x, y)),
"floordiv": lambda x, y: floordiv_compat(x, y),
"rfloordiv": lambda x, y: floordiv_compat(y, x),
"mod": NotImplemented,
Expand Down
16 changes: 13 additions & 3 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,16 @@ def test_basic_equals(self, data):
class TestBaseArithmeticOps(base.BaseArithmeticOpsTests):
divmod_exc = NotImplementedError

def get_op_from_name(self, op_name):
    """Return the callable used to compute the expected result for ``op_name``.

    The reversed division operators are routed through NumPy, because the
    NumPy versions do not raise on division by zero. Every other operator
    name is delegated to ``tm.get_op_from_name``.
    """
    # Reversed-division names mapped to the NumPy function that replaces them.
    numpy_division = {
        "rtruediv": np.divide,
        "rfloordiv": np.floor_divide,
    }
    division = numpy_division.get(op_name.strip("_"))
    if division is None:
        return tm.get_op_from_name(op_name)

    # Reversed operator: the operands are swapped before dividing.
    return lambda x, y: division(y, x)


def _patch_combine(self, obj, other, op):
# BaseOpsUtil._combine can upcast expected dtype
# (because it generates expected on python scalars)
Expand Down Expand Up @@ -996,8 +1006,8 @@ def _get_arith_xfail_marker(self, opname, pa_dtype):
),
)
elif (
opname in {"__rtruediv__", "__rfloordiv__"}
and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype))
opname in {"__rfloordiv__"}
and pa.types.is_integer(pa_dtype)
and not pa_version_under7p0
):
mark = pytest.mark.xfail(
Expand Down Expand Up @@ -1111,7 +1121,7 @@ def test_arith_series_with_array(
pa.types.is_floating(pa_dtype)
or (
pa.types.is_integer(pa_dtype)
and all_arithmetic_operators != "__truediv__"
and all_arithmetic_operators not in ["__truediv__", "__rtruediv__"]
)
or pa.types.is_duration(pa_dtype)
or pa.types.is_timestamp(pa_dtype)
Expand Down

0 comments on commit a3701f3

Please sign in to comment.