Skip to content

Commit

Permalink
Backport PR #52821 on branch 2.0.x (BUG: Non unitless np NaT arithmetic with non-nano) (#52847)
Browse files Browse the repository at this point in the history

BUG: Non unitless np NaT arithmetic with non-nano (#52821)

(cherry picked from commit eb9a3e8)

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
  • Loading branch information
phofl and mroeschke committed Apr 22, 2023
1 parent fa94d3b commit e52861d
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Bug fixes
- Bug in :meth:`DataFrame.max` and related casting different :class:`Timestamp` resolutions always to nanoseconds (:issue:`52524`)
- Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`)
- Bug in :meth:`Series.dt.tz_localize` incorrectly localizing timestamps with :class:`ArrowDtype` (:issue:`52677`)
- Bug in arithmetic between ``np.datetime64`` and ``np.timedelta64`` ``NaT`` scalars with units always returning nanosecond resolution (:issue:`52295`)
- Bug in logical and comparison operations between :class:`ArrowDtype` and numpy masked types (e.g. ``"boolean"``) (:issue:`52625`)
- Fixed bug in :func:`merge` when merging with ``ArrowDtype`` on one side and a NumPy dtype on the other side (:issue:`52406`)
- Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`)
Expand Down
25 changes: 22 additions & 3 deletions pandas/core/ops/array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,14 @@
lib,
ops as libops,
)
from pandas._libs.tslibs import BaseOffset
from pandas._libs.tslibs import (
BaseOffset,
get_supported_reso,
get_unit_from_dtype,
is_supported_unit,
is_unitless,
npy_unit_to_abbrev,
)
from pandas._typing import (
ArrayLike,
Shape,
Expand Down Expand Up @@ -475,7 +482,13 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
from pandas.core.arrays import DatetimeArray

# Avoid possible ambiguities with pd.NaT
obj = obj.astype("datetime64[ns]")
# GH 52295
if is_unitless(obj.dtype):
obj = obj.astype("datetime64[ns]")
elif not is_supported_unit(get_unit_from_dtype(obj.dtype)):
unit = get_unit_from_dtype(obj.dtype)
closest_unit = npy_unit_to_abbrev(get_supported_reso(unit))
obj = obj.astype(f"datetime64[{closest_unit}]")
right = np.broadcast_to(obj, shape)
return DatetimeArray(right)

Expand All @@ -488,7 +501,13 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
# wrapping timedelta64("NaT") in Timedelta returns NaT,
# which would incorrectly be treated as a datetime-NaT, so
# we broadcast and wrap in a TimedeltaArray
obj = obj.astype("timedelta64[ns]")
# GH 52295
if is_unitless(obj.dtype):
obj = obj.astype("timedelta64[ns]")
elif not is_supported_unit(get_unit_from_dtype(obj.dtype)):
unit = get_unit_from_dtype(obj.dtype)
closest_unit = npy_unit_to_abbrev(get_supported_reso(unit))
obj = obj.astype(f"timedelta64[{closest_unit}]")
right = np.broadcast_to(obj, shape)
return TimedeltaArray(right)

Expand Down
37 changes: 37 additions & 0 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -2436,3 +2436,40 @@ def test_dt64arr_addsub_object_dtype_2d():

assert result2.shape == (4, 1)
assert all(td._value == 0 for td in result2.ravel())


def test_non_nano_dt64_addsub_np_nat_scalars():
    """Check add/sub of ``ms``-unit NumPy NaT scalars against a ``datetime64[ms]`` Series.

    Subtracting a ``datetime64("nat", "ms")`` is expected to give an all-NaT
    ``timedelta64[ms]`` Series; adding a ``timedelta64("nat", "ms")`` is
    expected to give an all-NaT ``datetime64[ms]`` Series.
    """
    # GH 52295
    dt_ser = Series(
        [1233242342344, 232432434324, 332434242344], dtype="datetime64[ms]"
    )

    # datetime - datetime-NaT -> timedelta, same unit as the operands
    diff = dt_ser - np.datetime64("nat", "ms")
    tm.assert_series_equal(diff, Series([NaT] * 3, dtype="timedelta64[ms]"))

    # datetime + timedelta-NaT -> datetime, same unit as the operands
    shifted = dt_ser + np.timedelta64("nat", "ms")
    tm.assert_series_equal(shifted, Series([NaT] * 3, dtype="datetime64[ms]"))


def test_non_nano_dt64_addsub_np_nat_scalars_unitless():
    """Check add/sub of unitless NumPy NaT scalars against a ``datetime64[ms]`` Series.

    With no unit on the NaT scalar, the expected results are all-NaT Series
    with nanosecond resolution rather than the Series' own ``ms`` unit.
    """
    # GH 52295
    # TODO: Can we default to the ser unit?
    dt_ser = Series(
        [1233242342344, 232432434324, 332434242344], dtype="datetime64[ms]"
    )

    # datetime - unitless datetime-NaT -> timedelta64[ns]
    diff = dt_ser - np.datetime64("nat")
    tm.assert_series_equal(diff, Series([NaT] * 3, dtype="timedelta64[ns]"))

    # datetime + unitless timedelta-NaT -> datetime64[ns]
    shifted = dt_ser + np.timedelta64("nat")
    tm.assert_series_equal(shifted, Series([NaT] * 3, dtype="datetime64[ns]"))


def test_non_nano_dt64_addsub_np_nat_scalars_unsupported_unit():
    """Check add/sub of day-unit NumPy NaT scalars against a ``datetime64[s]`` Series.

    The NaT scalars carry the ``"D"`` unit; the expected results are all-NaT
    Series with second resolution.
    """
    # GH 52295
    dt_ser = Series([12332, 23243, 33243], dtype="datetime64[s]")

    # datetime - day-unit datetime-NaT -> timedelta64[s]
    diff = dt_ser - np.datetime64("nat", "D")
    tm.assert_series_equal(diff, Series([NaT] * 3, dtype="timedelta64[s]"))

    # datetime + day-unit timedelta-NaT -> datetime64[s]
    shifted = dt_ser + np.timedelta64("nat", "D")
    tm.assert_series_equal(shifted, Series([NaT] * 3, dtype="datetime64[s]"))
1 change: 1 addition & 0 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array
np.datetime64("NaT", "ns"),
pd.NaT,
],
ids=repr,
)
def test_add_sub_datetimedeltalike_invalid(
self, numeric_idx, other, box_with_array
Expand Down

0 comments on commit e52861d

Please sign in to comment.