diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 921b30b4f91dc8..e0313271a13df0 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -22,6 +22,7 @@ cdef class _Timedelta(timedelta): cpdef timedelta to_pytimedelta(_Timedelta self) cdef bint _has_ns(self) + cdef bint _is_in_pytimedelta_bounds(self) cdef _ensure_components(_Timedelta self) cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op) cdef _Timedelta _as_creso(self, NPY_DATETIMEUNIT reso, bint round_ok=*) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 071cfb7cf541a9..d084b4944b6edb 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1093,8 +1093,27 @@ cdef class _Timedelta(timedelta): # non-invariant behavior. # see GH#44504 return hash(self.value) - else: + elif self._is_in_pytimedelta_bounds() and ( + self._creso == NPY_FR_ns or self._creso == NPY_DATETIMEUNIT.NPY_FR_us + ): + # If we can defer to timedelta.__hash__, do so, as that + # ensures the hash is invariant to our _reso. + # We can only defer for ns and us, as for these two resos we + # call _Timedelta.__new__ with the correct input in + # _timedelta_from_value_and_reso; so timedelta.__hash__ + # will be correct return timedelta.__hash__(self) + else: + # We want to ensure that two equivalent Timedelta objects + # have the same hash. So we try downcasting to the next-lowest + # resolution. + try: + obj = (<_Timedelta>self)._as_creso((self._creso + 1)) + except OverflowError: + # Doesn't fit, so we're off the hook + return hash(self.value) + else: + return hash(obj) def __richcmp__(_Timedelta self, object other, int op): cdef: @@ -1152,6 +1171,13 @@ cdef class _Timedelta(timedelta): else: raise NotImplementedError(self._creso) + cdef bint _is_in_pytimedelta_bounds(self): + """ + Check if we are within the bounds of datetime.timedelta. + """ + self._ensure_components() + return -999999999 <= self._d and self._d <= 999999999 + cdef _ensure_components(_Timedelta self): """ compute the components diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c70268bd0aef20..b83e060a666500 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -846,30 +846,19 @@ def create_data(constructor): tm.assert_frame_equal(result_Timestamp, expected) @pytest.mark.parametrize( - "klass", + "klass,name", [ - pytest.param( - np.timedelta64, - marks=pytest.mark.xfail( - reason="hash mismatch (GH#44504) causes lib.fast_multiget " - "to mess up on dict lookups with equal Timedeltas with " - "mismatched resos" - ), - ), - timedelta, - Timedelta, + (lambda x: np.timedelta64(x, "D"), "timedelta64"), + (lambda x: timedelta(days=x), "pytimedelta"), + (lambda x: Timedelta(x, "D"), "Timedelta[ns]"), + (lambda x: Timedelta(x, "D").as_unit("s"), "Timedelta[s]"), ], ) - def test_constructor_dict_timedelta64_index(self, klass): + def test_constructor_dict_timedelta64_index(self, klass, name): # GH 10160 td_as_int = [1, 2, 3, 4] - if klass is timedelta: - constructor = lambda x: timedelta(days=x) - else: - constructor = lambda x: klass(x, "D") - - data = {i: {constructor(s): 2 * i} for i, s in enumerate(td_as_int)} + data = {i: {klass(s): 2 * i} for i, s in enumerate(td_as_int)} expected = DataFrame( [ @@ -881,7 +870,13 @@ def test_constructor_dict_timedelta64_index(self, klass): index=[Timedelta(td, "D") for td in td_as_int], ) - result = DataFrame(data) + if name == "Timedelta[s]": + # TODO(2.0): passing index here shouldn't be necessary, is for now + # otherwise we raise in _extract_index + result = DataFrame(data, index=expected.index) + else: + result = DataFrame(data) + tm.assert_frame_equal(result, expected) def test_constructor_period_dict(self): diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py index e9a26b26eefc48..fd7c47d47112f7 100644 --- a/pandas/tests/libs/test_lib.py +++ b/pandas/tests/libs/test_lib.py @@ -2,6 +2,7 @@ import pytest from pandas._libs import ( + Timedelta, lib, writers as libwriters, ) @@ -42,6 +43,34 @@ def test_fast_unique_multiple_list_gen_sort(self): out = lib.fast_unique_multiple_list_gen(gen, sort=False) tm.assert_numpy_array_equal(np.array(out), expected) + def test_fast_multiget_timedelta_resos(self): + # This will become relevant for test_constructor_dict_timedelta64_index + # once Timedelta constructor preserves reso when passed a + # np.timedelta64 object + td = Timedelta(days=1) + + mapping1 = {td: 1} + mapping2 = {td.as_unit("s"): 1} + + oindex = Index([td * n for n in range(3)])._values.astype(object) + + expected = lib.fast_multiget(mapping1, oindex) + result = lib.fast_multiget(mapping2, oindex) + tm.assert_numpy_array_equal(result, expected) + + # case that can't be cast to td64ns + td = Timedelta(np.timedelta64(400, "Y")) + assert hash(td) == hash(td.as_unit("ms")) + assert hash(td) == hash(td.as_unit("us")) + mapping1 = {td: 1} + mapping2 = {td.as_unit("ms"): 1} + + oindex = Index([td * n for n in range(3)])._values.astype(object) + + expected = lib.fast_multiget(mapping1, oindex) + result = lib.fast_multiget(mapping2, oindex) + tm.assert_numpy_array_equal(result, expected) + class TestIndexing: def test_maybe_indices_to_slice_left_edge(self):