diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 7f90bc5d7da74..2d18a275f26f5 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -333,58 +333,39 @@ cdef convert_to_timedelta64(object ts, str unit): Handle these types of objects: - timedelta/Timedelta - - timedelta64 - - an offset - - np.int64 (with unit providing a possible modifier) - - None/NaT - Return an ns based int64 + Return a timedelta64[ns] object """ # Caller is responsible for checking unit not in ["Y", "y", "M"] - if checknull_with_nat_and_na(ts): - return np.timedelta64(NPY_NAT, "ns") - elif isinstance(ts, _Timedelta): + if isinstance(ts, _Timedelta): # already in the proper format if ts._creso != NPY_FR_ns: ts = ts.as_unit("ns").asm8 else: ts = np.timedelta64(ts._value, "ns") - elif cnp.is_timedelta64_object(ts): - ts = ensure_td64ns(ts) - elif is_integer_object(ts): - if ts == NPY_NAT: - return np.timedelta64(NPY_NAT, "ns") - else: - ts = _maybe_cast_from_unit(ts, unit) - elif is_float_object(ts): - ts = _maybe_cast_from_unit(ts, unit) - elif isinstance(ts, str): - if (len(ts) > 0 and ts[0] == "P") or (len(ts) > 1 and ts[:2] == "-P"): - ts = parse_iso_format_string(ts) - else: - ts = parse_timedelta_string(ts) - ts = np.timedelta64(ts, "ns") - elif is_tick_object(ts): - ts = np.timedelta64(ts.nanos, "ns") - if PyDelta_Check(ts): + elif PyDelta_Check(ts): ts = np.timedelta64(delta_to_nanoseconds(ts), "ns") elif not cnp.is_timedelta64_object(ts): raise TypeError(f"Invalid type for timedelta scalar: {type(ts)}") return ts.astype("timedelta64[ns]") -cdef _maybe_cast_from_unit(ts, str unit): +cdef _numeric_to_td64ns(object item, str unit): # caller is responsible for checking # assert unit not in ["Y", "y", "M"] + # assert is_integer_object(item) or is_float_object(item) + if is_integer_object(item) and item == NPY_NAT: + return np.timedelta64(NPY_NAT, "ns") + try: - ts = cast_from_unit(ts, unit) + item = 
cast_from_unit(item, unit) except OutOfBoundsDatetime as err: raise OutOfBoundsTimedelta( - f"Cannot cast {ts} from {unit} to 'ns' without overflow." + f"Cannot cast {item} from {unit} to 'ns' without overflow." ) from err - ts = np.timedelta64(ts, "ns") + ts = np.timedelta64(item, "ns") return ts @@ -408,10 +389,11 @@ def array_to_timedelta64( cdef: Py_ssize_t i, n = values.size ndarray result = np.empty((values).shape, dtype="m8[ns]") - object item + object item, td64ns_obj int64_t ival cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values) cnp.flatiter it + str parsed_unit = parse_timedelta_unit(unit or "ns") if values.descr.type_num != cnp.NPY_OBJECT: # raise here otherwise we segfault below @@ -431,70 +413,63 @@ def array_to_timedelta64( ) cnp.PyArray_ITER_NEXT(it) - # Usually, we have all strings. If so, we hit the fast path. - # If this path fails, we try conversion a different way, and - # this is where all of the error handling will take place. - try: - for i in range(n): - # Analogous to: item = values[i] - item = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + for i in range(n): + item = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - ival = _item_to_timedelta64_fastpath(item) + try: + if checknull_with_nat_and_na(item): + ival = NPY_NAT - # Analogous to: iresult[i] = ival - (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival + elif cnp.is_timedelta64_object(item): + td64ns_obj = ensure_td64ns(item) + ival = cnp.get_timedelta64_value(td64ns_obj) - cnp.PyArray_MultiIter_NEXT(mi) + elif isinstance(item, _Timedelta): + if item._creso != NPY_FR_ns: + ival = item.as_unit("ns")._value + else: + ival = item._value + + elif PyDelta_Check(item): + # i.e. 
isinstance(item, timedelta) + ival = delta_to_nanoseconds(item) + + elif isinstance(item, str): + if ( + (len(item) > 0 and item[0] == "P") + or (len(item) > 1 and item[:2] == "-P") + ): + ival = parse_iso_format_string(item) + else: + ival = parse_timedelta_string(item) - except (TypeError, ValueError): - cnp.PyArray_MultiIter_RESET(mi) + elif is_tick_object(item): + ival = item.nanos - parsed_unit = parse_timedelta_unit(unit or "ns") - for i in range(n): - item = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + elif is_integer_object(item) or is_float_object(item): + td64ns_obj = _numeric_to_td64ns(item, parsed_unit) + ival = cnp.get_timedelta64_value(td64ns_obj) - ival = _item_to_timedelta64(item, parsed_unit, errors) + else: + raise TypeError(f"Invalid type for timedelta scalar: {type(item)}") + + except ValueError as err: + if errors == "coerce": + ival = NPY_NAT + elif "unit abbreviation w/o a number" in str(err): + # re-raise with more pertinent message + msg = f"Could not convert '{item}' to NumPy timedelta" + raise ValueError(msg) from err + else: + raise - (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival - cnp.PyArray_MultiIter_NEXT(mi) + cnp.PyArray_MultiIter_NEXT(mi) return result -cdef int64_t _item_to_timedelta64_fastpath(object item) except? -1: - """ - See array_to_timedelta64. - """ - if item is NaT: - # we allow this check in the fast-path because NaT is a C-object - # so this is an inexpensive check - return NPY_NAT - else: - return parse_timedelta_string(item) - - -cdef int64_t _item_to_timedelta64( - object item, - str parsed_unit, - str errors -) except? -1: - """ - See array_to_timedelta64. 
- """ - try: - return cnp.get_timedelta64_value(convert_to_timedelta64(item, parsed_unit)) - except ValueError as err: - if errors == "coerce": - return NPY_NAT - elif "unit abbreviation w/o a number" in str(err): - # re-raise with more pertinent message - msg = f"Could not convert '{item}' to NumPy timedelta" - raise ValueError(msg) from err - else: - raise - - @cython.cpow(True) cdef int64_t parse_timedelta_string(str ts) except? -1: """ @@ -2154,12 +2129,14 @@ class Timedelta(_Timedelta): new_value = delta_to_nanoseconds(value, reso=new_reso) return cls._from_value_and_reso(new_value, reso=new_reso) + elif checknull_with_nat_and_na(value): + return NaT + elif is_integer_object(value) or is_float_object(value): # unit=None is de-facto 'ns' unit = parse_timedelta_unit(unit) - value = convert_to_timedelta64(value, unit) - elif checknull_with_nat_and_na(value): - return NaT + value = _numeric_to_td64ns(value, unit) + else: raise ValueError( "Value must be Timedelta, string, integer, " diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 52943f4e10148..b83dc3052fbb3 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1011,7 +1011,9 @@ def test_dt64arr_sub_timestamp_tzaware(self, box_with_array): ser = tm.box_expected(ser, box_with_array) - delta_series = Series([np.timedelta64(0, "D"), np.timedelta64(1, "D")]) + delta_series = Series( + [np.timedelta64(0, "D"), np.timedelta64(1, "D")], dtype="m8[ns]" + ) expected = tm.box_expected(delta_series, box_with_array) tm.assert_equal(ser - ts, expected) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 67762e0b89c73..0514bc203bf66 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1641,7 +1641,9 @@ def test_pi_sub_period(self): result = np.subtract(Period("2012-01", freq="M"), idx) tm.assert_index_equal(result, exp) - 
exp = TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name="idx") + exp = TimedeltaIndex( + [np.nan, np.nan, np.nan, np.nan], name="idx", dtype="m8[ns]" + ) result = idx - Period("NaT", freq="M") tm.assert_index_equal(result, exp) assert result.freq == exp.freq @@ -1655,7 +1657,7 @@ def test_pi_sub_pdnat(self): idx = PeriodIndex( ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" ) - exp = TimedeltaIndex([pd.NaT] * 4, name="idx") + exp = TimedeltaIndex([pd.NaT] * 4, name="idx", dtype="m8[ns]") tm.assert_index_equal(pd.NaT - idx, exp) tm.assert_index_equal(idx - pd.NaT, exp) @@ -1674,6 +1676,8 @@ def test_pi_sub_period_nat(self): exp = pd.Index([12 * off, pd.NaT, 10 * off, 9 * off], name="idx") tm.assert_index_equal(result, exp) - exp = TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name="idx") + exp = TimedeltaIndex( + [np.nan, np.nan, np.nan, np.nan], name="idx", dtype="m8[ns]" + ) tm.assert_index_equal(idx - Period("NaT", freq="M"), exp) tm.assert_index_equal(Period("NaT", freq="M") - idx, exp) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 7212b93a7c5b7..9a54386abf281 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -850,7 +850,7 @@ def test_operators_timedelta64(self): assert rs.dtype == "timedelta64[ns]" df = DataFrame({"A": v1}) - td = Series([timedelta(days=i) for i in range(3)]) + td = Series([timedelta(days=i) for i in range(3)], dtype="m8[ns]") assert td.dtype == "timedelta64[ns]" # series on the rhs @@ -875,7 +875,9 @@ def test_operators_timedelta64(self): # datetimes on rhs result = df["A"] - datetime(2001, 1, 1) - expected = Series([timedelta(days=4017 + i) for i in range(3)], name="A") + expected = Series( + [timedelta(days=4017 + i) for i in range(3)], name="A", dtype="m8[ns]" + ) tm.assert_series_equal(result, expected) assert result.dtype == "m8[ns]" @@ -1559,7 +1561,7 @@ def test_tdi_rmul_arraylike(self, other, 
box_with_array): def test_td64arr_mul_bool_scalar_raises(self, box_with_array): # GH#58054 - ser = Series(np.arange(5) * timedelta(hours=1)) + ser = Series(np.arange(5) * timedelta(hours=1), dtype="m8[ns]") obj = tm.box_expected(ser, box_with_array) msg = r"Cannot multiply 'timedelta64\[ns\]' by bool" @@ -1582,7 +1584,7 @@ def test_td64arr_mul_bool_scalar_raises(self, box_with_array): ) def test_td64arr_mul_bool_raises(self, dtype, box_with_array): # GH#58054 - ser = Series(np.arange(5) * timedelta(hours=1)) + ser = Series(np.arange(5) * timedelta(hours=1), dtype="m8[ns]") obj = tm.box_expected(ser, box_with_array) other = Series(np.arange(5) < 0.5, dtype=dtype) @@ -1611,7 +1613,7 @@ def test_td64arr_mul_bool_raises(self, dtype, box_with_array): ], ) def test_td64arr_mul_masked(self, dtype, box_with_array): - ser = Series(np.arange(5) * timedelta(hours=1)) + ser = Series(np.arange(5) * timedelta(hours=1), dtype="m8[ns]") obj = tm.box_expected(ser, box_with_array) other = Series(np.arange(5), dtype=dtype) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 7c4ce4c67f13d..4d235587c2407 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -747,12 +747,14 @@ def test_operators_timedelta64(self): # works when only those columns are selected result = mixed[["A", "B"]].min(axis=1) - expected = Series([timedelta(days=-1)] * 3) + expected = Series([timedelta(days=-1)] * 3, dtype="m8[ns]") tm.assert_series_equal(result, expected) result = mixed[["A", "B"]].min() expected = Series( - [timedelta(seconds=5 * 60 + 5), timedelta(days=-1)], index=["A", "B"] + [timedelta(seconds=5 * 60 + 5), timedelta(days=-1)], + index=["A", "B"], + dtype="m8[ns]", ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py index 607336060cbbc..b1daa0c517570 100644 --- 
a/pandas/tests/indexes/timedeltas/test_formats.py +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -22,7 +22,7 @@ def test_repr_round_days_non_nano(self): @pytest.mark.parametrize("method", ["__repr__", "__str__"]) def test_representation(self, method): - idx1 = TimedeltaIndex([], freq="D") + idx1 = TimedeltaIndex([], freq="D", dtype="m8[ns]") idx2 = TimedeltaIndex(["1 days"], freq="D") idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D") idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") @@ -53,7 +53,7 @@ def test_representation(self, method): # TODO: this is a Series.__repr__ test def test_representation_to_series(self): - idx1 = TimedeltaIndex([], freq="D") + idx1 = TimedeltaIndex([], freq="D", dtype="m8[ns]") idx2 = TimedeltaIndex(["1 days"], freq="D") idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D") idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") @@ -83,7 +83,7 @@ def test_representation_to_series(self): def test_summary(self): # GH#9116 - idx1 = TimedeltaIndex([], freq="D") + idx1 = TimedeltaIndex([], freq="D", dtype="m8[ns]") idx2 = TimedeltaIndex(["1 days"], freq="D") idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D") idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index 3ab3c3e1c8633..951b8346ac9e6 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -160,7 +160,7 @@ def test_zero_length_input_index(self, sort): # if no overlap exists return empty index ( timedelta_range("1 day", periods=10, freq="h", name="idx")[5:], - TimedeltaIndex([], freq="h", name="idx"), + TimedeltaIndex([], freq="h", name="idx", dtype="m8[ns]"), ), ], ) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index d67e725233127..9c93be0937e91 100644 --- a/pandas/tests/io/json/test_pandas.py +++ 
b/pandas/tests/io/json/test_pandas.py @@ -1131,7 +1131,7 @@ def test_url(self, field, dtype, httpserver): def test_timedelta(self): converter = lambda x: pd.to_timedelta(x, unit="ms") - ser = Series([timedelta(23), timedelta(seconds=5)]) + ser = Series([timedelta(23), timedelta(seconds=5)], dtype="m8[ns]") assert ser.dtype == "timedelta64[ns]" msg = ( @@ -1148,7 +1148,7 @@ def test_timedelta(self): result = read_json(StringIO(ser.to_json()), typ="series").apply(converter) tm.assert_series_equal(result, ser) - frame = DataFrame([timedelta(23), timedelta(seconds=5)]) + frame = DataFrame([timedelta(23), timedelta(seconds=5)], dtype="m8[ns]") assert frame[0].dtype == "timedelta64[ns]" with tm.assert_produces_warning(Pandas4Warning, match=msg): diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index a9ed61e2c40cb..488eb99f81ef5 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -307,7 +307,7 @@ def test_sub_datetimelike_align(self): dt.iloc[2] = np.nan dt2 = dt[::-1] - expected = Series([timedelta(0), timedelta(0), pd.NaT]) + expected = Series([timedelta(0), timedelta(0), pd.NaT], dtype="m8[ns]") # name is reset result = dt2 - dt tm.assert_series_equal(result, expected) diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 08ad7b7fb1b93..9d5866ef97017 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -62,7 +62,9 @@ def test_to_timedelta_same_np_timedelta64(self): def test_to_timedelta_series(self): # Series - expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) + expected = Series( + [timedelta(days=1), timedelta(days=1, seconds=1)], dtype="m8[ns]" + ) msg = "'d' is deprecated and will be removed in a future version." 
with tm.assert_produces_warning(Pandas4Warning, match=msg): @@ -185,7 +187,7 @@ def test_unambiguous_timedelta_values(self, val, errors): def test_to_timedelta_via_apply(self): # GH 5458 - expected = Series([np.timedelta64(1, "s")]) + expected = Series([np.timedelta64(1, "s")], dtype="m8[ns]") result = Series(["00:00:01"]).apply(to_timedelta) tm.assert_series_equal(result, expected) @@ -247,7 +249,7 @@ def test_to_timedelta_coerce_strings_unit(self): ) def test_to_timedelta_nullable_int64_dtype(self, expected_val, result_val): # GH 35574 - expected = Series([timedelta(days=1), expected_val]) + expected = Series([timedelta(days=1), expected_val], dtype="m8[ns]") result = to_timedelta(Series([1, result_val], dtype="Int64"), unit="days") tm.assert_series_equal(result, expected)