Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: delta_to_nanoseconds handle non-nano #47191

Merged
merged 1 commit into from
Jun 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/offsets.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def to_offset(freq: None) -> None: ...
def to_offset(freq: timedelta | BaseOffset | str) -> BaseOffset: ...

class Tick(SingleConstructorOffset):
_reso: int
def __init__(self, n: int = ..., normalize: bool = ...) -> None: ...
@property
def delta(self) -> Timedelta: ...
Expand Down
7 changes: 7 additions & 0 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -968,42 +968,49 @@ cdef class Day(Tick):
_nanos_inc = 24 * 3600 * 1_000_000_000
_prefix = "D"
_period_dtype_code = PeriodDtypeCode.D
_reso = NPY_DATETIMEUNIT.NPY_FR_D


cdef class Hour(Tick):
# Tick subclass for the hour unit.
# 3600 seconds * 1_000_000_000 ns per second = nanoseconds in one hour.
_nanos_inc = 3600 * 1_000_000_000
# NOTE(review): presumably the frequency alias string for this offset -- confirm.
_prefix = "H"
_period_dtype_code = PeriodDtypeCode.H
# NumPy datetime unit for hours. delta_to_nanoseconds reads `_reso` from Tick
# inputs as the unit of `n`, and returns `n` unchanged when it equals the
# requested `reso`.
_reso = NPY_DATETIMEUNIT.NPY_FR_h


cdef class Minute(Tick):
# Tick subclass for the minute unit.
# 60 seconds * 1_000_000_000 ns per second = nanoseconds in one minute.
_nanos_inc = 60 * 1_000_000_000
# NOTE(review): presumably the frequency alias string for this offset -- confirm.
_prefix = "T"
_period_dtype_code = PeriodDtypeCode.T
# NumPy datetime unit for minutes. delta_to_nanoseconds reads `_reso` from Tick
# inputs as the unit of `n`, and returns `n` unchanged when it equals the
# requested `reso`.
_reso = NPY_DATETIMEUNIT.NPY_FR_m


cdef class Second(Tick):
# Tick subclass for the second unit.
# Nanoseconds in one second.
_nanos_inc = 1_000_000_000
# NOTE(review): presumably the frequency alias string for this offset -- confirm.
_prefix = "S"
_period_dtype_code = PeriodDtypeCode.S
# NumPy datetime unit for seconds. delta_to_nanoseconds reads `_reso` from Tick
# inputs as the unit of `n`, and returns `n` unchanged when it equals the
# requested `reso`.
_reso = NPY_DATETIMEUNIT.NPY_FR_s


cdef class Milli(Tick):
# Tick subclass for the millisecond unit.
# Nanoseconds in one millisecond.
_nanos_inc = 1_000_000
# NOTE(review): presumably the frequency alias string for this offset -- confirm.
_prefix = "L"
_period_dtype_code = PeriodDtypeCode.L
# NumPy datetime unit for milliseconds. delta_to_nanoseconds reads `_reso` from
# Tick inputs as the unit of `n`, and returns `n` unchanged when it equals the
# requested `reso`.
_reso = NPY_DATETIMEUNIT.NPY_FR_ms


cdef class Micro(Tick):
# Tick subclass for the microsecond unit.
# Nanoseconds in one microsecond.
_nanos_inc = 1000
# NOTE(review): presumably the frequency alias string for this offset -- confirm.
_prefix = "U"
_period_dtype_code = PeriodDtypeCode.U
# NumPy datetime unit for microseconds. delta_to_nanoseconds reads `_reso` from
# Tick inputs as the unit of `n`, and returns `n` unchanged when it equals the
# requested `reso`.
_reso = NPY_DATETIMEUNIT.NPY_FR_us


cdef class Nano(Tick):
# Tick subclass for the nanosecond unit.
# One nanosecond per tick.
_nanos_inc = 1
# NOTE(review): presumably the frequency alias string for this offset -- confirm.
_prefix = "N"
_period_dtype_code = PeriodDtypeCode.N
# NumPy datetime unit for nanoseconds; this is also the default `reso` of
# delta_to_nanoseconds, which returns a Tick's `n` unchanged when `_reso`
# equals the requested `reso`.
_reso = NPY_DATETIMEUNIT.NPY_FR_ns


def delta_to_tick(delta: timedelta) -> Tick:
Expand Down
15 changes: 9 additions & 6 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1680,14 +1680,17 @@ cdef class _Period(PeriodMixin):

def _add_timedeltalike_scalar(self, other) -> "Period":
cdef:
int64_t nanos, base_nanos
int64_t inc

if is_tick_object(self.freq):
nanos = delta_to_nanoseconds(other)
base_nanos = self.freq.base.nanos
if nanos % base_nanos == 0:
ordinal = self.ordinal + (nanos // base_nanos)
return Period(ordinal=ordinal, freq=self.freq)
try:
inc = delta_to_nanoseconds(other, reso=self.freq._reso, round_ok=False)
except ValueError as err:
raise IncompatibleFrequency("Input cannot be converted to "
f"Period(freq={self.freqstr})") from err
# TODO: overflow-check here
ordinal = self.ordinal + inc
return Period(ordinal=ordinal, freq=self.freq)
raise IncompatibleFrequency("Input cannot be converted to "
f"Period(freq={self.freqstr})")

Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ from .np_datetime cimport NPY_DATETIMEUNIT


# Exposed for tslib, not intended for outside use.
cpdef int64_t delta_to_nanoseconds(delta) except? -1
cpdef int64_t delta_to_nanoseconds(
delta, NPY_DATETIMEUNIT reso=*, bint round_ok=*, bint allow_year_month=*
) except? -1
cdef convert_to_timedelta64(object ts, str unit)
cdef bint is_any_td_scalar(object obj)

Expand Down
7 changes: 6 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,12 @@ def array_to_timedelta64(
errors: str = ...,
) -> np.ndarray: ... # np.ndarray[m8ns]
def parse_timedelta_unit(unit: str | None) -> UnitChoices: ...
def delta_to_nanoseconds(delta: np.timedelta64 | timedelta | Tick) -> int: ...
def delta_to_nanoseconds(
delta: np.timedelta64 | timedelta | Tick,
reso: int = ..., # NPY_DATETIMEUNIT
round_ok: bool = ...,
allow_year_month: bool = ...,
) -> int: ...

class Timedelta(timedelta):
min: ClassVar[Timedelta]
Expand Down
75 changes: 62 additions & 13 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -201,28 +201,76 @@ def ints_to_pytimedelta(ndarray m8values, box=False):

# ----------------------------------------------------------------------

cpdef int64_t delta_to_nanoseconds(delta) except? -1:
if is_tick_object(delta):
return delta.nanos
if isinstance(delta, _Timedelta):
if delta._reso == NPY_FR_ns:
return delta.value
raise NotImplementedError(delta._reso)

if is_timedelta64_object(delta):
return get_timedelta64_value(ensure_td64ns(delta))
cpdef int64_t delta_to_nanoseconds(
delta,
NPY_DATETIMEUNIT reso=NPY_FR_ns,
bint round_ok=True,
bint allow_year_month=False,
) except? -1:
cdef:
_Timedelta td
NPY_DATETIMEUNIT in_reso
int64_t n

if is_tick_object(delta):
n = delta.n
in_reso = delta._reso
if in_reso == reso:
return n
else:
td = Timedelta._from_value_and_reso(delta.n, reso=in_reso)

elif isinstance(delta, _Timedelta):
td = delta
n = delta.value
in_reso = delta._reso
if in_reso == reso:
return n

elif is_timedelta64_object(delta):
in_reso = get_datetime64_unit(delta)
n = get_timedelta64_value(delta)
if in_reso == reso:
return n
else:
# _from_value_and_reso does not support Year, Month, or unit-less,
# so we have special handling if specified
try:
td = Timedelta._from_value_and_reso(n, reso=in_reso)
except NotImplementedError:
if allow_year_month:
td64 = ensure_td64ns(delta)
return delta_to_nanoseconds(td64, reso=reso)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should reso always be NPY_FR_ns here? If so, would it be safer to just hardcode reso=NPY_FR_ns?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, will update

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wait, no. this is correct as-is

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah so this branch converts delta to ns no matter what to ensure the recursive call will exit appropriately the next time. Got it

else:
raise

if PyDelta_Check(delta):
elif PyDelta_Check(delta):
in_reso = NPY_DATETIMEUNIT.NPY_FR_us
try:
return (
n = (
delta.days * 24 * 3600 * 1_000_000
+ delta.seconds * 1_000_000
+ delta.microseconds
) * 1000
)
except OverflowError as err:
raise OutOfBoundsTimedelta(*err.args) from err

raise TypeError(type(delta))
if in_reso == reso:
return n
else:
td = Timedelta._from_value_and_reso(n, reso=in_reso)

else:
raise TypeError(type(delta))

try:
return td._as_reso(reso, round_ok=round_ok).value
except OverflowError as err:
unit_str = npy_unit_to_abbrev(reso)
raise OutOfBoundsTimedelta(
f"Cannot cast {str(delta)} to unit={unit_str} without overflow."
) from err


@cython.overflowcheck(True)
Expand Down Expand Up @@ -1411,6 +1459,7 @@ cdef class _Timedelta(timedelta):
else:
mult = get_conversion_factor(self._reso, reso)
with cython.overflowcheck(True):
# Note: caller is responsible for re-raising as OutOfBoundsTimedelta
value = self.value * mult
return type(self)._from_value_and_reso(value, reso=reso)

Expand Down
12 changes: 11 additions & 1 deletion pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,17 @@ cdef class _Timestamp(ABCTimestamp):
raise NotImplementedError(self._reso)

if is_any_td_scalar(other):
nanos = delta_to_nanoseconds(other)
if (
is_timedelta64_object(other)
and get_datetime64_unit(other) == NPY_DATETIMEUNIT.NPY_FR_GENERIC
):
# TODO: deprecate allowing this? We only get here
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deprecate specifically adding a np.timedelta64 object without a unit (and not the interval adding behavior)?

If so, +1.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

correct

# with test_timedelta_add_timestamp_interval
other = np.timedelta64(other.view("i8"), "ns")
# TODO: disallow round_ok, allow_year_month?
nanos = delta_to_nanoseconds(
other, reso=self._reso, round_ok=True, allow_year_month=True
)
try:
result = type(self)(self.value + nanos, tz=self.tzinfo)
except OverflowError:
Expand Down
25 changes: 3 additions & 22 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1120,28 +1120,9 @@ def _add_timedeltalike_scalar(self, other):
new_values.fill(iNaT)
return type(self)(new_values, dtype=self.dtype)

# FIXME: this may overflow with non-nano
inc = delta_to_nanoseconds(other)

if not is_period_dtype(self.dtype):
# FIXME: don't hardcode 7, 8, 9, 10 here
# TODO: maybe patch delta_to_nanoseconds to take reso?

# error: "DatetimeLikeArrayMixin" has no attribute "_reso"
reso = self._reso # type: ignore[attr-defined]
if reso == 10:
pass
elif reso == 9:
# microsecond
inc = inc // 1000
elif reso == 8:
# millisecond
inc = inc // 1_000_000
elif reso == 7:
# second
inc = inc // 1_000_000_000
else:
raise NotImplementedError(reso)
# PeriodArray overrides, so we only get here with DTA/TDA
# error: "DatetimeLikeArrayMixin" has no attribute "_reso"
inc = delta_to_nanoseconds(other, reso=self._reso) # type: ignore[attr-defined]

new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan)
new_values = new_values.view("i8")
Expand Down
14 changes: 10 additions & 4 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,7 @@ def _add_offset(self, other: BaseOffset):
self._require_matching_freq(other, base=True)
return self._addsub_int_array_or_scalar(other.n, operator.add)

# TODO: can we de-duplicate with Period._add_timedeltalike_scalar?
def _add_timedeltalike_scalar(self, other):
"""
Parameters
Expand All @@ -797,10 +798,15 @@ def _add_timedeltalike_scalar(self, other):
raise raise_on_incompatible(self, other)

if notna(other):
# special handling for np.timedelta64("NaT"), avoid calling
# _check_timedeltalike_freq_compat as that would raise TypeError
other = self._check_timedeltalike_freq_compat(other)
other = np.timedelta64(other, "ns")
# Convert to an integer increment of our own freq, disallowing
# e.g. 30seconds if our freq is minutes.
try:
inc = delta_to_nanoseconds(other, reso=self.freq._reso, round_ok=False)
except ValueError as err:
# "Cannot losslessly convert units"
raise raise_on_incompatible(self, other) from err

return self._addsub_int_array_or_scalar(inc, operator.add)

return super()._add_timedeltalike_scalar(other)

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/scalar/timedelta/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ def test_td_add_timestamp_overflow(self):
with pytest.raises(OverflowError, match=msg):
Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D")

msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timestamp("1700-01-01") + timedelta(days=13 * 19999)

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/scalar/timedelta/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ def test_overflow_on_construction():
with pytest.raises(OverflowError, match=msg):
Timedelta(7 * 19999, unit="D")

msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timedelta(timedelta(days=13 * 19999))

Expand Down
11 changes: 8 additions & 3 deletions pandas/tests/scalar/timestamp/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from pandas._libs.tslibs import (
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
Timedelta,
Timestamp,
offsets,
Expand Down Expand Up @@ -45,16 +46,20 @@ def test_overflow_offset_raises(self):
"will overflow"
)
lmsg = "|".join(
["Python int too large to convert to C long", "int too big to convert"]
[
"Python int too large to convert to C (long|int)",
"int too big to convert",
]
)
lmsg2 = r"Cannot cast <-?20169940 \* Days> to unit=ns without overflow"

with pytest.raises(OverflowError, match=lmsg):
with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
stamp + offset_overflow

with pytest.raises(OverflowError, match=msg):
offset_overflow + stamp

with pytest.raises(OverflowError, match=lmsg):
with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
stamp - offset_overflow

# xref https://github.com/pandas-dev/pandas/issues/14080
Expand Down
9 changes: 1 addition & 8 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1847,14 +1847,7 @@ def test_to_datetime_list_of_integers(self):
def test_to_datetime_overflow(self):
# gh-17637
# we are overflowing Timedelta range here

msg = "|".join(
[
"Python int too large to convert to C long",
"long too big to convert",
"int too big to convert",
]
)
msg = "Cannot cast 139999 days, 0:00:00 to unit=ns without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
date_range(start="1/1/1700", freq="B", periods=100000)

Expand Down