Skip to content

Commit

Permalink
Backport PR pandas-dev#57314: BUG: Fix near-minimum timestamp handling
Browse files Browse the repository at this point in the history
  • Loading branch information
robert-schmidtke authored and meeseeksmachine committed Feb 22, 2024
1 parent ea56e0c commit 251051c
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.1.rst
Expand Up @@ -21,6 +21,7 @@ Fixed regressions
~~~~~~~~~~~~~~~~~
- Fixed memory leak in :func:`read_csv` (:issue:`57039`)
- Fixed performance regression in :meth:`Series.combine_first` (:issue:`55845`)
- Fixed regression causing overflow for near-minimum timestamps (:issue:`57150`)
- Fixed regression in :func:`concat` changing long-standing behavior that always sorted the non-concatenation axis when the axis was a :class:`DatetimeIndex` (:issue:`57006`)
- Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
- Fixed regression in :func:`pandas.testing.assert_series_equal` defaulting to ``check_exact=True`` when checking the :class:`Index` (:issue:`57067`)
Expand Down
18 changes: 14 additions & 4 deletions pandas/_libs/src/vendored/numpy/datetime/np_datetime.c
Expand Up @@ -482,10 +482,20 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,

if (base == NPY_FR_ns) {
int64_t nanoseconds;
PD_CHECK_OVERFLOW(
scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds));
PD_CHECK_OVERFLOW(
checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds));

// Minimum valid timestamp in nanoseconds (1677-09-21 00:12:43.145224193).
const int64_t min_nanoseconds = NPY_MIN_INT64 + 1;
if (microseconds == min_nanoseconds / 1000 - 1) {
// For values within one microsecond of min_nanoseconds, use it as base
// and offset it with nanosecond delta to avoid overflow during scaling.
PD_CHECK_OVERFLOW(checked_int64_add(
min_nanoseconds, (dts->ps - _NS_MIN_DTS.ps) / 1000, &nanoseconds));
} else {
PD_CHECK_OVERFLOW(
scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds));
PD_CHECK_OVERFLOW(
checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds));
}

return nanoseconds;
}
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/tslibs/test_array_to_datetime.py
Expand Up @@ -296,6 +296,23 @@ def test_to_datetime_barely_out_of_bounds():
tslib.array_to_datetime(arr)


@pytest.mark.parametrize(
    "timestamp",
    [
        # Less than one microsecond above the lower bound: scaling the
        # microsecond count to nanoseconds overflows on its own, yet the
        # value is in bounds once the nanosecond part is added back.
        "1677-09-21T00:12:43.145224193",
        "1677-09-21T00:12:43.145224999",
        # Next whole microsecond; this always worked.
        "1677-09-21T00:12:43.145225000",
    ],
)
def test_to_datetime_barely_inside_bounds(timestamp):
    # see gh-57150
    # Parse the ISO string through array_to_datetime as a 1-element
    # object array; the second element of the returned pair is not checked.
    as_objects = np.array([timestamp], dtype=object)
    parsed, _unused = tslib.array_to_datetime(as_objects)

    # NumPy's own parsing of the same string is the reference value.
    expected = np.array([timestamp], dtype="M8[ns]")
    tm.assert_numpy_array_equal(parsed, expected)


# Trivial ``datetime`` subclass that adds no attributes or behaviour.
# NOTE(review): presumably used by tests further down this file to check
# that datetime subclasses are handled -- confirm against the usages.
class SubDatetime(datetime):
    pass

Expand Down

0 comments on commit 251051c

Please sign in to comment.