diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 448ceffdaa1eb..812009c09520c 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -983,6 +983,7 @@ Timedelta
 
 Timezones
 ^^^^^^^^^
+- Bug in :func:`pandas.date_range` where using ``normalize=True`` could raise ``NonExistentTimeError`` or ``AmbiguousTimeError`` when crossing DST transitions, even if ``nonexistent`` or ``ambiguous`` parameters were specified. (:issue:`62602`)
 - Bug in :meth:`DatetimeIndex.union`, :meth:`DatetimeIndex.intersection`, and :meth:`DatetimeIndex.symmetric_difference` changing timezone to UTC when merging two DatetimeIndex objects with the same timezone but different units (:issue:`60080`)
 - Bug in :meth:`Series.dt.tz_localize` with a timezone-aware :class:`ArrowDtype` incorrectly converting to UTC when ``tz=None`` (:issue:`61780`)
 - Fixed bug in :func:`date_range` where tz-aware endpoints with calendar offsets (e.g. ``"MS"``) failed on DST fall-back. These now respect ``ambiguous``/ ``nonexistent``. (:issue:`52908`)
diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi
index 3195ce9641f2b..e6e23a023542b 100644
--- a/pandas/_libs/tslibs/timestamps.pyi
+++ b/pandas/_libs/tslibs/timestamps.pyi
@@ -191,7 +191,11 @@ class Timestamp(datetime):
         ambiguous: bool | Literal["raise", "NaT"] = ...,
         nonexistent: TimestampNonexistent = ...,
     ) -> Self: ...
-    def normalize(self) -> Self: ...
+    def normalize(
+        self,
+        ambiguous: bool | Literal["raise", "NaT"] = ...,
+        nonexistent: TimestampNonexistent = ...,
+    ) -> Self: ...
     # TODO: round/floor/ceil could return NaT?
     def round(
         self,
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
index 2f0c5fa9ef18e..793247d196beb 100644
--- a/pandas/_libs/tslibs/timestamps.pyx
+++ b/pandas/_libs/tslibs/timestamps.pyx
@@ -1308,7 +1308,7 @@ cdef class _Timestamp(ABCTimestamp):
     # -----------------------------------------------------------------
     # Transformation Methods
 
-    def normalize(self) -> "Timestamp":
+    def normalize(self, ambiguous="raise", nonexistent="raise") -> "Timestamp":
         """
         Normalize Timestamp to midnight, preserving tz information.
 
@@ -1346,7 +1346,7 @@ cdef class _Timestamp(ABCTimestamp):
                 "Cannot normalize Timestamp without integer overflow"
             ) from err
         ts = type(self)._from_value_and_reso(normalized, reso=self._creso, tz=None)
-        return ts.tz_localize(self.tzinfo)
+        return ts.tz_localize(self.tzinfo, ambiguous=ambiguous, nonexistent=nonexistent)
 
     # -----------------------------------------------------------------
     # Pickle Methods
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index d17ffbbfa5b4d..bdc2061f129cf 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -447,7 +447,9 @@ def _generate_range(
             end = end.as_unit(unit, round_ok=False)
 
         left_inclusive, right_inclusive = validate_inclusive(inclusive)
-        start, end = _maybe_normalize_endpoints(start, end, normalize)
+        start, end = _maybe_normalize_endpoints(
+            start, end, normalize, ambiguous, nonexistent
+        )
         tz = _infer_tz_from_endpoints(start, end, tz)
 
         if tz is not None:
@@ -466,6 +468,13 @@ def _generate_range(
             if end is not None and end.tz is not None:
                 end = end.tz_localize(None)
 
+            # Normalize the wall times again: a nonexistent midnight may have
+            # been shifted off midnight by the tz-aware normalization above,
+            # which would otherwise offset every generated point.
+            start, end = _maybe_normalize_endpoints(
+                start, end, normalize, ambiguous, nonexistent
+            )
+
             if isinstance(freq, (Tick, Day)):
                 i8values = generate_regular_range(start, end, periods, freq, unit=unit)
             else:
@@ -2878,14 +2887,24 @@ def _infer_tz_from_endpoints(
 
 
 def _maybe_normalize_endpoints(
-    start: _TimestampNoneT1, end: _TimestampNoneT2, normalize: bool
+    start: _TimestampNoneT1,
+    end: _TimestampNoneT2,
+    normalize: bool,
+    ambiguous: TimeAmbiguous = "raise",
+    nonexistent: TimeNonexistent = "raise",
 ) -> tuple[_TimestampNoneT1, _TimestampNoneT2]:
     if normalize:
+        # Timestamp.normalize takes a scalar ``ambiguous`` (bool, "raise" or
+        # "NaT"); "infer" cannot be resolved for a single endpoint, so fall
+        # back to False instead of raising on an otherwise valid option.
+        if isinstance(ambiguous, str) and ambiguous == "infer":
+            ambiguous = False
+
         if start is not None:
-            start = start.normalize()
+            start = start.normalize(ambiguous=ambiguous, nonexistent=nonexistent)
 
         if end is not None:
-            end = end.normalize()
+            end = end.normalize(ambiguous=ambiguous, nonexistent=nonexistent)
 
     return start, end
 
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index 199e3572732a0..0e647c49c88c4 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -832,6 +832,42 @@ def test_date_range_uppercase_frequency_raises(self, freq):
         with pytest.raises(ValueError, match=msg):
             pd.date_range("1/1/2000", periods=4, freq=freq)
 
+    def test_date_range_normalize_nonexistent_ambiguous_dst(self):
+        # GH#62602: Ensure normalize works with nonexistent/ambiguous times (DST)
+        tz = "Africa/Cairo"
+        start = pd.Timestamp("2024-04-26 01:00:00", tz=tz)
+        end = pd.Timestamp("2024-04-27 00:00:00", tz=tz)
+
+        result = pd.date_range(
+            start=start,
+            end=end,
+            freq="D",
+            tz=tz,
+            nonexistent="shift_forward",
+            ambiguous=True,
+            normalize=True,
+        )
+
+        expected = pd.to_datetime(
+            ["2024-04-26 01:00:00", "2024-04-27 00:00:00"], unit="ns"
+        ).tz_localize(tz)
+
+        tm.assert_index_equal(result, expected)
+
+    def test_date_range_normalize_ambiguous_infer(self):
+        # GH#62602: "infer" is valid for date_range but not for a scalar
+        # Timestamp.normalize; normalize=True must keep accepting it
+        result = pd.date_range(
+            "2020-01-01 05:00",
+            periods=3,
+            freq="D",
+            tz="US/Eastern",
+            ambiguous="infer",
+            normalize=True,
+        )
+        expected = pd.date_range("2020-01-01", periods=3, freq="D", tz="US/Eastern")
+        tm.assert_index_equal(result, expected)
+
 
 def test_factorize_sort_without_freq():
     dta = DatetimeArray._from_sequence([0, 2, 1], dtype="M8[ns]")