diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d11ab82294be1..b25a66bd2a69f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -739,6 +739,7 @@ Other Deprecations - Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`) - Deprecated allowing strings representing full dates in :meth:`DataFrame.at_time` and :meth:`Series.at_time` (:issue:`50839`) - Deprecated backward-compatibility behavior for :meth:`DataFrame.select_dtypes` matching "str" dtype when ``np.object_`` is specified (:issue:`61916`) +- Deprecated non-ISO date string formats in :meth:`DatetimeIndex.__getitem__` with string labels. Use ISO format (YYYY-MM-DD) instead. (:issue:`58302`) - Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`) - Deprecated passing non-Index types to :meth:`Index.join`; explicitly convert to Index first (:issue:`62897`) - Deprecated silent casting of non-datetime 'other' to datetime in :meth:`Series.combine_first` (:issue:`62931`) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 382b3678da75b..21ab81ab133dd 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2,6 +2,7 @@ import datetime as dt import operator +import re from typing import ( TYPE_CHECKING, Self, @@ -110,6 +111,28 @@ def _new_DatetimeIndex(cls, d): return result +def _is_iso_format_string(date_str: str) -> bool: + """ + Check if a date string follows ISO8601 format. + + ISO format must start with a 4-digit year (YYYY), optionally followed by + month and day with consistent separators. 
+ + Examples of ISO format (True): + - 2024-01-10 + - 2024/01/10 + - 2024 01 10 + - 2024-01-10T00:00:00 + + Examples of non-ISO format (False): + - 01/10/2024 (MM/DD/YYYY) + - 10/01/2024 (DD/MM/YYYY) + - 01-10-2024 (MM-DD-YYYY) + """ + # ISO format must start with 4-digit year followed by separator (-, /, ., or space) + return re.match(r"^\d{4}[-/. ]", date_str) is not None + + @inherit_names( DatetimeArray._field_ops + [ @@ -613,6 +636,14 @@ def get_loc(self, key): parsed, reso = self._parse_with_reso(key) except ValueError as err: raise KeyError(key) from err + # GH#58302 - Deprecate non-ISO string formats in .loc indexing + if not _is_iso_format_string(key): + msg = ( + "Parsing non-ISO datetime strings in .loc is deprecated " + "and will be removed in a future version. Use ISO format " + f"(YYYY-MM-DD) instead. Got '{key}'." + ) + warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level()) self._disallow_mismatched_indexing(parsed) if self._can_partial_date_slice(reso): @@ -702,11 +733,31 @@ def check_str_or_none(point) -> bool: in_index = True if start is not None: start_casted = self._maybe_cast_slice_bound(start, "left") + + # GH#58302 - Deprecate non-ISO string formats in .loc indexing + if isinstance(start, str) and not _is_iso_format_string(start): + msg = ( + "Parsing non-ISO datetime strings in .loc is deprecated " + "and will be removed in a future version. Use ISO format " + f"(YYYY-MM-DD) instead. Got '{start}'." + ) + warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level()) + mask = start_casted <= self in_index &= (start_casted == self).any() if end is not None: end_casted = self._maybe_cast_slice_bound(end, "right") + + # GH#58302 - Deprecate non-ISO string formats in .loc indexing + if isinstance(end, str) and not _is_iso_format_string(end): + msg = ( + "Parsing non-ISO datetime strings in .loc is deprecated " + "and will be removed in a future version. Use ISO format " + f"(YYYY-MM-DD) instead. Got '{end}'." 
+ ) + warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level()) + mask = (self <= end_casted) & mask in_index &= (end_casted == self).any() diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4955b1fe0da54..2ba1126a64d63 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -28,7 +28,12 @@ from pandas.core.arrays import BooleanArray import pandas.core.common as com -pytestmark = pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning") +pytestmark = [ + pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning"), + pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" + ), +] def test_repr(): diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 11877024e7be0..e0557fca6dfc0 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -26,6 +26,10 @@ from pandas.tseries.frequencies import to_offset +pytestmark = pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) + START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 94175a56f1c4a..01dc6c44e50b9 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + from pandas import ( DataFrame, DatetimeIndex, @@ -19,6 +21,10 @@ class TestSlicing: + pytestmark = pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" + ) + def test_string_index_series_name_converted(self): # GH#1644 df = DataFrame( @@ -464,3 +470,96 @@ def test_slice_reduce_to_series(self): ) 
class TestDatetimeIndexNonISODeprecation:
    """GH#58302: non-ISO string labels used with .loc emit a deprecation warning."""

    # Leading text of the deprecation message shared by every warning check.
    _DEPR_MSG = "Parsing non-ISO datetime strings in .loc is deprecated"

    @pytest.fixture
    def ser_daily(self):
        """Series of 0..14 on 15 consecutive daily stamps starting 2024-01-01."""
        daily = date_range(start="2024-01-01", freq="D", periods=15)
        return Series(range(15), index=DatetimeIndex(daily))

    @pytest.mark.parametrize(
        "date_string",
        [
            "1/10/2024",  # slash-separated, year last, no zero padding
            "01/10/2024",  # slash-separated, year last, zero padded
        ],
    )
    def test_loc_indexing_non_iso_single_key_deprecation(self, ser_daily, date_string):
        # GH#58302 - a scalar year-last label warns and still resolves
        with tm.assert_produces_warning(Pandas4Warning, match=self._DEPR_MSG):
            value = ser_daily.loc[date_string]
        assert value == 9

    @pytest.mark.parametrize(
        "date_string,expected",
        [
            ("2024-01-10", 9),  # dash separated
            ("2024/01/10", 9),  # slash separated
            ("2024 01 10", 9),  # space separated
        ],
    )
    def test_loc_indexing_iso_format_no_warning(self, ser_daily, date_string, expected):
        # GH#58302 - year-first labels must stay silent
        with tm.assert_produces_warning(None):
            value = ser_daily.loc[date_string]
        assert value == expected

    @pytest.mark.parametrize(
        "start_string",
        [
            "1/10/2024",  # slash-separated, year last, no zero padding
            "01/10/2024",  # slash-separated, year last, zero padded
        ],
    )
    def test_loc_slicing_non_iso_start_deprecation(self, ser_daily, start_string):
        # GH#58302 - only the left slice bound is non-ISO
        with tm.assert_produces_warning(Pandas4Warning, match=self._DEPR_MSG):
            subset = ser_daily.loc[start_string:"2024-01-15"]
        assert len(subset) > 0

    @pytest.mark.parametrize(
        "end_string",
        [
            "5-01-2024",  # dash-separated, year last, no zero padding
            "05-01-2024",  # dash-separated, year last, zero padded
        ],
    )
    def test_loc_slicing_non_iso_end_deprecation(self, ser_daily, end_string):
        # GH#58302 - only the right slice bound is non-ISO
        with tm.assert_produces_warning(Pandas4Warning, match=self._DEPR_MSG):
            subset = ser_daily.loc["2024-01-01":end_string]
        assert len(subset) > 0

    def test_loc_slicing_both_non_iso_deprecation(self, ser_daily):
        # GH#58302 - both slice bounds are non-ISO; the stacklevel check is
        # disabled for this case
        with tm.assert_produces_warning(
            Pandas4Warning, match=self._DEPR_MSG, check_stacklevel=False
        ):
            subset = ser_daily.loc["1/10/2024":"5-01-2024"]
        assert len(subset) > 0

    def test_loc_slicing_iso_formats_no_warning(self, ser_daily):
        # GH#58302 - a slice with two year-first bounds must stay silent
        with tm.assert_produces_warning(None):
            subset = ser_daily.loc["2024-01-05":"2024-01-10"]
        assert len(subset) == 6

    def test_loc_non_string_keys_no_warning(self, ser_daily):
        # GH#58302 - non-string keys are never subject to the deprecation
        with tm.assert_produces_warning(None):
            value = ser_daily.loc[Timestamp("2024-01-10")]
        assert value == 9
dti4[:-1] rng = pd.Index(range(3)) diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 8d173d850583f..36b0720f59d83 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -10,6 +10,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) + class TestPeriodIndex: def test_getitem_periodindex_duplicates_string_slice(self): diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index 7f298e9bdd375..f8ff922f06f55 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -19,6 +19,10 @@ import pandas._testing as tm from pandas.tests.indexing.common import _mklbl +pytestmark = pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) + class TestMultiIndexSlicers: def test_per_axis_per_level_getitem(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index de2d914aab229..3144a80c9178a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -44,6 +44,10 @@ from pandas.core.indexing import _one_ellipsis_message from pandas.tests.indexing.common import check_indexing_smoketest_or_raises +pytestmark = pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) + @pytest.mark.parametrize( "series, new_series, expected_ser", diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 3cd7f6c336956..f10f43944bd9b 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -38,6 +38,10 @@ from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import 
Minute +pytestmark = pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) + @pytest.fixture def simple_date_range_series(): diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index f3c52a674cf66..e05a6c8550ab6 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -16,6 +16,10 @@ import pandas._testing as tm from pandas.core.indexes.datetimes import date_range +pytestmark = pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) + @pytest.fixture def test_frame(): diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 97cafc33611ed..3a753983c663b 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -27,6 +27,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) + def test_fancy_getitem(): dti = date_range( diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 37d6c9b42e003..857cc99befa78 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -37,6 +37,10 @@ from pandas.tseries.offsets import BDay +pytestmark = pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) + class TestSeriesGetitemScalars: def test_getitem_object_index_float_string(self): diff --git a/pandas/tests/series/indexing/test_xs.py b/pandas/tests/series/indexing/test_xs.py index a67f3ec708f24..7120fae1a7a86 100644 --- a/pandas/tests/series/indexing/test_xs.py +++ b/pandas/tests/series/indexing/test_xs.py @@ -8,6 +8,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + 
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) + def test_xs_datetimelike_wrapping(): # GH#31630 a case where we shouldn't wrap datetime64 in Timestamp