Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,7 @@ Other Deprecations
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`)
- Deprecated allowing strings representing full dates in :meth:`DataFrame.at_time` and :meth:`Series.at_time` (:issue:`50839`)
- Deprecated backward-compatibility behavior for :meth:`DataFrame.select_dtypes` matching "str" dtype when ``np.object_`` is specified (:issue:`61916`)
- Deprecated parsing of non-ISO date strings when indexing a :class:`DatetimeIndex` by label (e.g. with ``.loc``). Use ISO 8601 strings (``YYYY-MM-DD``) instead (:issue:`58302`)
- Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`)
- Deprecated passing non-Index types to :meth:`Index.join`; explicitly convert to Index first (:issue:`62897`)
- Deprecated silent casting of non-datetime 'other' to datetime in :meth:`Series.combine_first` (:issue:`62931`)
Expand Down
51 changes: 51 additions & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import datetime as dt
import operator
import re
from typing import (
TYPE_CHECKING,
Self,
Expand Down Expand Up @@ -110,6 +111,28 @@ def _new_DatetimeIndex(cls, d):
return result


def _is_iso_format_string(date_str: str) -> bool:
"""
Check if a date string follows ISO8601 format.

ISO format must start with a 4-digit year (YYYY), optionally followed by
month and day with consistent separators.

Examples of ISO format (True):
- 2024-01-10
- 2024/01/10
- 2024 01 10
- 2024-01-10T00:00:00

Examples of non-ISO format (False):
- 01/10/2024 (MM/DD/YYYY)
- 10/01/2024 (DD/MM/YYYY)
- 01-10-2024 (MM-DD-YYYY)
"""
# ISO format must start with 4-digit year followed by separator (-, /, ., or space)
return re.match(r"^\d{4}[-/. ]", date_str) is not None


@inherit_names(
DatetimeArray._field_ops
+ [
Expand Down Expand Up @@ -613,6 +636,14 @@ def get_loc(self, key):
parsed, reso = self._parse_with_reso(key)
except ValueError as err:
raise KeyError(key) from err
# GH#58302 - Deprecate non-ISO string formats in .loc indexing
if not _is_iso_format_string(key):
msg = (
"Parsing non-ISO datetime strings in .loc is deprecated "
"and will be removed in a future version. Use ISO format "
f"(YYYY-MM-DD) instead. Got '{key}'."
)
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())
self._disallow_mismatched_indexing(parsed)

if self._can_partial_date_slice(reso):
Expand Down Expand Up @@ -702,11 +733,31 @@ def check_str_or_none(point) -> bool:
in_index = True
if start is not None:
start_casted = self._maybe_cast_slice_bound(start, "left")

# GH#58302 - Deprecate non-ISO string formats in .loc indexing
if isinstance(start, str) and not _is_iso_format_string(start):
msg = (
"Parsing non-ISO datetime strings in .loc is deprecated "
"and will be removed in a future version. Use ISO format "
f"(YYYY-MM-DD) instead. Got '{start}'."
)
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())

mask = start_casted <= self
in_index &= (start_casted == self).any()

if end is not None:
end_casted = self._maybe_cast_slice_bound(end, "right")

# GH#58302 - Deprecate non-ISO string formats in .loc indexing
if isinstance(end, str) and not _is_iso_format_string(end):
msg = (
"Parsing non-ISO datetime strings in .loc is deprecated "
"and will be removed in a future version. Use ISO format "
f"(YYYY-MM-DD) instead. Got '{end}'."
)
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())

mask = (self <= end_casted) & mask
in_index &= (end_casted == self).any()

Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@
from pandas.core.arrays import BooleanArray
import pandas.core.common as com

pytestmark = pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning")
pytestmark = [
pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning"),
pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
),
]


def test_repr():
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/indexes/datetimes/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@

from pandas.tseries.frequencies import to_offset

pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)

START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)


Expand Down
99 changes: 99 additions & 0 deletions pandas/tests/indexes/datetimes/test_partial_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import numpy as np
import pytest

from pandas.errors import Pandas4Warning

from pandas import (
DataFrame,
DatetimeIndex,
Expand All @@ -19,6 +21,10 @@


class TestSlicing:
pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)

def test_string_index_series_name_converted(self):
# GH#1644
df = DataFrame(
Expand Down Expand Up @@ -464,3 +470,96 @@ def test_slice_reduce_to_series(self):
)
result = df.loc["2000", "A"]
tm.assert_series_equal(result, expected)


class TestDatetimeIndexNonISODeprecation:
    """Tests for deprecation of non-ISO string formats in .loc indexing. GH#58302"""

    @pytest.fixture
    def ser_daily(self):
        """Series holding 0..14, indexed by the 15 days starting 2024-01-01."""
        # date_range already returns a DatetimeIndex, so no extra wrapping
        # is needed.
        return Series(
            range(15),
            index=date_range(start="2024-01-01", freq="D", periods=15),
        )

    @pytest.mark.parametrize(
        "date_string",
        [
            "1/10/2024",  # M/DD/YYYY, month-first -> 2024-01-10
            "01/10/2024",  # MM/DD/YYYY, month-first -> 2024-01-10
        ],
    )
    def test_loc_indexing_non_iso_single_key_deprecation(self, ser_daily, date_string):
        # GH#58302 - a non-ISO scalar key warns but still resolves
        msg = "Parsing non-ISO datetime strings in .loc is deprecated"

        with tm.assert_produces_warning(Pandas4Warning, match=msg):
            result = ser_daily.loc[date_string]
        # 2024-01-10 is the tenth day of the fixture, i.e. value 9
        assert result == 9

    @pytest.mark.parametrize(
        "date_string,expected",
        [
            ("2024-01-10", 9),  # YYYY-MM-DD (dash)
            ("2024/01/10", 9),  # YYYY/MM/DD (slash)
            ("2024 01 10", 9),  # YYYY MM DD (space)
        ],
    )
    def test_loc_indexing_iso_format_no_warning(self, ser_daily, date_string, expected):
        # GH#58302 - year-first formats should NOT warn
        with tm.assert_produces_warning(None):
            result = ser_daily.loc[date_string]
        assert result == expected

    @pytest.mark.parametrize(
        "start_string",
        [
            "1/10/2024",  # M/DD/YYYY, month-first -> 2024-01-10
            "01/10/2024",  # MM/DD/YYYY, month-first -> 2024-01-10
        ],
    )
    def test_loc_slicing_non_iso_start_deprecation(self, ser_daily, start_string):
        # GH#58302 - Non-ISO start in slice should warn
        msg = "Parsing non-ISO datetime strings in .loc is deprecated"

        with tm.assert_produces_warning(Pandas4Warning, match=msg):
            result = ser_daily.loc[start_string:"2024-01-15"]
        # 2024-01-10 through 2024-01-15, both ends inclusive
        assert len(result) == 6
        assert result.iloc[0] == 9

    @pytest.mark.parametrize(
        "end_string",
        [
            # Without dayfirst these are parsed month-first, i.e. 2024-05-01,
            # which lies past the last entry of the fixture.
            "5-01-2024",  # M-DD-YYYY
            "05-01-2024",  # MM-DD-YYYY
        ],
    )
    def test_loc_slicing_non_iso_end_deprecation(self, ser_daily, end_string):
        # GH#58302 - Non-ISO end in slice should warn
        msg = "Parsing non-ISO datetime strings in .loc is deprecated"

        with tm.assert_produces_warning(Pandas4Warning, match=msg):
            result = ser_daily.loc["2024-01-01":end_string]
        # The end bound is beyond the index, so every row is selected
        assert len(result) == 15

    def test_loc_slicing_both_non_iso_deprecation(self, ser_daily):
        # GH#58302 - each non-ISO bound emits the warning; only the presence
        # of a matching warning is verified here, not the count.
        msg = "Parsing non-ISO datetime strings in .loc is deprecated"

        # NOTE(review): stacklevel checking is disabled for this case;
        # confirm whether that is still required.
        with tm.assert_produces_warning(
            Pandas4Warning, match=msg, check_stacklevel=False
        ):
            result = ser_daily.loc["1/10/2024":"5-01-2024"]
        # 2024-01-10 up to the end of the fixture (2024-01-15)
        assert len(result) == 6
        assert result.iloc[0] == 9

    def test_loc_slicing_iso_formats_no_warning(self, ser_daily):
        # GH#58302 - ISO slice formats should NOT warn
        with tm.assert_produces_warning(None):
            result = ser_daily.loc["2024-01-05":"2024-01-10"]
        assert len(result) == 6

    def test_loc_non_string_keys_no_warning(self, ser_daily):
        # GH#58302 - Non-string keys should not warn
        with tm.assert_produces_warning(None):
            result = ser_daily.loc[Timestamp("2024-01-10")]
        assert result == 9
4 changes: 4 additions & 0 deletions pandas/tests/indexes/multi/test_partial_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
)
import pandas._testing as tm

pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)


@pytest.fixture
def df():
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/indexes/period/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
)
import pandas._testing as tm

pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)

dti4 = date_range("2016-01-01", periods=4)
dti = dti4[:-1]
rng = pd.Index(range(3))
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/indexes/period/test_partial_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
)
import pandas._testing as tm

pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)


class TestPeriodIndex:
def test_getitem_periodindex_duplicates_string_slice(self):
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/indexing/multiindex/test_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
import pandas._testing as tm
from pandas.tests.indexing.common import _mklbl

pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)


class TestMultiIndexSlicers:
def test_per_axis_per_level_getitem(self):
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@
from pandas.core.indexing import _one_ellipsis_message
from pandas.tests.indexing.common import check_indexing_smoketest_or_raises

pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)


@pytest.mark.parametrize(
"series, new_series, expected_ser",
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/resample/test_datetime_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@
from pandas.tseries.frequencies import to_offset
from pandas.tseries.offsets import Minute

pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)


@pytest.fixture
def simple_date_range_series():
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/resample/test_resampler_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
import pandas._testing as tm
from pandas.core.indexes.datetimes import date_range

pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)


@pytest.fixture
def test_frame():
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/series/indexing/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
)
import pandas._testing as tm

pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)


def test_fancy_getitem():
dti = date_range(
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/series/indexing/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@

from pandas.tseries.offsets import BDay

pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)


class TestSeriesGetitemScalars:
def test_getitem_object_index_float_string(self):
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/series/indexing/test_xs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
)
import pandas._testing as tm

pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)


def test_xs_datetimelike_wrapping():
# GH#31630 a case where we shouldn't wrap datetime64 in Timestamp
Expand Down
Loading