From 1845135c74b39944906051c6732f05b8a56cb384 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 31 Oct 2025 11:38:40 -0700 Subject: [PATCH] BUG: infer_freq with Series[pyarrow[timestamp]] --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/tests/tseries/frequencies/test_inference.py | 14 ++++++++++++++ pandas/tseries/frequencies.py | 9 +++++++++ 3 files changed, 24 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index b1dc78bbf8020..2c1af8a219558 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -975,6 +975,7 @@ Datetimelike - Bug in :class:`Timestamp` constructor failing to raise when given a ``np.datetime64`` object with non-standard unit (:issue:`25611`) - Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`) - Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56147`) +- Bug in :func:`infer_freq` with a :class:`Series` with :class:`ArrowDtype` timestamp dtype incorrectly raising ``TypeError`` (:issue:`58403`) - Bug in :func:`to_datetime` where passing an ``lxml.etree._ElementUnicodeResult`` together with ``format`` raised ``TypeError``. Now subclasses of ``str`` are handled. (:issue:`60933`) - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`) - Bug in :func:`tseries.frequencies.to_offset` would fail to parse frequency strings starting with "LWOM" (:issue:`59218`) diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py index dad5c73b89626..05e1d50a86d3c 100644 --- a/pandas/tests/tseries/frequencies/test_inference.py +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -13,6 +13,7 @@ from pandas._libs.tslibs.offsets import _get_offset from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG from pandas.compat import is_platform_windows +import pandas.util._test_decorators as td from pandas import ( DatetimeIndex, @@ -542,3 +543,16 @@ def test_infer_freq_non_nano_tzaware(tz_aware_fixture): res = frequencies.infer_freq(dta) assert res == "B" + + +@td.skip_if_no("pyarrow") +def test_infer_freq_pyarrow(): + # GH#58403 + data = ["2022-01-01T10:00:00", "2022-01-01T10:00:30", "2022-01-01T10:01:00"] + pd_series = Series(data).astype("timestamp[s][pyarrow]") + pd_index = Index(data).astype("timestamp[s][pyarrow]") + + assert frequencies.infer_freq(pd_index.values) == "30s" + assert frequencies.infer_freq(pd_series.values) == "30s" + assert frequencies.infer_freq(pd_index) == "30s" + assert frequencies.infer_freq(pd_series) == "30s" diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 4003221a06f6a..c4e6733b9a08d 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -37,6 +37,7 @@ from pandas.core.dtypes.common import is_numeric_dtype from pandas.core.dtypes.dtypes import ( + ArrowDtype, DatetimeTZDtype, PeriodDtype, ) @@ -132,6 +133,14 @@ def infer_freq( if isinstance(index, ABCSeries): values = index._values + + if isinstance(index.dtype, ArrowDtype): + import pyarrow as pa + + if pa.types.is_timestamp(values.dtype.pyarrow_dtype): + # GH#58403 + values = values._to_datetimearray() + if not ( lib.is_np_dtype(values.dtype, "mM") or isinstance(values.dtype, DatetimeTZDtype)