From 083f01a4d010e448eea2f17c23e8aac865a06cb1 Mon Sep 17 00:00:00 2001 From: suzyahyah Date: Fri, 10 Oct 2025 15:43:52 +0800 Subject: [PATCH] fix: pd.to_numeric handling of datetime --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_libs/lib.pyi | 1 + pandas/_libs/lib.pyx | 15 +++++++- pandas/core/tools/numeric.py | 8 +++-- pandas/io/parsers/base_parser.py | 1 + pandas/tests/tools/test_to_numeric.py | 50 +++++++++++++++++++++++++++ 6 files changed, 72 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 448ceffdaa1eb..db86405605c08 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1191,6 +1191,7 @@ Other - Bug in :func:`eval` where method calls on binary operations like ``(x + y).dropna()`` would raise ``AttributeError: 'BinOp' object has no attribute 'value'`` (:issue:`61175`) - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) - Bug in :func:`eval` with ``engine="numexpr"`` returning unexpected result for float division. (:issue:`59736`) +- Bug in :func:`to_numeric` for ``datetime``, :class:`Series` and ``NaT`` conversions. (:issue:`43280`) - Bug in :func:`to_numeric` raising ``TypeError`` when ``arg`` is a :class:`Timedelta` or :class:`Timestamp` scalar. (:issue:`59944`) - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) - Bug in :meth:`DataFrame.apply` raising ``RecursionError`` when passing ``func=list[int]``. (:issue:`61565`) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index e50b301c34868..801efd438e9a7 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -124,6 +124,7 @@ def maybe_convert_numeric( na_values: set, convert_empty: bool = ..., coerce_numeric: bool = ..., + convert_datetime: bool = ..., convert_to_masked_nullable: Literal[False] = ..., ) -> tuple[np.ndarray, None]: ... 
@overload diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 83a1b09f00a11..95427199be179 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2300,6 +2300,7 @@ def maybe_convert_numeric( set na_values, bint convert_empty=True, bint coerce_numeric=False, + bint convert_datetime=True, bint convert_to_masked_nullable=False, ) -> tuple[np.ndarray, np.ndarray | None]: """ @@ -2449,6 +2450,18 @@ def maybe_convert_numeric( elif is_decimal(val): floats[i] = complexes[i] = val seen.float_ = True + elif convert_datetime and (PyDate_Check(val) or cnp.is_datetime64_object(val)): + # convert_datetime flag avoids conversion for base_parser + # PyDate_Check also includes PyDatetime_Check + seen.datetime_ = True + if val in na_values or checknull(val): + seen.saw_null() + mask[i] = 1 + floats[i] = NaN + else: + ints[i] = np.datetime64(val).astype(int) + # because of pd.NaT, we may need to return in floats GH#43280 + floats[i] = float(ints[i]) else: try: floatify(val, &fval, &maybe_int) @@ -2517,7 +2530,7 @@ def maybe_convert_numeric( if seen.null_ and convert_to_masked_nullable: return (floats, mask.view(np.bool_)) return (floats, None) - elif seen.int_: + elif seen.int_ or seen.datetime_: if seen.null_ and convert_to_masked_nullable: if seen.uint_: return (uints, mask.view(np.bool_)) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 14921457194ca..0dbd57981b529 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -204,8 +204,12 @@ def to_numeric( return float(arg) if is_number(arg): return arg - if isinstance(arg, (Timedelta, Timestamp)): + if isinstance(arg, Timedelta): return arg._value + if isinstance(arg, Timestamp): + if arg.tzinfo: + arg = arg.tz_convert("UTC").replace(tzinfo=None) + is_scalars = True values = np.array([arg], dtype="O") elif getattr(arg, "ndim", 1) > 1: @@ -227,8 +231,6 @@ def to_numeric( new_mask: np.ndarray | None = None if is_numeric_dtype(values_dtype): pass - elif 
lib.is_np_dtype(values_dtype, "mM"): - values = values.view(np.int64) else: values = ensure_object(values) coerce_numeric = errors != "raise" diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 1d96385a92cd3..313484ba5b51b 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -490,6 +490,7 @@ def _infer_types( values, na_values, False, + convert_datetime=False, convert_to_masked_nullable=non_default_dtype_backend, # type: ignore[arg-type] ) except (ValueError, TypeError): diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 12e6be18244e1..8a40377d867d5 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -1,4 +1,6 @@ +from datetime import datetime import decimal +from functools import partial import numpy as np from numpy import iinfo @@ -902,6 +904,54 @@ def test_to_numeric_dtype_backend_error(dtype_backend): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize( + "input_value, expected, pd_type", + [ + (datetime(2021, 8, 22), 1629590400000000, "scalar"), + (datetime(2025, 2, 21), 1740096000000000, "scalar"), + (pd.NaT, np.nan, "scalar"), + ([datetime(2021, 8, 22)], [1629590400000000], "series"), + ([datetime(2025, 2, 21)], [1740096000000000], "series"), + ([pd.NaT], [np.nan], "series"), + ([datetime(2021, 8, 22), pd.NaT], [float(1629590400000000), np.nan], "series"), + ([pd.NaT, datetime(2021, 8, 22)], [np.nan, float(1629590400000000)], "series"), + ( + ["apple", 1, datetime(2021, 8, 22)], + [np.nan, float(1.0), float(1629590400000000)], + "series_coerce", + ), + ([pd.NaT], [np.nan], "series_partial"), + ([datetime(2025, 2, 21)], [1740096000000000], "series_partial"), + ( + [pd.NaT, datetime(2025, 2, 21)], + [np.nan, float(1740096000000000)], + "series_partial", + ), + ], +) +def test_to_numeric_datetime(input_value, expected, pd_type): + """Test converting datetime and NaT scalars and Series to numeric.""" + if 
pd_type == "scalar": + val = to_numeric(input_value) + # special handling because NaN != NaN + if pd.isna(expected): + assert pd.isna(val) + else: + assert val == expected + + elif pd_type == "series": + val = to_numeric(Series(input_value)) + tm.assert_series_equal(val, Series(expected)) + + elif pd_type == "series_coerce": + val = to_numeric(Series(input_value), errors="coerce") + tm.assert_series_equal(val, Series(expected)) + + elif pd_type == "series_partial": + val = Series(input_value).apply(partial(to_numeric)) + tm.assert_series_equal(val, Series(expected)) + + def test_invalid_dtype_backend(): ser = Series([1, 2, 3]) msg = (