From 5a61da80340aaf019a9f22dfb90186520ca5e4d4 Mon Sep 17 00:00:00 2001 From: Rui Amaral Date: Mon, 18 Mar 2024 15:32:34 +0000 Subject: [PATCH 1/5] fix bad datetime to str conversion in Series ctor _try_cast and add a test (#57512) --- pandas/_libs/lib.pyx | 3 ++- pandas/core/construction.py | 1 + pandas/tests/series/test_constructors.py | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 7aa1cb715521e..d7170c9361f52 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -751,7 +751,8 @@ cpdef ndarray[object] ensure_string_array( # dtype check to exclude DataFrame # GH#41409 TODO: not a great place for this out = arr.astype(str).astype(object) - out[arr.isna()] = na_value + if convert_na_value: + out[arr.isna()] = na_value return out arr = arr.to_numpy(dtype=object) elif not util.is_array(arr): diff --git a/pandas/core/construction.py b/pandas/core/construction.py index ec49340e9a516..5454f86dd83d9 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -795,6 +795,7 @@ def _try_cast( shape = arr.shape if arr.ndim > 1: arr = arr.ravel() + arr = ensure_wrapped_if_datetimelike(arr) else: shape = (len(arr),) return lib.ensure_string_array(arr, convert_na_value=False, copy=copy).reshape( diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 3f9d5bbe806bb..30b209c4024fc 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -615,6 +615,23 @@ def test_constructor_maskedarray(self): ) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("dtype", ["M8[s]", "M8[ms]", "M8[us]", "M8[ns]"]) + def test_constructor_str_object_datetime_array(self, dtype): + # GH 57512 + dt_arr = np.array( + [ + "2024-01-03T00:00:00.000000000", + "2024-01-01T00:00:00.000000000", + ], + dtype=dtype, + ) + result = Series(Series(dt_arr, dtype=str), dtype=dtype) + expected = Series(dt_arr, dtype=dtype) + tm.assert_series_equal(result, expected) + + result = Series(Series(dt_arr, dtype=object), dtype=dtype) + tm.assert_series_equal(result, expected) + def test_constructor_maskedarray_hardened(self): # Check numpy masked arrays with hard masks -- from GH24574 data = ma.masked_all((3,), dtype=float).harden_mask() From e583bac03407a9ac4281f9dc02d421e68f185e1d Mon Sep 17 00:00:00 2001 From: Rui Amaral Date: Mon, 15 Apr 2024 22:19:24 +0100 Subject: [PATCH 2/5] add whatsnew entry --- doc/source/whatsnew/v3.0.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 17328e6084cb4..3c51f43c0990f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -370,8 +370,10 @@ Numeric Conversion ^^^^^^^^^^ +- Bug in :class:`Series` constructor responsible for bad datetime to str dtype conversions in ``read_csv``. (:issue:`57512`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) + - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) Strings From 654714d17d9d09ab72bdbbd1c902bf6f755a6f5f Mon Sep 17 00:00:00 2001 From: Rui Amaral Date: Mon, 15 Apr 2024 22:25:02 +0100 Subject: [PATCH 3/5] rename and move test --- pandas/tests/series/test_constructors.py | 34 ++++++++++++------------ 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 30b209c4024fc..2700e3964bdc4 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -79,6 +79,23 @@ def test_infer_with_date_and_datetime(self): expected = Index(vals, dtype=object) tm.assert_index_equal(idx, expected) + @pytest.mark.parametrize("dtype", ["M8[s]", "M8[ms]", "M8[us]", "M8[ns]"]) + def test_constructor_str_object_dtypes_dt64_array(self, dtype): + # GH 57512 + dt_arr = np.array( + [ + "2024-01-03T00:00:00.000000000", + "2024-01-01T00:00:00.000000000", + ], + dtype=dtype, + ) + result = Series(Series(dt_arr, dtype=str), dtype=dtype) + expected = Series(dt_arr, dtype=dtype) + tm.assert_series_equal(result, expected) + + result = Series(Series(dt_arr, dtype=object), dtype=dtype) + tm.assert_series_equal(result, expected) + def test_unparsable_strings_with_dt64_dtype(self): # pre-2.0 these would be silently ignored and come back with object dtype vals = ["aa"] @@ -615,23 +632,6 @@ def test_constructor_maskedarray(self): ) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("dtype", ["M8[s]", "M8[ms]", "M8[us]", "M8[ns]"]) - def test_constructor_str_object_datetime_array(self, dtype): - # GH 57512 - dt_arr = np.array( - [ - "2024-01-03T00:00:00.000000000", - "2024-01-01T00:00:00.000000000", - ], - dtype=dtype, - ) - result = Series(Series(dt_arr, dtype=str), dtype=dtype) - expected = Series(dt_arr, dtype=dtype) - tm.assert_series_equal(result, expected) - - result = Series(Series(dt_arr, dtype=object), dtype=dtype) - tm.assert_series_equal(result, expected) - def test_constructor_maskedarray_hardened(self): # Check numpy masked arrays with hard masks -- from GH24574 data = ma.masked_all((3,), dtype=float).harden_mask() From 854d2ed6e7f5e63376691ef15b0f41c695b3a4a4 Mon Sep 17 00:00:00 2001 From: Rui Amaral Date: Mon, 15 Apr 2024 22:27:52 +0100 Subject: [PATCH 4/5] remove newline --- doc/source/whatsnew/v3.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 3c51f43c0990f..af1e9c6cb90b5 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -373,7 +373,6 @@ Conversion - Bug in :class:`Series` constructor responsible for bad datetime to str dtype conversions in ``read_csv``. (:issue:`57512`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) - - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) Strings From eccf7a270f71fef910df81326400e13de971d729 Mon Sep 17 00:00:00 2001 From: Rui Amaral Date: Tue, 30 Apr 2024 20:01:05 +0100 Subject: [PATCH 5/5] remove newline --- doc/source/whatsnew/v3.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c94dd0fa1608b..4934e67797e99 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -392,7 +392,6 @@ Numeric Conversion ^^^^^^^^^^ - - Bug in :class:`Series` constructor responsible for bad datetime to str dtype conversions in ``read_csv``. (:issue:`57512`) - Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)