From 9ead4fffdfed74101e8520103c9c6e9deebe0bf4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 13 Sep 2025 08:36:48 +0200 Subject: [PATCH 1/2] [2.3.x] Only use new string dtype repr for the new (NaN-based) string dtype --- pandas/core/arrays/string_.py | 11 +++++++---- pandas/core/generic.py | 13 ++++++------- pandas/io/formats/format.py | 3 +++ pandas/tests/arrays/string_/test_string.py | 4 ++-- pandas/tests/io/formats/test_to_string.py | 6 +++--- 5 files changed, 21 insertions(+), 16 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 41338627cb2e0..edc5529d0d78c 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -119,10 +119,10 @@ class StringDtype(StorageExtensionDtype): Examples -------- >>> pd.StringDtype() - <StringDtype(storage='python', na_value=<NA>)> + string[python] >>> pd.StringDtype(storage="pyarrow") - <StringDtype(na_value=<NA>)> + string[pyarrow] """ @property @@ -194,8 +194,11 @@ def __init__( self._na_value = na_value def __repr__(self) -> str: - storage = "" if self.storage == "pyarrow" else "storage='python', " - return f"<StringDtype({storage}na_value={self._na_value})>" + if self._na_value is libmissing.NA: + return f"{self.name}[{self.storage}]" + else: + storage = "" if self.storage == "pyarrow" else "storage='python', " + return f"<StringDtype({storage}na_value={self._na_value})>" def __eq__(self, other: object) -> bool: # we need to override the base class __eq__ because na_value (NA or NaN) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 60e63334d39b1..ab998e90f89cf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7021,13 +7021,12 @@ def convert_dtypes( 2 3 z 20 200.0 >>> dfn.dtypes - a Int32 - b string - c boolean - d string - e Int64 - f Float64 - dtype: object + a Int32 + b string[python] + c boolean + d string[python] + e Int64 + f Float64 Start with a Series of strings and missing data represented by ``np.nan``. 
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 47130f5b1fb65..bcbee6ce2662f 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -66,6 +66,7 @@ ExtensionArray, TimedeltaArray, ) +from pandas.core.arrays.string_ import StringDtype from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.indexes.api import ( @@ -1231,6 +1232,8 @@ def _format(x): return self.na_rep elif isinstance(x, PandasObject): return str(x) + elif isinstance(x, StringDtype) and x.na_value is NA: + return repr(x) else: # object dtype return str(formatter(x)) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index da949d03a1a77..8c15f0c98f90e 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -126,11 +126,11 @@ def test_repr(dtype): def test_dtype_repr(dtype): if dtype.storage == "pyarrow": if dtype.na_value is pd.NA: - assert repr(dtype) == "<StringDtype(na_value=<NA>)>" + assert repr(dtype) == "string[pyarrow]" else: assert repr(dtype) == "<StringDtype(na_value=nan)>" elif dtype.na_value is pd.NA: - assert repr(dtype) == "<StringDtype(storage='python', na_value=<NA>)>" + assert repr(dtype) == "string[python]" else: assert repr(dtype) == "<StringDtype(storage='python', na_value=nan)>" diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 62e92b8de265c..164e514262603 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -758,9 +758,9 @@ def test_to_string_string_dtype(self): result = df.dtypes.to_string() expected = dedent( """\ - x string - y string - z int64[pyarrow]""" + x string[pyarrow] + y string[python] + z int64[pyarrow]""" ) assert result == expected From 2d040baef34a820c4c57c975ba9ea5bed8ae4a3c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 13 Sep 2025 16:43:04 +0200 Subject: [PATCH 2/2] fixup docstring --- pandas/core/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/generic.py 
b/pandas/core/generic.py index ab998e90f89cf..70b72577dd5d1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7027,6 +7027,7 @@ def convert_dtypes( d string[python] e Int64 f Float64 + dtype: object Start with a Series of strings and missing data represented by ``np.nan``.