diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 65982ecdb810c..847b567175a57 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1032,13 +1032,13 @@ Bug fixes Categorical ^^^^^^^^^^^ - Bug in :class:`Categorical` where constructing from a pandas :class:`Series` or :class:`Index` with ``dtype='object'`` did not preserve the categories' dtype as ``object``; now the ``categories.dtype`` is preserved as ``object`` for these cases, while numpy arrays and Python sequences with ``dtype='object'`` continue to infer the most specific dtype (for example, ``str`` if all elements are strings) (:issue:`61778`) +- Bug in :class:`Categorical` where the repr displayed string categories without quotes when the categories had ``"string"`` dtype (:issue:`63045`) - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`) - Bug in :func:`bdate_range` raising ``ValueError`` with frequency ``freq="cbh"`` (:issue:`62849`) - Bug in :func:`testing.assert_index_equal` raising ``TypeError`` instead of ``AssertionError`` for incomparable ``CategoricalIndex`` when ``check_categorical=True`` and ``exact=False`` (:issue:`61935`) - Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`) - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`) - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`) -- Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c6e01096ad158..d40c9e8cbcde2 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2280,7 +2280,7 @@ def _repr_categories(self) -> list[str]: from pandas.io.formats 
import format as fmt formatter = None - if self.categories.dtype == "str": + if self.categories.dtype == "str" or self.categories.dtype == "string": # the extension array formatter defaults to boxed=True in format_array # override here to boxed=False to be consistent with QUOTE_NONNUMERIC formatter = cast(ExtensionArray, self.categories._values)._formatter( diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index 60af3bafb62b2..ebbfbfd96d48d 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -545,3 +545,13 @@ def test_categorical_str_repr(self): result = repr(Categorical([1, "2", 3, 4])) expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" assert result == expected + + def test_categorical_with_string_dtype(self, string_dtype_no_object): + # GH 63045 - repr must quote string categories and report the dtype by name + s = Series( + ["apple", "banana", "cherry", "cherry"], dtype=string_dtype_no_object + ) + result = repr(Categorical(s)) + expected = f"['apple', 'banana', 'cherry', 'cherry']\nCategories (3, {string_dtype_no_object!s}): ['apple', 'banana', 'cherry']" # noqa: E501 + + assert result == expected