diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst index 77ce303dc1bfe..41355645fca26 100644 --- a/doc/source/whatsnew/v2.1.4.rst +++ b/doc/source/whatsnew/v2.1.4.rst @@ -26,6 +26,7 @@ Bug fixes - Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`) - Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`) - Fixed bug in :meth:`Series.str.translate` losing object dtype when string option is set (:issue:`56152`) +- Fixed bug in :meth:`Series.value_counts` not preserving object dtype when ``infer_string`` is set (:issue:`56187`) .. --------------------------------------------------------------------------- .. _whatsnew_214.other: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 82de8ae96160f..1d93845f2f2ed 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -871,6 +871,8 @@ def value_counts_internal( Series, ) + input_dtype = None if not isinstance(values, Series) else values.dtype + index_name = getattr(values, "name", None) name = "proportion" if normalize else "count" @@ -929,7 +931,7 @@ def value_counts_internal( # For backwards compatibility, we let Index do its normal type # inference, _except_ for if if infers from object to bool. - idx = Index(keys) + idx = Index(keys, dtype=input_dtype) if idx.dtype == bool and keys.dtype == object: idx = idx.astype(object) elif idx.dtype != keys.dtype: diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py index 859010d9c79c6..422f0fac37f6d 100644 --- a/pandas/tests/series/methods/test_value_counts.py +++ b/pandas/tests/series/methods/test_value_counts.py @@ -269,3 +269,14 @@ def test_value_counts_masked(self): [2, 1, 1], index=Index([2, 1, 3], dtype=dtype), dtype=dtype, name="count" ) tm.assert_series_equal(result, expected) + + def test_value_counts_infer_string(self): + # GH#56187 + pytest.importorskip("pyarrow") + + ser = Series(["a", "b"], dtype=object) + + with pd.option_context("future.infer_string", True): + result = ser.value_counts() + expected = Series([1, 1], index=Index(["a", "b"], dtype=object), name="count") + tm.assert_series_equal(result, expected)