Skip to content

Commit

Permalink
BUG: value_counts not preserving object dtype
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl committed Nov 26, 2023
1 parent 762b61d commit f570a4f
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Bug fixes
- Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
- Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
- Fixed bug in :meth:`Series.str.translate` losing object dtype when string option is set (:issue:`56152`)
- Fixed bug in :meth:`Series.value_counts` not preserving object dtype when ``infer_string`` is set (:issue:`56187`)

.. ---------------------------------------------------------------------------
.. _whatsnew_214.other:
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,8 @@ def value_counts_internal(
Series,
)

input_dtype = None if not isinstance(values, Series) else values.dtype

index_name = getattr(values, "name", None)
name = "proportion" if normalize else "count"

Expand Down Expand Up @@ -929,7 +931,7 @@ def value_counts_internal(

# For backwards compatibility, we let Index do its normal type
# inference, _except_ for if it infers from object to bool.
idx = Index(keys)
idx = Index(keys, dtype=input_dtype)
if idx.dtype == bool and keys.dtype == object:
idx = idx.astype(object)
elif idx.dtype != keys.dtype:
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/series/methods/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,3 +269,14 @@ def test_value_counts_masked(self):
[2, 1, 1], index=Index([2, 1, 3], dtype=dtype), dtype=dtype, name="count"
)
tm.assert_series_equal(result, expected)

def test_value_counts_infer_string(self):
    # GH#56187
    # value_counts on an object-dtype Series is expected to give back an
    # object-dtype index while the "future.infer_string" option is enabled.
    pytest.importorskip("pyarrow")

    obj_ser = Series(["a", "b"], dtype=object)

    # Only the value_counts call runs with the option switched on; the
    # expected Series is built afterwards, outside the option context.
    with pd.option_context("future.infer_string", True):
        counts = obj_ser.value_counts()

    expected_index = Index(["a", "b"], dtype=object)
    expected = Series([1, 1], index=expected_index, name="count")
    tm.assert_series_equal(counts, expected)

0 comments on commit f570a4f

Please sign in to comment.