Skip to content

Commit

Permalink
DEPR: dtype inference in value_counts (#56161)
Browse files Browse the repository at this point in the history
* DEPR: dtype inference in value_counts

* GH ref
  • Loading branch information
jbrockmendel committed Nov 26, 2023
1 parent 3530b3d commit 9ac1562
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,7 @@ Other Deprecations
- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`)
- Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`)
- Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`)
- Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`. Use ``result.index = result.index.infer_objects()`` to retain the old behavior (:issue:`56161`)
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
- Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)
Expand Down
22 changes: 20 additions & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,16 @@ def value_counts_internal(
idx = Index(keys)
if idx.dtype == bool and keys.dtype == object:
idx = idx.astype(object)
elif idx.dtype != keys.dtype:
warnings.warn(
# GH#56161
"The behavior of value_counts with object-dtype is deprecated. "
"In a future version, this will *not* perform dtype inference "
"on the resulting index. To retain the old behavior, use "
"`result.index = result.index.infer_objects()`",
FutureWarning,
stacklevel=find_stack_level(),
)
idx.name = index_name

result = Series(counts, index=idx, name=name, copy=False)
Expand Down Expand Up @@ -1712,8 +1722,16 @@ def union_with_duplicates(
"""
from pandas import Series

l_count = value_counts_internal(lvals, dropna=False)
r_count = value_counts_internal(rvals, dropna=False)
with warnings.catch_warnings():
# filter warning from object dtype inference; we will end up discarding
# the index here, so the deprecation does not affect the end result.
warnings.filterwarnings(
"ignore",
"The behavior of value_counts with object-dtype is deprecated",
category=FutureWarning,
)
l_count = value_counts_internal(lvals, dropna=False)
r_count = value_counts_internal(rvals, dropna=False)
l_count, r_count = l_count.align(r_count, fill_value=0)
final_count = np.maximum(l_count.values, r_count.values)
final_count = Series(final_count, index=l_count.index, dtype="int", copy=False)
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
Union,
overload,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -1226,7 +1227,16 @@ def value_counts(self, dropna: bool = True) -> Series:
Series.value_counts
"""
# TODO: implement this in a non-naive way!
return value_counts(np.asarray(self), dropna=dropna)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"The behavior of value_counts with object-dtype is deprecated",
category=FutureWarning,
)
result = value_counts(np.asarray(self), dropna=dropna)
# Once the deprecation is enforced, we will need to do
# `result.index = result.index.astype(self.dtype)`
return result

# ---------------------------------------------------------------------
# Rendering Methods
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/base/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,3 +336,16 @@ def test_value_counts_with_nan(dropna, index_or_series):
else:
expected = Series([1, 1, 1], index=[True, pd.NA, np.nan], name="count")
tm.assert_series_equal(res, expected)


def test_value_counts_object_inference_deprecated():
# GH#56161
dti = pd.date_range("2016-01-01", periods=3, tz="UTC")

idx = dti.astype(object)
msg = "The behavior of value_counts with object-dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = idx.value_counts()

exp = dti.value_counts()
tm.assert_series_equal(res, exp)

0 comments on commit 9ac1562

Please sign in to comment.