Skip to content

Commit

Permalink
DEPR: Remove df.reduction(level) (#49611)
Browse files (browse the repository at this point in the history)
* DEPR: Remove df.reduction(level)

* test_*_consistency

* Fix asv

* Add issue ref
  • Loading branch information
mroeschke committed Nov 11, 2022
1 parent a23eb83 commit dbb2adc
Show file tree
Hide file tree
Showing 20 changed files with 70 additions and 778 deletions.
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,10 +454,10 @@ def setup(self, axis):
)

def time_count_level_multi(self, axis):
self.df.count(axis=axis, level=1)
self.df.count(axis=axis)

def time_count_level_mixed_dtypes_multi(self, axis):
self.df_mixed.count(axis=axis, level=1)
self.df_mixed.count(axis=axis)


class Apply:
Expand Down
20 changes: 10 additions & 10 deletions asv_bench/benchmarks/stat_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ def time_op(self, op, dtype, axis):

class FrameMultiIndexOps:

params = ([0, 1, [0, 1]], ops)
param_names = ["level", "op"]
params = [ops]
param_names = ["op"]

def setup(self, level, op):
def setup(self, op):
levels = [np.arange(10), np.arange(100), np.arange(100)]
codes = [
np.arange(10).repeat(10000),
Expand All @@ -37,8 +37,8 @@ def setup(self, level, op):
df = pd.DataFrame(np.random.randn(len(index), 4), index=index)
self.df_func = getattr(df, op)

def time_op(self, level, op):
self.df_func(level=level)
def time_op(self, op):
self.df_func()


class SeriesOps:
Expand All @@ -56,10 +56,10 @@ def time_op(self, op, dtype):

class SeriesMultiIndexOps:

params = ([0, 1, [0, 1]], ops)
param_names = ["level", "op"]
params = [ops]
param_names = ["op"]

def setup(self, level, op):
def setup(self, op):
levels = [np.arange(10), np.arange(100), np.arange(100)]
codes = [
np.arange(10).repeat(10000),
Expand All @@ -70,8 +70,8 @@ def setup(self, level, op):
s = pd.Series(np.random.randn(len(index)), index=index)
self.s_func = getattr(s, op)

def time_op(self, level, op):
self.s_func(level=level)
def time_op(self, op):
self.s_func()


class Rank:
Expand Down
10 changes: 6 additions & 4 deletions doc/source/whatsnew/v0.15.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,13 @@ Other enhancements:
- ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters (:issue:`8302`):

.. ipython:: python
:okwarning:
.. code-block:: python
s = pd.Series([False, True, False], index=[0, 0, 1])
s.any(level=0)
>>> s = pd.Series([False, True, False], index=[0, 0, 1])
>>> s.any(level=0)
0 True
1 False
dtype: bool
- ``Panel`` now supports the ``all`` and ``any`` aggregation functions. (:issue:`8302`):

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ Removal of prior version deprecations/changes
- Removed :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
- Removed :attr:`Rolling.win_type` returning ``"freq"`` (:issue:`38963`)
- Removed :attr:`Rolling.is_datetimelike` (:issue:`38963`)
- Removed the ``level`` keyword in :class:`DataFrame` and :class:`Series` aggregations; use ``groupby`` instead (:issue:`39983`)
- Removed deprecated :meth:`Timedelta.delta`, :meth:`Timedelta.is_populated`, and :attr:`Timedelta.freq` (:issue:`46430`, :issue:`46476`)
- Removed deprecated :attr:`NaT.freq` (:issue:`45071`)
- Removed deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)
Expand Down
64 changes: 1 addition & 63 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@
maybe_downcast_to_dtype,
)
from pandas.core.dtypes.common import (
ensure_platform_int,
infer_dtype_from_object,
is_1d_only_ea_dtype,
is_bool_dtype,
Expand Down Expand Up @@ -10331,7 +10330,7 @@ def c(x):
# ----------------------------------------------------------------------
# ndarray-like stats methods

def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False):
def count(self, axis: Axis = 0, numeric_only: bool = False):
"""
Count non-NA cells for each column or row.
Expand All @@ -10343,10 +10342,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False)
axis : {0 or 'index', 1 or 'columns'}, default 0
If 0 or 'index' counts are generated for each column.
If 1 or 'columns' counts are generated for each row.
level : int or str, optional
If the axis is a `MultiIndex` (hierarchical), count along a
particular `level`, collapsing into a `DataFrame`.
A `str` specifies the level name.
numeric_only : bool, default False
Include only `float`, `int` or `boolean` data.
Expand Down Expand Up @@ -10400,16 +10395,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False)
dtype: int64
"""
axis = self._get_axis_number(axis)
if level is not None:
warnings.warn(
"Using the level keyword in DataFrame and Series aggregations is "
"deprecated and will be removed in a future version. Use groupby "
"instead. df.count(level=1) should use df.groupby(level=1).count().",
FutureWarning,
stacklevel=find_stack_level(),
)
res = self._count_level(level, axis=axis, numeric_only=numeric_only)
return res.__finalize__(self, method="count")

if numeric_only:
frame = self._get_numeric_data()
Expand All @@ -10434,53 +10419,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False)

return result.astype("int64").__finalize__(self, method="count")

def _count_level(self, level: Level, axis: AxisInt = 0, numeric_only: bool = False):
if numeric_only:
frame = self._get_numeric_data()
else:
frame = self

count_axis = frame._get_axis(axis)
agg_axis = frame._get_agg_axis(axis)

if not isinstance(count_axis, MultiIndex):
raise TypeError(
f"Can only count levels on hierarchical {self._get_axis_name(axis)}."
)

# Mask NaNs: Mask rows or columns where the index level is NaN, and all
# values in the DataFrame that are NaN
if frame._is_mixed_type:
# Since we have mixed types, calling notna(frame.values) might
# upcast everything to object
values_mask = notna(frame).values
else:
# But use the speedup when we have homogeneous dtypes
values_mask = notna(frame.values)

index_mask = notna(count_axis.get_level_values(level=level))
if axis == 1:
mask = index_mask & values_mask
else:
mask = index_mask.reshape(-1, 1) & values_mask

if isinstance(level, int):
level_number = level
else:
level_number = count_axis._get_level_number(level)

level_name = count_axis._names[level_number]
level_index = count_axis.levels[level_number]._rename(name=level_name)
level_codes = ensure_platform_int(count_axis.codes[level_number])
counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=axis)

if axis == 1:
result = self._constructor(counts, index=agg_axis, columns=level_index)
else:
result = self._constructor(counts, index=level_index, columns=agg_axis)

return result

def _reduce(
self,
op,
Expand Down
Loading

0 comments on commit dbb2adc

Please sign in to comment.