DEPR: Remove df.reduction(level) (#49611)

* DEPR: Remove df.reduction(level)
* test_*_consistency
* Fix asv
* Add issue ref
pandas-dev · Nov 11, 2022 · dbb2adc
1 parent a23eb83
commit dbb2adc
Show file tree

Hide file tree

Showing 20 changed files with 70 additions and 778 deletions.
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
@@ -454,10 +454,10 @@ def setup(self, axis):
         )
 
     def time_count_level_multi(self, axis):
-        self.df.count(axis=axis, level=1)
+        self.df.count(axis=axis)
 
     def time_count_level_mixed_dtypes_multi(self, axis):
-        self.df_mixed.count(axis=axis, level=1)
+        self.df_mixed.count(axis=axis)
 
 
 class Apply:

diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py
@@ -23,10 +23,10 @@ def time_op(self, op, dtype, axis):
 
 class FrameMultiIndexOps:
 
-    params = ([0, 1, [0, 1]], ops)
-    param_names = ["level", "op"]
+    params = [ops]
+    param_names = ["op"]
 
-    def setup(self, level, op):
+    def setup(self, op):
         levels = [np.arange(10), np.arange(100), np.arange(100)]
         codes = [
             np.arange(10).repeat(10000),
@@ -37,8 +37,8 @@ def setup(self, level, op):
         df = pd.DataFrame(np.random.randn(len(index), 4), index=index)
         self.df_func = getattr(df, op)
 
-    def time_op(self, level, op):
-        self.df_func(level=level)
+    def time_op(self, op):
+        self.df_func()
 
 
 class SeriesOps:
@@ -56,10 +56,10 @@ def time_op(self, op, dtype):
 
 class SeriesMultiIndexOps:
 
-    params = ([0, 1, [0, 1]], ops)
-    param_names = ["level", "op"]
+    params = [ops]
+    param_names = ["op"]
 
-    def setup(self, level, op):
+    def setup(self, op):
         levels = [np.arange(10), np.arange(100), np.arange(100)]
         codes = [
             np.arange(10).repeat(10000),
@@ -70,8 +70,8 @@ def setup(self, level, op):
         s = pd.Series(np.random.randn(len(index)), index=index)
         self.s_func = getattr(s, op)
 
-    def time_op(self, level, op):
-        self.s_func(level=level)
+    def time_op(self, op):
+        self.s_func()
 
 
 class Rank:

diff --git a/doc/source/whatsnew/v0.15.2.rst b/doc/source/whatsnew/v0.15.2.rst
@@ -154,11 +154,13 @@ Other enhancements:
 
 - ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters (:issue:`8302`):
 
-  .. ipython:: python
-     :okwarning:
+  .. code-block:: python
 
-     s = pd.Series([False, True, False], index=[0, 0, 1])
-     s.any(level=0)
+     >>> s = pd.Series([False, True, False], index=[0, 0, 1])
+     >>> s.any(level=0)
+     0     True
+     1    False
+     dtype: bool
 
 - ``Panel`` now supports the ``all`` and ``any`` aggregation functions. (:issue:`8302`):
 

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -469,6 +469,7 @@ Removal of prior version deprecations/changes
 - Removed :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
 - Removed :attr:`Rolling.win_type` returning ``"freq"`` (:issue:`38963`)
 - Removed :attr:`Rolling.is_datetimelike` (:issue:`38963`)
+- Removed the ``level`` keyword in :class:`DataFrame` and :class:`Series` aggregations; use ``groupby`` instead (:issue:`39983`)
 - Removed deprecated :meth:`Timedelta.delta`, :meth:`Timedelta.is_populated`, and :attr:`Timedelta.freq` (:issue:`46430`, :issue:`46476`)
 - Removed deprecated :attr:`NaT.freq` (:issue:`45071`)
 - Removed deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -118,7 +118,6 @@
     maybe_downcast_to_dtype,
 )
 from pandas.core.dtypes.common import (
-    ensure_platform_int,
     infer_dtype_from_object,
     is_1d_only_ea_dtype,
     is_bool_dtype,
@@ -10331,7 +10330,7 @@ def c(x):
     # ----------------------------------------------------------------------
     # ndarray-like stats methods
 
-    def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False):
+    def count(self, axis: Axis = 0, numeric_only: bool = False):
         """
         Count non-NA cells for each column or row.
 
@@ -10343,10 +10342,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False)
         axis : {0 or 'index', 1 or 'columns'}, default 0
             If 0 or 'index' counts are generated for each column.
             If 1 or 'columns' counts are generated for each row.
-        level : int or str, optional
-            If the axis is a `MultiIndex` (hierarchical), count along a
-            particular `level`, collapsing into a `DataFrame`.
-            A `str` specifies the level name.
         numeric_only : bool, default False
             Include only `float`, `int` or `boolean` data.
 
@@ -10400,16 +10395,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False)
         dtype: int64
         """
         axis = self._get_axis_number(axis)
-        if level is not None:
-            warnings.warn(
-                "Using the level keyword in DataFrame and Series aggregations is "
-                "deprecated and will be removed in a future version. Use groupby "
-                "instead. df.count(level=1) should use df.groupby(level=1).count().",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-            res = self._count_level(level, axis=axis, numeric_only=numeric_only)
-            return res.__finalize__(self, method="count")
 
         if numeric_only:
             frame = self._get_numeric_data()
@@ -10434,53 +10419,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False)
 
         return result.astype("int64").__finalize__(self, method="count")
 
-    def _count_level(self, level: Level, axis: AxisInt = 0, numeric_only: bool = False):
-        if numeric_only:
-            frame = self._get_numeric_data()
-        else:
-            frame = self
-
-        count_axis = frame._get_axis(axis)
-        agg_axis = frame._get_agg_axis(axis)
-
-        if not isinstance(count_axis, MultiIndex):
-            raise TypeError(
-                f"Can only count levels on hierarchical {self._get_axis_name(axis)}."
-            )
-
-        # Mask NaNs: Mask rows or columns where the index level is NaN, and all
-        # values in the DataFrame that are NaN
-        if frame._is_mixed_type:
-            # Since we have mixed types, calling notna(frame.values) might
-            # upcast everything to object
-            values_mask = notna(frame).values
-        else:
-            # But use the speedup when we have homogeneous dtypes
-            values_mask = notna(frame.values)
-
-        index_mask = notna(count_axis.get_level_values(level=level))
-        if axis == 1:
-            mask = index_mask & values_mask
-        else:
-            mask = index_mask.reshape(-1, 1) & values_mask
-
-        if isinstance(level, int):
-            level_number = level
-        else:
-            level_number = count_axis._get_level_number(level)
-
-        level_name = count_axis._names[level_number]
-        level_index = count_axis.levels[level_number]._rename(name=level_name)
-        level_codes = ensure_platform_int(count_axis.codes[level_number])
-        counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=axis)
-
-        if axis == 1:
-            result = self._constructor(counts, index=agg_axis, columns=level_index)
-        else:
-            result = self._constructor(counts, index=level_index, columns=agg_axis)
-
-        return result
-
     def _reduce(
         self,
         op,