pandas-dev · jreback · Apr 13, 2021 · Apr 10, 2021 · Apr 10, 2021 · Apr 11, 2021
diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
@@ -498,7 +498,7 @@ values across a level. For instance:
    )
    df = pd.DataFrame(np.random.randn(4, 2), index=midx)
    df
-   df2 = df.mean(level=0)
+   df2 = df.groupby(level=0).mean()
    df2
    df2.reindex(df.index, level=0)
 

diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
@@ -633,7 +633,7 @@ even if some categories are not present in the data:
         data=[[1, 2, 3], [4, 5, 6]],
         columns=pd.MultiIndex.from_arrays([["A", "B", "B"], columns]),
     )
-    df.sum(axis=1, level=1)
+    df.groupby(axis=1, level=1).sum()
 
 Groupby will also show "unused" categories:
 

diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
@@ -320,14 +320,6 @@ number:
 
    s.groupby(level="second").sum()
 
-The aggregation functions such as ``sum`` will take the level parameter
-directly. Additionally, the resulting index will be named according to the
-chosen level:
-
-.. ipython:: python
-
-   s.sum(level="second")
-
 Grouping with multiple levels is supported.
 
 .. ipython:: python

diff --git a/doc/source/whatsnew/v0.15.2.rst b/doc/source/whatsnew/v0.15.2.rst
@@ -154,6 +154,7 @@ Other enhancements:
 - ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters (:issue:`8302`):
 
   .. ipython:: python
+     :okwarning:
 
      s = pd.Series([False, True, False], index=[0, 0, 1])
      s.any(level=0)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -563,6 +563,7 @@ Deprecations
 - Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like and raises anything but ``TypeError``; ``func`` raising anything but a ``TypeError`` will raise in a future version (:issue:`40211`)
 - Deprecated support for ``np.ma.mrecords.MaskedRecords`` in the :class:`DataFrame` constructor, pass ``{name: data[name] for name in data.dtype.names}`` instead (:issue:`40363`)
 - Deprecated the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`)
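+- Deprecated the ``level`` keyword for :class:`DataFrame` and :class:`Series` aggregations; use :meth:`DataFrame.groupby` or :meth:`Series.groupby` instead, e.g. ``df.sum(level=0)`` becomes ``df.groupby(level=0).sum()`` (:issue:`39983`)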
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -9479,6 +9479,13 @@ def count(
         """
         axis = self._get_axis_number(axis)
         if level is not None:
+            warnings.warn(
+                "Using the level keyword in DataFrame and Series aggregations is "
+                "deprecated and will be removed in a future version. Use groupby "
+                "instead. df.count(level=1) should use df.groupby(level=1).count().",
+                FutureWarning,
+                stacklevel=2,
+            )
             return self._count_level(level, axis=axis, numeric_only=numeric_only)
 
         if numeric_only:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -10368,6 +10368,13 @@ def _logical_func(
     ):
         nv.validate_logical_func((), kwargs, fname=name)
         if level is not None:
+            warnings.warn(
+                "Using the level keyword in DataFrame and Series aggregations is "
+                "deprecated and will be removed in a future version. Use groupby "
+                "instead. df.any(level=1) should use df.groupby(level=1).any()",
+                FutureWarning,
+                stacklevel=4,
+            )
             if bool_only is not None:
                 raise NotImplementedError(
                     "Option bool_only is not implemented with option level."
@@ -10459,6 +10466,13 @@ def _stat_function_ddof(
         if axis is None:
             axis = self._stat_axis_number
         if level is not None:
+            warnings.warn(
+                "Using the level keyword in DataFrame and Series aggregations is "
+                "deprecated and will be removed in a future version. Use groupby "
+                "instead. df.var(level=1) should use df.groupby(level=1).var().",
+                FutureWarning,
+                stacklevel=4,
+            )
             return self._agg_by_level(
                 name, axis=axis, level=level, skipna=skipna, ddof=ddof
             )
@@ -10507,6 +10521,13 @@ def _stat_function(
         if axis is None:
             axis = self._stat_axis_number
         if level is not None:
+            warnings.warn(
+                "Using the level keyword in DataFrame and Series aggregations is "
+                "deprecated and will be removed in a future version. Use groupby "
+                "instead. df.median(level=1) should use df.groupby(level=1).median().",
+                FutureWarning,
+                stacklevel=4,
+            )
             return self._agg_by_level(
                 name, axis=axis, level=level, skipna=skipna, numeric_only=numeric_only
             )
@@ -10569,6 +10590,13 @@ def _min_count_stat_function(
         if axis is None:
             axis = self._stat_axis_number
         if level is not None:
+            warnings.warn(
+                "Using the level keyword in DataFrame and Series aggregations is "
+                "deprecated and will be removed in a future version. Use groupby "
+                "instead. df.sum(level=1) should use df.groupby(level=1).sum().",
+                FutureWarning,
+                stacklevel=4,
+            )
             return self._agg_by_level(
                 name,
                 axis=axis,
@@ -10646,6 +10674,13 @@ def mad(self, axis=None, skipna=None, level=None):
         if axis is None:
             axis = self._stat_axis_number
         if level is not None:
+            warnings.warn(
+                "Using the level keyword in DataFrame and Series aggregations is "
+                "deprecated and will be removed in a future version. Use groupby "
+                "instead. df.mad(level=1) should use df.groupby(level=1).mad()",
+                FutureWarning,
+                stacklevel=3,
+            )
             return self._agg_by_level("mad", axis=axis, level=level, skipna=skipna)
 
         data = self._get_numeric_data()

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1894,8 +1894,16 @@ def count(self, level=None):
         """
         if level is None:
             return notna(self._values).sum()
-        elif not isinstance(self.index, MultiIndex):
-            raise ValueError("Series.count level is only valid with a MultiIndex")
+        else:
+            warnings.warn(
+                "Using the level keyword in DataFrame and Series aggregations is "
+                "deprecated and will be removed in a future version. Use groupby "
+                "instead. ser.count(level=1) should use ser.groupby(level=1).count().",
+                FutureWarning,
+                stacklevel=2,
+            )
+            if not isinstance(self.index, MultiIndex):
+                raise ValueError("Series.count level is only valid with a MultiIndex")
 
         index = self.index
         assert isinstance(index, MultiIndex)  # for mypy

diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py
@@ -1,33 +1,11 @@
-import numpy as np
-import pytest
-
 from pandas import (
     DataFrame,
-    Index,
     Series,
 )
 import pandas._testing as tm
 
 
 class TestDataFrameCount:
-    def test_count_multiindex(self, multiindex_dataframe_random_data):
-        frame = multiindex_dataframe_random_data
-
-        frame = frame.copy()
-        frame.index.names = ["a", "b"]
-
-        result = frame.count(level="b")
-        expected = frame.count(level=1)
-        tm.assert_frame_equal(result, expected, check_names=False)
-
-        result = frame.count(level="a")
-        expected = frame.count(level=0)
-        tm.assert_frame_equal(result, expected, check_names=False)
-
-        msg = "Level x not found"
-        with pytest.raises(KeyError, match=msg):
-            frame.count(level="x")
-
     def test_count(self):
         # corner case
         frame = DataFrame()
@@ -59,85 +37,3 @@ def test_count_objects(self, float_string_frame):
 
         tm.assert_series_equal(dm.count(), df.count())
         tm.assert_series_equal(dm.count(1), df.count(1))
-
-    def test_count_level_corner(self, multiindex_dataframe_random_data):
-        frame = multiindex_dataframe_random_data
-
-        ser = frame["A"][:0]
-        result = ser.count(level=0)
-        expected = Series(0, index=ser.index.levels[0], name="A")
-        tm.assert_series_equal(result, expected)
-
-        df = frame[:0]
-        result = df.count(level=0)
-        expected = (
-            DataFrame(
-                index=ser.index.levels[0].set_names(["first"]), columns=df.columns
-            )
-            .fillna(0)
-            .astype(np.int64)
-        )
-        tm.assert_frame_equal(result, expected)
-
-    def test_count_index_with_nan(self):
-        # https://github.com/pandas-dev/pandas/issues/21824
-        df = DataFrame(
-            {
-                "Person": ["John", "Myla", None, "John", "Myla"],
-                "Age": [24.0, 5, 21.0, 33, 26],
-                "Single": [False, True, True, True, False],
-            }
-        )
-
-        # count on row labels
-        res = df.set_index(["Person", "Single"]).count(level="Person")
-        expected = DataFrame(
-            index=Index(["John", "Myla"], name="Person"),
-            columns=Index(["Age"]),
-            data=[2, 2],
-        )
-        tm.assert_frame_equal(res, expected)
-
-        # count on column labels
-        res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1)
-        expected = DataFrame(
-            columns=Index(["John", "Myla"], name="Person"),
-            index=Index(["Age"]),
-            data=[[2, 2]],
-        )
-        tm.assert_frame_equal(res, expected)
-
-    def test_count_level(
-        self,
-        multiindex_year_month_day_dataframe_random_data,
-        multiindex_dataframe_random_data,
-    ):
-        ymd = multiindex_year_month_day_dataframe_random_data
-        frame = multiindex_dataframe_random_data
-
-        def _check_counts(frame, axis=0):
-            index = frame._get_axis(axis)
-            for i in range(index.nlevels):
-                result = frame.count(axis=axis, level=i)
-                expected = frame.groupby(axis=axis, level=i).count()
-                expected = expected.reindex_like(result).astype("i8")
-                tm.assert_frame_equal(result, expected)
-
-        frame.iloc[1, [1, 2]] = np.nan
-        frame.iloc[7, [0, 1]] = np.nan
-        ymd.iloc[1, [1, 2]] = np.nan
-        ymd.iloc[7, [0, 1]] = np.nan
-
-        _check_counts(frame)
-        _check_counts(ymd)
-        _check_counts(frame.T, axis=1)
-        _check_counts(ymd.T, axis=1)
-
-        # can't call with level on regular DataFrame
-        df = tm.makeTimeDataFrame()
-        with pytest.raises(TypeError, match="hierarchical"):
-            df.count(level=0)
-
-        frame["D"] = "foo"
-        result = frame.count(level=0, numeric_only=True)
-        tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp"))
diff --git a/pandas/tests/frame/methods/test_count_with_level_deprecated.py b/pandas/tests/frame/methods/test_count_with_level_deprecated.py
@@ -0,0 +1,123 @@
+import numpy as np
+import pytest
+
+from pandas import (
+    DataFrame,
+    Index,
+    Series,
+)
+import pandas._testing as tm
+
+
+class TestDataFrameCount:
+    def test_count_multiindex(self, multiindex_dataframe_random_data):
+        frame = multiindex_dataframe_random_data
+
+        frame = frame.copy()
+        frame.index.names = ["a", "b"]
+
+        with tm.assert_produces_warning(FutureWarning):
+            result = frame.count(level="b")
+        with tm.assert_produces_warning(FutureWarning):
+            expected = frame.count(level=1)
+        tm.assert_frame_equal(result, expected, check_names=False)
+
+        with tm.assert_produces_warning(FutureWarning):
+            result = frame.count(level="a")
+        with tm.assert_produces_warning(FutureWarning):
+            expected = frame.count(level=0)
+        tm.assert_frame_equal(result, expected, check_names=False)
+
+        msg = "Level x not found"
+        with pytest.raises(KeyError, match=msg):
+            with tm.assert_produces_warning(FutureWarning):
+                frame.count(level="x")
+
+    def test_count_level_corner(self, multiindex_dataframe_random_data):
+        frame = multiindex_dataframe_random_data
+
+        ser = frame["A"][:0]
+        with tm.assert_produces_warning(FutureWarning):
+            result = ser.count(level=0)
+        expected = Series(0, index=ser.index.levels[0], name="A")
+        tm.assert_series_equal(result, expected)
+
+        df = frame[:0]
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.count(level=0)
+        expected = (
+            DataFrame(
+                index=ser.index.levels[0].set_names(["first"]), columns=df.columns
+            )
+            .fillna(0)
+            .astype(np.int64)
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_count_index_with_nan(self):
+        # https://github.com/pandas-dev/pandas/issues/21824
+        df = DataFrame(
+            {
+                "Person": ["John", "Myla", None, "John", "Myla"],
+                "Age": [24.0, 5, 21.0, 33, 26],
+                "Single": [False, True, True, True, False],
+            }
+        )
+
+        # count on row labels
+        with tm.assert_produces_warning(FutureWarning):
+            res = df.set_index(["Person", "Single"]).count(level="Person")
+        expected = DataFrame(
+            index=Index(["John", "Myla"], name="Person"),
+            columns=Index(["Age"]),
+            data=[2, 2],
+        )
+        tm.assert_frame_equal(res, expected)
+
+        # count on column labels
+        with tm.assert_produces_warning(FutureWarning):
+            res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1)
+        expected = DataFrame(
+            columns=Index(["John", "Myla"], name="Person"),
+            index=Index(["Age"]),
+            data=[[2, 2]],
+        )
+        tm.assert_frame_equal(res, expected)
+
+    def test_count_level(
+        self,
+        multiindex_year_month_day_dataframe_random_data,
+        multiindex_dataframe_random_data,
+    ):
+        ymd = multiindex_year_month_day_dataframe_random_data
+        frame = multiindex_dataframe_random_data
+
+        def _check_counts(frame, axis=0):
+            index = frame._get_axis(axis)
+            for i in range(index.nlevels):
+                with tm.assert_produces_warning(FutureWarning):
+                    result = frame.count(axis=axis, level=i)
+                expected = frame.groupby(axis=axis, level=i).count()
+                expected = expected.reindex_like(result).astype("i8")
+                tm.assert_frame_equal(result, expected)
+
+        frame.iloc[1, [1, 2]] = np.nan
+        frame.iloc[7, [0, 1]] = np.nan
+        ymd.iloc[1, [1, 2]] = np.nan
+        ymd.iloc[7, [0, 1]] = np.nan
+
+        _check_counts(frame)
+        _check_counts(ymd)
+        _check_counts(frame.T, axis=1)
+        _check_counts(ymd.T, axis=1)
+
+        # count(level=...) requires a MultiIndex; a flat index raises TypeError
+        df = tm.makeTimeDataFrame()
+        with pytest.raises(TypeError, match="hierarchical"):
+            with tm.assert_produces_warning(FutureWarning):
+                df.count(level=0)
+
+        frame["D"] = "foo"
+        with tm.assert_produces_warning(FutureWarning):
+            result = frame.count(level=0, numeric_only=True)
+        tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp"))