

DEPR: Revert DataFrameGroupBy.apply operating on the group keys (#52921)
Revert "DEPR: DataFrameGroupBy.apply operating on the group keys (#52477)"

This reverts commit 9b20759.
rhshadrach committed Apr 28, 2023
1 parent d607694 commit 9f5b44c
Showing 30 changed files with 256 additions and 704 deletions.
4 changes: 2 additions & 2 deletions doc/source/user_guide/cookbook.rst
@@ -459,7 +459,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
df
# List the size of the animals with the highest weight.
-df.groupby("animal")[["size", "weight"]].apply(lambda subf: subf["size"][subf["weight"].idxmax()])
+df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()])
`Using get_group
<https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key>`__
@@ -482,7 +482,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
return pd.Series(["L", avg_weight, True], index=["size", "weight", "adult"])
-expected_df = gb[["size", "weight"]].apply(GrowUp)
+expected_df = gb.apply(GrowUp)
expected_df
`Expanding apply
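The cookbook hunk above drops the explicit column selection before ``apply``. A minimal runnable sketch of that idiom, using a small made-up frame (the ``animal``/``size``/``weight`` names mirror the doc example; the data here is invented):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "animal": ["cat", "cat", "dog", "dog"],
        "size": ["S", "L", "M", "XL"],
        "weight": [8, 10, 20, 30],
    }
)

# Size of the heaviest animal per group. Selecting columns after
# groupby keeps the grouping column out of each sub-DataFrame.
out = df.groupby("animal")[["size", "weight"]].apply(
    lambda subf: subf["size"][subf["weight"].idxmax()]
)
```

Either spelling returns a Series indexed by the group key; the selected form simply avoids passing the ``animal`` column into the callable.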
14 changes: 4 additions & 10 deletions doc/source/user_guide/groupby.rst
@@ -429,12 +429,6 @@ This is mainly syntactic sugar for the alternative, which is much more verbose:
Additionally, this method avoids recomputing the internal grouping information
derived from the passed key.

-You can also include the grouping columns if you want to operate on them.
-
-.. ipython:: python
-
-   grouped[["A", "B"]].sum()
.. _groupby.iterating-label:

Iterating through groups
@@ -1072,7 +1066,7 @@ missing values with the ``ffill()`` method.
).set_index("date")
df_re
-df_re.groupby("group")[["val"]].resample("1D").ffill()
+df_re.groupby("group").resample("1D").ffill()
.. _groupby.filter:

@@ -1238,13 +1232,13 @@ the argument ``group_keys`` which defaults to ``True``. Compare

.. ipython:: python
-df.groupby("A", group_keys=True)[["B", "C", "D"]].apply(lambda x: x)
+df.groupby("A", group_keys=True).apply(lambda x: x)
with

.. ipython:: python
-df.groupby("A", group_keys=False)[["B", "C", "D"]].apply(lambda x: x)
+df.groupby("A", group_keys=False).apply(lambda x: x)
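The pair of hunks above contrasts ``group_keys=True`` and ``group_keys=False``. A small self-contained sketch (toy data; the column names ``A``/``B`` are assumed, not the doc's exact frame):

```python
import pandas as pd

df = pd.DataFrame({"A": ["x", "x", "y"], "B": [1, 2, 3]})

# group_keys=True prepends the group label as an extra index level
# on the concatenated result of apply.
with_keys = df.groupby("A", group_keys=True)[["B"]].apply(lambda g: g)

# group_keys=False leaves the original index untouched.
without_keys = df.groupby("A", group_keys=False)[["B"]].apply(lambda g: g)
```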
Numba Accelerated Routines
@@ -1728,7 +1722,7 @@ column index name will be used as the name of the inserted column:
result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()}
return pd.Series(result, name="metrics")
-result = df.groupby("a")[["b", "c"]].apply(compute_metrics)
+result = df.groupby("a").apply(compute_metrics)
result
22 changes: 5 additions & 17 deletions doc/source/whatsnew/v0.14.0.rst
@@ -328,25 +328,13 @@ More consistent behavior for some groupby methods:

- groupby ``head`` and ``tail`` now act more like ``filter`` rather than an aggregation:

-.. code-block:: ipython
-
-   In [1]: df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
-
-   In [2]: g = df.groupby('A')
-
-   In [3]: g.head(1)  # filters DataFrame
-   Out[3]:
-      A  B
-   0  1  2
-   2  5  6
-
-   In [4]: g.apply(lambda x: x.head(1))  # used to simply fall-through
-   Out[4]:
-        A  B
-   A
-   1 0  1  2
-   5 2  5  6
+.. ipython:: python
+
+   df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
+   g = df.groupby('A')
+   g.head(1)  # filters DataFrame
+   g.apply(lambda x: x.head(1))  # used to simply fall-through
- groupby head and tail respect column selection:

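The v0.14.0 note above says groupby ``head`` and ``tail`` behave like ``filter``. A runnable restatement of the doc's own example:

```python
import pandas as pd

df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"])
g = df.groupby("A")

# head(1) filters: it returns the first row of each group with the
# original index preserved, not an aggregated or re-indexed frame.
first_rows = g.head(1)
```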
93 changes: 6 additions & 87 deletions doc/source/whatsnew/v0.18.1.rst
@@ -77,52 +77,9 @@ Previously you would have to do this to get a rolling window mean per-group:
df = pd.DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)})
df
-.. code-block:: ipython
-
-   In [1]: df.groupby("A").apply(lambda x: x.rolling(4).B.mean())
-   Out[1]:
-   A
-   1  0      NaN
-      1      NaN
-      2      NaN
-      3      1.5
-      4      2.5
-      5      3.5
-      6      4.5
-      7      5.5
-      8      6.5
-      9      7.5
-      10     8.5
-      11     9.5
-      12    10.5
-      13    11.5
-      14    12.5
-      15    13.5
-      16    14.5
-      17    15.5
-      18    16.5
-      19    17.5
-   2  20     NaN
-      21     NaN
-      22     NaN
-      23    21.5
-      24    22.5
-      25    23.5
-      26    24.5
-      27    25.5
-      28    26.5
-      29    27.5
-      30    28.5
-      31    29.5
-   3  32     NaN
-      33     NaN
-      34     NaN
-      35    33.5
-      36    34.5
-      37    35.5
-      38    36.5
-      39    37.5
-   Name: B, dtype: float64
+.. ipython:: python
+
+   df.groupby("A").apply(lambda x: x.rolling(4).B.mean())
Now you can do:
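The "now you can do" hunk itself is collapsed in this view. As an illustrative stand-in, the dedicated groupby-rolling path this whatsnew entry introduced can be sketched as follows (smaller invented data than the doc's 40-row example, and ``rolling(2)`` instead of ``rolling(4)`` to keep the output short):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"A": [1] * 4 + [2] * 4, "B": np.arange(8.0)})

# Rolling mean computed within each group; the result carries a
# (group, original index) MultiIndex.
rolled = df.groupby("A")["B"].rolling(2).mean()
```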

@@ -144,53 +101,15 @@ For ``.resample(..)`` type of operations, previously you would have to:
df
-.. code-block:: ipython
-
-   In [1]: df.groupby("group").apply(lambda x: x.resample("1D").ffill())
-   Out[1]:
-                     group  val
-   group date
-   1     2016-01-03      1    5
-         2016-01-04      1    5
-         2016-01-05      1    5
-         2016-01-06      1    5
-         2016-01-07      1    5
-         2016-01-08      1    5
-         2016-01-09      1    5
-         2016-01-10      1    6
-   2     2016-01-17      2    7
-         2016-01-18      2    7
-         2016-01-19      2    7
-         2016-01-20      2    7
-         2016-01-21      2    7
-         2016-01-22      2    7
-         2016-01-23      2    7
-         2016-01-24      2    8
+.. ipython:: python
+
+   df.groupby("group").apply(lambda x: x.resample("1D").ffill())
Now you can do:

-.. code-block:: ipython
-
-   In [1]: df.groupby("group").resample("1D").ffill()
-   Out[1]:
-                     group  val
-   group date
-   1     2016-01-03      1    5
-         2016-01-04      1    5
-         2016-01-05      1    5
-         2016-01-06      1    5
-         2016-01-07      1    5
-         2016-01-08      1    5
-         2016-01-09      1    5
-         2016-01-10      1    6
-   2     2016-01-17      2    7
-         2016-01-18      2    7
-         2016-01-19      2    7
-         2016-01-20      2    7
-         2016-01-21      2    7
-         2016-01-22      2    7
-         2016-01-23      2    7
-         2016-01-24      2    8
+.. ipython:: python
+
+   df.groupby("group").resample("1D").ffill()
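A self-contained version of the groupby-resample pattern in the hunk above (dates trimmed to two short runs; values invented):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "date": pd.to_datetime(
            ["2016-01-03", "2016-01-05", "2016-01-17", "2016-01-19"]
        ),
        "group": [1, 1, 2, 2],
        "val": [5, 6, 7, 8],
    }
).set_index("date")

# Resample each group to daily frequency and forward-fill the gaps.
filled = df.groupby("group").resample("1D").ffill()
```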
.. _whatsnew_0181.enhancements.method_chain:

26 changes: 13 additions & 13 deletions pandas/core/frame.py
@@ -8583,20 +8583,20 @@ def update(
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
... 'Parrot', 'Parrot'],
... 'Max Speed': [380., 370., 24., 26.]})
->>> df.groupby("Animal", group_keys=True)[['Max Speed']].apply(lambda x: x)
-          Max Speed
+>>> df.groupby("Animal", group_keys=True).apply(lambda x: x)
+          Animal  Max Speed
 Animal
-Falcon 0      380.0
-       1      370.0
-Parrot 2       24.0
-       3       26.0
->>> df.groupby("Animal", group_keys=False)[['Max Speed']].apply(lambda x: x)
-   Max Speed
-0      380.0
-1      370.0
-2       24.0
-3       26.0
+Falcon 0  Falcon      380.0
+       1  Falcon      370.0
+Parrot 2  Parrot       24.0
+       3  Parrot       26.0
+>>> df.groupby("Animal", group_keys=False).apply(lambda x: x)
+   Animal  Max Speed
+0  Falcon      380.0
+1  Falcon      370.0
+2  Parrot       24.0
+3  Parrot      26.0
"""
)
)
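The docstring hunk above restores examples in which ``apply`` sees the grouping column. Selecting columns after ``groupby`` remains the explicit, version-stable way to keep the grouping column out of the callable; a sketch with the docstring's data:

```python
import pandas as pd

df = pd.DataFrame(
    {
        "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
        "Max Speed": [380.0, 370.0, 24.0, 26.0],
    }
)

# Selecting columns first means the callable never receives the
# 'Animal' grouping column.
subset = df.groupby("Animal", group_keys=False)[["Max Speed"]].apply(lambda x: x)
```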
80 changes: 30 additions & 50 deletions pandas/core/groupby/groupby.py
@@ -260,7 +260,7 @@ class providing the base-class of operations.
each group together into a Series, including setting the index as
appropriate:
->>> g1[['B', 'C']].apply(lambda x: x.C.max() - x.B.min())
+>>> g1.apply(lambda x: x.C.max() - x.B.min())
A
a 5
b 2
@@ -1488,16 +1488,6 @@ def f(g):
with option_context("mode.chained_assignment", None):
try:
result = self._python_apply_general(f, self._selected_obj)
-                if (
-                    not isinstance(self.obj, Series)
-                    and self._selection is None
-                    and self._selected_obj.shape != self._obj_with_exclusions.shape
-                ):
-                    warnings.warn(
-                        message=_apply_groupings_depr.format(type(self).__name__),
-                        category=FutureWarning,
-                        stacklevel=find_stack_level(),
-                    )
except TypeError:
# gh-20949
# try again, with .apply acting as a filtering
@@ -2659,55 +2649,55 @@ def resample(self, rule, *args, **kwargs):
Downsample the DataFrame into 3 minute bins and sum the values of
the timestamps falling into a bin.
->>> df.groupby('a')[['b']].resample('3T').sum()
-                        b
+>>> df.groupby('a').resample('3T').sum()
+                        a  b
 a
-0 2000-01-01 00:00:00   2
-  2000-01-01 00:03:00   1
-5 2000-01-01 00:00:00   1
+0 2000-01-01 00:00:00   0  2
+  2000-01-01 00:03:00   0  1
+5 2000-01-01 00:00:00   5  1
Upsample the series into 30 second bins.
->>> df.groupby('a')[['b']].resample('30S').sum()
-                        b
+>>> df.groupby('a').resample('30S').sum()
+                        a  b
 a
-0 2000-01-01 00:00:00   1
-  2000-01-01 00:00:30   0
-  2000-01-01 00:01:00   1
-  2000-01-01 00:01:30   0
-  2000-01-01 00:02:00   0
-  2000-01-01 00:02:30   0
-  2000-01-01 00:03:00   1
-5 2000-01-01 00:02:00   1
+0 2000-01-01 00:00:00   0  1
+  2000-01-01 00:00:30   0  0
+  2000-01-01 00:01:00   0  1
+  2000-01-01 00:01:30   0  0
+  2000-01-01 00:02:00   0  0
+  2000-01-01 00:02:30   0  0
+  2000-01-01 00:03:00   0  1
+5 2000-01-01 00:02:00   5  1
Resample by month. Values are assigned to the month of the period.
->>> df.groupby('a')[['b']].resample('M').sum()
-            b
+>>> df.groupby('a').resample('M').sum()
+            a  b
 a
-0 2000-01-31 3
-5 2000-01-31 1
+0 2000-01-31 0  3
+5 2000-01-31 5  1
Downsample the series into 3 minute bins as above, but close the right
side of the bin interval.
->>> df.groupby('a')[['b']].resample('3T', closed='right').sum()
-                        b
+>>> df.groupby('a').resample('3T', closed='right').sum()
+                        a  b
 a
-0 1999-12-31 23:57:00   1
-  2000-01-01 00:00:00   2
-5 2000-01-01 00:00:00   1
+0 1999-12-31 23:57:00   0  1
+  2000-01-01 00:00:00   0  2
+5 2000-01-01 00:00:00   5  1
Downsample the series into 3 minute bins and close the right side of
the bin interval, but label each bin using the right edge instead of
the left.
->>> df.groupby('a')[['b']].resample('3T', closed='right', label='right').sum()
-                        b
+>>> df.groupby('a').resample('3T', closed='right', label='right').sum()
+                        a  b
 a
-0 2000-01-01 00:00:00   1
-  2000-01-01 00:03:00   2
-5 2000-01-01 00:03:00   1
+0 2000-01-01 00:00:00   0  1
+  2000-01-01 00:03:00   0  2
+5 2000-01-01 00:03:00   5  1
"""
from pandas.core.resample import get_resampler_for_grouping
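The resample docstring hunks above can be illustrated with a tiny, made-up frame (not the docstring's exact data). This sketch uses the modern ``"3min"`` alias rather than the docstring's ``'3T'`` (the ``'T'`` alias is deprecated in recent pandas) and keeps the column selection so the grouping column stays out of the sums:

```python
import pandas as pd

idx = pd.date_range("2000-01-01", periods=4, freq="1min")
df = pd.DataFrame({"a": [0, 0, 0, 5], "b": [1, 1, 1, 1]}, index=idx)

# Within each 'a' group, sum 'b' over 3-minute bins; the result has a
# (group, timestamp) MultiIndex.
summed = df.groupby("a")[["b"]].resample("3min").sum()
```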

@@ -4329,13 +4319,3 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
else:
mi = MultiIndex.from_product([idx, qs])
return mi


-# GH#7155
-_apply_groupings_depr = (
-    "{}.apply operated on the grouping columns. This behavior is deprecated, "
-    "and in a future version of pandas the grouping columns will be excluded "
-    "from the operation. Select the columns to operate on after groupby to "
-    "either explicitly include or exclude the groupings and silence "
-    "this warning."
-)
