Backport PR #52071: DEPR: Properly enforce group_keys defaulting to F…

…alse in resample (#52078) Backport PR #52071: DEPR: Properly enforce group_keys defaulting to False in resample
pandas-dev · Mar 19, 2023 · 51121ee · 51121ee
1 parent 64cc028
commit 51121ee
Show file tree

Hide file tree

Showing 9 changed files with 37 additions and 45 deletions.
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
@@ -1240,18 +1240,8 @@ a common dtype will be determined in the same way as ``DataFrame`` construction.
 Control grouped column(s) placement with ``group_keys``
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. versionchanged:: 1.5.0
-
-   If ``group_keys=True`` is specified when calling :meth:`~DataFrame.groupby`,
-   functions passed to ``apply`` that return like-indexed outputs will have the
-   group keys added to the result index. Previous versions of pandas would add
-   the group keys only when the result from the applied function had a different
-   index than the input. If ``group_keys`` is not specified, the group keys will
-   not be added for like-indexed outputs. In the future this behavior
-   will change to always respect ``group_keys``, which defaults to ``True``.
-
 To control whether the grouped column(s) are included in the indices, you can use
-the argument ``group_keys``. Compare
+the argument ``group_keys`` which defaults to ``True``. Compare
 
 .. ipython:: python
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -10989,7 +10989,7 @@ def resample(
         level: Level = None,
         origin: str | TimestampConvertibleTypes = "start_day",
         offset: TimedeltaConvertibleTypes | None = None,
-        group_keys: bool | lib.NoDefault = no_default,
+        group_keys: bool = False,
     ) -> Resampler:
         return super().resample(
             rule=rule,

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -8509,7 +8509,7 @@ def resample(
         level: Level = None,
         origin: str | TimestampConvertibleTypes = "start_day",
         offset: TimedeltaConvertibleTypes | None = None,
-        group_keys: bool_t | lib.NoDefault = lib.no_default,
+        group_keys: bool_t = False,
     ) -> Resampler:
         """
         Resample time-series data.
@@ -8570,17 +8570,20 @@ def resample(
 
             .. versionadded:: 1.1.0
 
-        group_keys : bool, optional
+        group_keys : bool, default False
             Whether to include the group keys in the result index when using
-            ``.apply()`` on the resampled object. Not specifying ``group_keys``
-            will retain values-dependent behavior from pandas 1.4
-            and earlier (see :ref:`pandas 1.5.0 Release notes
-            <whatsnew_150.enhancements.resample_group_keys>`
-            for examples). In a future version of pandas, the behavior will
-            default to the same as specifying ``group_keys=False``.
+            ``.apply()`` on the resampled object.
 
             .. versionadded:: 1.5.0
 
+                Not specifying ``group_keys`` will retain values-dependent behavior
+                from pandas 1.4 and earlier (see :ref:`pandas 1.5.0 Release notes
+                <whatsnew_150.enhancements.resample_group_keys>` for examples).
+
+            .. versionchanged:: 2.0.0
+
+                ``group_keys`` now defaults to ``False``.
+
         Returns
         -------
         pandas.core.Resampler

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -627,7 +627,8 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
     axis: AxisInt
     grouper: ops.BaseGrouper
     keys: _KeysArgType | None = None
-    group_keys: bool | lib.NoDefault
+    level: IndexLabel | None = None
+    group_keys: bool
 
     @final
     def __len__(self) -> int:
@@ -905,7 +906,7 @@ def __init__(
         selection: IndexLabel | None = None,
         as_index: bool = True,
         sort: bool = True,
-        group_keys: bool | lib.NoDefault = True,
+        group_keys: bool = True,
         observed: bool = False,
         dropna: bool = True,
     ) -> None:
@@ -4240,7 +4241,7 @@ def get_groupby(
     by: _KeysArgType | None = None,
     axis: AxisInt = 0,
     grouper: ops.BaseGrouper | None = None,
-    group_keys: bool | lib.NoDefault = True,
+    group_keys: bool = True,
 ) -> GroupBy:
     klass: type[GroupBy]
     if isinstance(obj, Series):

diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -152,7 +152,7 @@ def __init__(
         kind=None,
         *,
         gpr_index: Index,
-        group_keys: bool | lib.NoDefault = lib.no_default,
+        group_keys: bool = False,
         selection=None,
     ) -> None:
         self._timegrouper = timegrouper
@@ -1584,7 +1584,7 @@ def __init__(
         origin: Literal["epoch", "start", "start_day", "end", "end_day"]
         | TimestampConvertibleTypes = "start_day",
         offset: TimedeltaConvertibleTypes | None = None,
-        group_keys: bool | lib.NoDefault = True,
+        group_keys: bool = False,
         **kwargs,
     ) -> None:
         # Check for correctness of the keyword arguments which would

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -35,10 +35,7 @@
     reshape,
 )
 from pandas._libs.internals import BlockValuesRefs
-from pandas._libs.lib import (
-    is_range_indexer,
-    no_default,
-)
+from pandas._libs.lib import is_range_indexer
 from pandas._typing import (
     AggFuncType,
     AlignJoin,
@@ -5701,7 +5698,7 @@ def resample(
         level: Level = None,
         origin: str | TimestampConvertibleTypes = "start_day",
         offset: TimedeltaConvertibleTypes | None = None,
-        group_keys: bool | lib.NoDefault = no_default,
+        group_keys: bool = False,
     ) -> Resampler:
         return super().resample(
             rule=rule,

diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
@@ -125,21 +125,24 @@
         Specifying ``sort=False`` with an ordered categorical grouper will no
         longer sort the values.
 
-group_keys : bool, optional
+group_keys : bool, default True
     When calling apply and the ``by`` argument produces a like-indexed
     (i.e. :ref:`a transform <groupby.transform>`) result, add group keys to
     index to identify pieces. By default group keys are not included
     when the result's index (and column) labels match the inputs, and
-    are included otherwise. This argument has no effect if the result produced
-    is not like-indexed with respect to the input.
+    are included otherwise.
 
     .. versionchanged:: 1.5.0
 
-       Warns that `group_keys` will no longer be ignored when the
+       Warns that ``group_keys`` will no longer be ignored when the
        result from ``apply`` is a like-indexed Series or DataFrame.
        Specify ``group_keys`` explicitly to include the group keys or
        not.
 
+    .. versionchanged:: 2.0.0
+
+       ``group_keys`` now defaults to ``True``.
+
 observed : bool, default False
     This only applies if any of the groupers are Categoricals.
     If True: only show observed values for categorical groupers.

diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
@@ -96,24 +96,22 @@ def test_resample_group_keys():
     df = DataFrame({"A": 1, "B": 2}, index=date_range("2000", periods=10))
     expected = df.copy()
 
-    # no warning
+    # group_keys=False
     g = df.resample("5D", group_keys=False)
-    with tm.assert_produces_warning(None):
-        result = g.apply(lambda x: x)
+    result = g.apply(lambda x: x)
     tm.assert_frame_equal(result, expected)
 
-    # no warning, group keys
-    expected.index = pd.MultiIndex.from_arrays(
-        [pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index]
-    )
-
+    # group_keys defaults to False
     g = df.resample("5D")
     result = g.apply(lambda x: x)
     tm.assert_frame_equal(result, expected)
 
+    # group_keys=True
+    expected.index = pd.MultiIndex.from_arrays(
+        [pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index]
+    )
     g = df.resample("5D", group_keys=True)
-    with tm.assert_produces_warning(None):
-        result = g.apply(lambda x: x)
+    result = g.apply(lambda x: x)
     tm.assert_frame_equal(result, expected)
 
 

diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
@@ -280,7 +280,7 @@ def f(x):
     tm.assert_frame_equal(result, expected)
 
     # A case for series
-    expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f)
+    expected = df["col1"].groupby(pd.Grouper(freq="M"), group_keys=False).apply(f)
     result = df["col1"].resample("M").apply(f)
     tm.assert_series_equal(result, expected)