Skip to content

Commit

Permalink
Backport PR #52071: DEPR: Properly enforce group_keys defaulting to False in resample (#52078)
Browse files Browse the repository at this point in the history

Backport PR #52071: DEPR: Properly enforce group_keys defaulting to False in resample
  • Loading branch information
rhshadrach committed Mar 19, 2023
1 parent 64cc028 commit 51121ee
Show file tree
Hide file tree
Showing 9 changed files with 37 additions and 45 deletions.
12 changes: 1 addition & 11 deletions doc/source/user_guide/groupby.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1240,18 +1240,8 @@ a common dtype will be determined in the same way as ``DataFrame`` construction.
Control grouped column(s) placement with ``group_keys``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. versionchanged:: 1.5.0

If ``group_keys=True`` is specified when calling :meth:`~DataFrame.groupby`,
functions passed to ``apply`` that return like-indexed outputs will have the
group keys added to the result index. Previous versions of pandas would add
the group keys only when the result from the applied function had a different
index than the input. If ``group_keys`` is not specified, the group keys will
not be added for like-indexed outputs. In the future this behavior
will change to always respect ``group_keys``, which defaults to ``True``.

To control whether the grouped column(s) are included in the indices, you can use
the argument ``group_keys``. Compare
the argument ``group_keys`` which defaults to ``True``. Compare

.. ipython:: python
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10989,7 +10989,7 @@ def resample(
level: Level = None,
origin: str | TimestampConvertibleTypes = "start_day",
offset: TimedeltaConvertibleTypes | None = None,
group_keys: bool | lib.NoDefault = no_default,
group_keys: bool = False,
) -> Resampler:
return super().resample(
rule=rule,
Expand Down
19 changes: 11 additions & 8 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8509,7 +8509,7 @@ def resample(
level: Level = None,
origin: str | TimestampConvertibleTypes = "start_day",
offset: TimedeltaConvertibleTypes | None = None,
group_keys: bool_t | lib.NoDefault = lib.no_default,
group_keys: bool_t = False,
) -> Resampler:
"""
Resample time-series data.
Expand Down Expand Up @@ -8570,17 +8570,20 @@ def resample(
.. versionadded:: 1.1.0
group_keys : bool, optional
group_keys : bool, default False
Whether to include the group keys in the result index when using
``.apply()`` on the resampled object. Not specifying ``group_keys``
will retain values-dependent behavior from pandas 1.4
and earlier (see :ref:`pandas 1.5.0 Release notes
<whatsnew_150.enhancements.resample_group_keys>`
for examples). In a future version of pandas, the behavior will
default to the same as specifying ``group_keys=False``.
``.apply()`` on the resampled object.
.. versionadded:: 1.5.0
Not specifying ``group_keys`` will retain values-dependent behavior
from pandas 1.4 and earlier (see :ref:`pandas 1.5.0 Release notes
<whatsnew_150.enhancements.resample_group_keys>` for examples).
.. versionchanged:: 2.0.0
``group_keys`` now defaults to ``False``.
Returns
-------
pandas.core.Resampler
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,8 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
axis: AxisInt
grouper: ops.BaseGrouper
keys: _KeysArgType | None = None
group_keys: bool | lib.NoDefault
level: IndexLabel | None = None
group_keys: bool

@final
def __len__(self) -> int:
Expand Down Expand Up @@ -905,7 +906,7 @@ def __init__(
selection: IndexLabel | None = None,
as_index: bool = True,
sort: bool = True,
group_keys: bool | lib.NoDefault = True,
group_keys: bool = True,
observed: bool = False,
dropna: bool = True,
) -> None:
Expand Down Expand Up @@ -4240,7 +4241,7 @@ def get_groupby(
by: _KeysArgType | None = None,
axis: AxisInt = 0,
grouper: ops.BaseGrouper | None = None,
group_keys: bool | lib.NoDefault = True,
group_keys: bool = True,
) -> GroupBy:
klass: type[GroupBy]
if isinstance(obj, Series):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def __init__(
kind=None,
*,
gpr_index: Index,
group_keys: bool | lib.NoDefault = lib.no_default,
group_keys: bool = False,
selection=None,
) -> None:
self._timegrouper = timegrouper
Expand Down Expand Up @@ -1584,7 +1584,7 @@ def __init__(
origin: Literal["epoch", "start", "start_day", "end", "end_day"]
| TimestampConvertibleTypes = "start_day",
offset: TimedeltaConvertibleTypes | None = None,
group_keys: bool | lib.NoDefault = True,
group_keys: bool = False,
**kwargs,
) -> None:
# Check for correctness of the keyword arguments which would
Expand Down
7 changes: 2 additions & 5 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,7 @@
reshape,
)
from pandas._libs.internals import BlockValuesRefs
from pandas._libs.lib import (
is_range_indexer,
no_default,
)
from pandas._libs.lib import is_range_indexer
from pandas._typing import (
AggFuncType,
AlignJoin,
Expand Down Expand Up @@ -5701,7 +5698,7 @@ def resample(
level: Level = None,
origin: str | TimestampConvertibleTypes = "start_day",
offset: TimedeltaConvertibleTypes | None = None,
group_keys: bool | lib.NoDefault = no_default,
group_keys: bool = False,
) -> Resampler:
return super().resample(
rule=rule,
Expand Down
11 changes: 7 additions & 4 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,21 +125,24 @@
Specifying ``sort=False`` with an ordered categorical grouper will no
longer sort the values.
group_keys : bool, optional
group_keys : bool, default True
When calling apply and the ``by`` argument produces a like-indexed
(i.e. :ref:`a transform <groupby.transform>`) result, add group keys to
index to identify pieces. By default group keys are not included
when the result's index (and column) labels match the inputs, and
are included otherwise. This argument has no effect if the result produced
is not like-indexed with respect to the input.
are included otherwise.
.. versionchanged:: 1.5.0
Warns that `group_keys` will no longer be ignored when the
Warns that ``group_keys`` will no longer be ignored when the
result from ``apply`` is a like-indexed Series or DataFrame.
Specify ``group_keys`` explicitly to include the group keys or
not.
.. versionchanged:: 2.0.0
``group_keys`` now defaults to ``True``.
observed : bool, default False
This only applies if any of the groupers are Categoricals.
If True: only show observed values for categorical groupers.
Expand Down
18 changes: 8 additions & 10 deletions pandas/tests/resample/test_resample_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,24 +96,22 @@ def test_resample_group_keys():
df = DataFrame({"A": 1, "B": 2}, index=date_range("2000", periods=10))
expected = df.copy()

# no warning
# group_keys=False
g = df.resample("5D", group_keys=False)
with tm.assert_produces_warning(None):
result = g.apply(lambda x: x)
result = g.apply(lambda x: x)
tm.assert_frame_equal(result, expected)

# no warning, group keys
expected.index = pd.MultiIndex.from_arrays(
[pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index]
)

# group_keys defaults to False
g = df.resample("5D")
result = g.apply(lambda x: x)
tm.assert_frame_equal(result, expected)

# group_keys=True
expected.index = pd.MultiIndex.from_arrays(
[pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index]
)
g = df.resample("5D", group_keys=True)
with tm.assert_produces_warning(None):
result = g.apply(lambda x: x)
result = g.apply(lambda x: x)
tm.assert_frame_equal(result, expected)


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_resampler_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def f(x):
tm.assert_frame_equal(result, expected)

# A case for series
expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f)
expected = df["col1"].groupby(pd.Grouper(freq="M"), group_keys=False).apply(f)
result = df["col1"].resample("M").apply(f)
tm.assert_series_equal(result, expected)

Expand Down

0 comments on commit 51121ee

Please sign in to comment.