Skip to content

Commit

Permalink
DEPR: pinning name in SeriesGroupBy.agg (#51703)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Mar 3, 2023
1 parent 56508fb commit 455ffb2
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ Other API changes
Deprecations
~~~~~~~~~~~~
- Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
- Deprecated pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
-

.. ---------------------------------------------------------------------------
Expand Down
17 changes: 17 additions & 0 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
Union,
cast,
)
import warnings

import numpy as np

Expand All @@ -49,6 +50,7 @@
Substitution,
doc,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
ensure_int64,
Expand Down Expand Up @@ -270,6 +272,16 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
# pinned in _python_agg_general, only in _aggregate_named
result = self._aggregate_named(func, *args, **kwargs)

warnings.warn(
"Pinning the groupby key to each group in "
f"{type(self).__name__}.agg is deprecated, and cases that "
"relied on it will raise in a future version. "
"If your operation requires utilizing the groupby keys, "
"iterate over the groupby object instead.",
FutureWarning,
stacklevel=find_stack_level(),
)

# result is a dict whose keys are the elements of result_index
result = Series(result, index=self.grouper.result_index)
result = self._wrap_aggregated_output(result)
Expand Down Expand Up @@ -407,6 +419,7 @@ def _aggregate_named(self, func, *args, **kwargs):
for name, group in self.grouper.get_iterator(
self._selected_obj, axis=self.axis
):
# needed for pandas/tests/groupby/test_groupby.py::test_basic_aggregations
object.__setattr__(group, "name", name)

output = func(group, *args, **kwargs)
Expand Down Expand Up @@ -1537,6 +1550,7 @@ def _transform_general(self, func, *args, **kwargs):
except StopIteration:
pass
else:
# 2023-02-27 No tests broken by disabling this pinning
object.__setattr__(group, "name", name)
try:
path, res = self._choose_path(fast_path, slow_path, group)
Expand All @@ -1552,6 +1566,7 @@ def _transform_general(self, func, *args, **kwargs):
for name, group in gen:
if group.size == 0:
continue
# 2023-02-27 No tests broken by disabling this pinning
object.__setattr__(group, "name", name)
res = path(group)

Expand Down Expand Up @@ -1721,6 +1736,8 @@ def filter(self, func, dropna: bool = True, *args, **kwargs):
gen = self.grouper.get_iterator(obj, axis=self.axis)

for name, group in gen:
# 2023-02-27 no tests are broken by disabling this pinning, but it is
# documented in the docstring above.
object.__setattr__(group, "name", name)

res = func(group, *args, **kwargs)
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,11 @@ def apply_groupwise(
zipped = zip(group_keys, splitter)

for key, group in zipped:
# Pinning name is needed for
# test_group_apply_once_per_group,
# test_inconsistent_return_type, test_set_group_name,
# test_group_name_available_in_inference_pass,
# test_groupby_multi_timezone
object.__setattr__(group, "name", key)

# group might be modified
Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def test_groupby_std_datetimelike():


@pytest.mark.parametrize("dtype", ["int64", "int32", "float64", "float32"])
def test_basic(dtype):
def test_basic_aggregations(dtype):
data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype)

index = np.arange(9)
Expand Down Expand Up @@ -102,7 +102,13 @@ def test_basic(dtype):
grouped.aggregate({"one": np.mean, "two": np.std})

group_constants = {0: 10, 1: 20, 2: 30}
agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
msg = (
"Pinning the groupby key to each group in SeriesGroupBy.agg is deprecated, "
"and cases that relied on it will raise in a future version"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
# GH#41090
agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
assert agged[1] == 21

# corner cases
Expand Down

0 comments on commit 455ffb2

Please sign in to comment.