Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REGR: SeriesGroupBy.agg with multiple categoricals, as_index=False, and a list fails #52850

Merged
merged 2 commits into from
Apr 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Fixed regressions
- Fixed regression in :meth:`DataFrame.sort_values` not resetting index when :class:`DataFrame` is already sorted and ``ignore_index=True`` (:issue:`52553`)
- Fixed regression in :meth:`MultiIndex.isin` raising ``TypeError`` for ``Generator`` (:issue:`52568`)
- Fixed regression in :meth:`Series.describe` showing ``RuntimeWarning`` for extension dtype :class:`Series` with one element (:issue:`52515`)
- Fixed regression in :meth:`SeriesGroupBy.agg` failing when grouping with categorical data, multiple groupings, ``as_index=False``, and a list of aggregations (:issue:`52760`)

.. ---------------------------------------------------------------------------
.. _whatsnew_201.bug_fixes:
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
assert columns is not None # for mypy
ret.columns = columns
if not self.as_index:
ret = self._insert_inaxis_grouper(ret)
ret.index = default_index(len(ret))
ret = ret.reset_index()
return ret

else:
Expand Down Expand Up @@ -352,7 +351,6 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame:
output = self.obj._constructor_expanddim(indexed_output, index=None)
output.columns = Index(key.label for key in results)

output = self._reindex_output(output)
return output

def _wrap_applied_output(
Expand Down
48 changes: 48 additions & 0 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
qcut,
)
import pandas._testing as tm
from pandas.api.typing import SeriesGroupBy
from pandas.tests.groupby import get_groupby_method_args


Expand Down Expand Up @@ -2036,3 +2037,50 @@ def test_groupby_default_depr(cat_columns, keys):
klass = FutureWarning if set(cat_columns) & set(keys) else None
with tm.assert_produces_warning(klass, match=msg):
df.groupby(keys)


@pytest.mark.parametrize("test_series", [True, False])
@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]])
def test_agg_list(request, as_index, observed, reduction_func, test_series, keys):
    """Compare ``agg([reduction_func])`` with calling the reduction directly.

    The frame is grouped by one or two categorical key columns; when
    ``test_series`` is true the comparison is made on the ``"b"`` column
    selection instead of on the whole groupby object.

    ``as_index``, ``observed`` and ``reduction_func`` are not parametrized
    here -- presumably they are pytest fixtures supplied elsewhere in the
    test suite (TODO confirm against conftest).
    """
    # GH#52760
    if test_series and reduction_func == "corrwith":
        # Guard the skip: if SeriesGroupBy ever gains corrwith, this assert
        # fails and the skip has to be revisited.
        assert not hasattr(SeriesGroupBy, "corrwith")
        pytest.skip("corrwith not implemented for SeriesGroupBy")
    elif reduction_func == "corrwith":
        msg = "GH#32293: attempts to call SeriesGroupBy.corrwith"
        request.node.add_marker(pytest.mark.xfail(reason=msg))
    elif (
        reduction_func == "nunique"
        and not test_series
        and len(keys) != 1
        and not observed
        and not as_index
    ):
        msg = "GH#52848 - raises a ValueError"
        request.node.add_marker(pytest.mark.xfail(reason=msg))

    df = DataFrame({"a1": [0, 0, 1], "a2": [2, 3, 3], "b": [4, 5, 6]})
    df = df.astype({"a1": "category", "a2": "category"})
    if "a2" not in keys:
        # Single-key case: drop the unused categorical column entirely.
        df = df.drop(columns="a2")
    gb = df.groupby(by=keys, as_index=as_index, observed=observed)
    if test_series:
        gb = gb["b"]
    args = get_groupby_method_args(reduction_func, df)

    result = gb.agg([reduction_func], *args)
    expected = getattr(gb, reduction_func)(*args)

    # Reshape the direct-call result into the layout produced by the
    # list-of-functions aggregation before comparing.
    if as_index and (test_series or reduction_func == "size"):
        # One-column frame whose column is named after the reduction.
        expected = expected.to_frame(reduction_func)
    if not test_series:
        if not as_index:
            # TODO: GH#52849 - as_index=False is not respected
            expected = expected.set_index(keys)
        # Two-level columns: (selected column "b", reduction name).
        expected.columns = MultiIndex(
            levels=[["b"], [reduction_func]], codes=[[0], [0]]
        )
    elif not as_index:
        # Grouping keys come back as leading columns, then the reduction.
        expected.columns = keys + [reduction_func]

    tm.assert_equal(result, expected)
Loading