Backport PR #55042 on branch 2.1.x (REGR: DataFrameGroupBy.agg with d…

…uplicate column names and a dict) (#55056) Backport PR #55042: REGR: DataFrameGroupBy.agg with duplicate column names and a dict Co-authored-by: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com>
pandas-dev · Sep 7, 2023 · 2f0ef87 · 2f0ef87
1 parent 610c0f4
commit 2f0ef87
Show file tree

Hide file tree

Showing 3 changed files with 20 additions and 1 deletion.
diff --git a/doc/source/whatsnew/v2.1.1.rst b/doc/source/whatsnew/v2.1.1.rst
@@ -21,6 +21,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`)
 - Fixed regression in :meth:`DataFrame.filter` not respecting the order of elements for ``filter`` (:issue:`54980`)
 - Fixed regression in :meth:`DataFrame.to_sql` not roundtripping datetime columns correctly for sqlite (:issue:`54877`)
+- Fixed regression in :meth:`DataFrameGroupBy.agg` when aggregating a DataFrame with duplicate column names using a dictionary (:issue:`55006`)
 - Fixed regression in :meth:`MultiIndex.append` raising when appending overlapping :class:`IntervalIndex` levels (:issue:`54934`)
 - Fixed regression in :meth:`Series.drop_duplicates` for PyArrow strings (:issue:`54904`)
 - Fixed regression in :meth:`Series.interpolate` raising when ``fill_value`` was given (:issue:`54920`)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -434,7 +434,13 @@ def compute_dict_like(
             Data for result. When aggregating with a Series, this can contain any
             Python object.
         """
+        from pandas.core.groupby.generic import (
+            DataFrameGroupBy,
+            SeriesGroupBy,
+        )
+
         obj = self.obj
+        is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
         func = cast(AggFuncTypeDict, self.func)
         func = self.normalize_dictlike_arg(op_name, selected_obj, func)
 
@@ -448,7 +454,7 @@ def compute_dict_like(
             colg = obj._gotitem(selection, ndim=1)
             results = [getattr(colg, op_name)(how, **kwargs) for _, how in func.items()]
             keys = list(func.keys())
-        elif is_non_unique_col:
+        elif not is_groupby and is_non_unique_col:
             # key used for column selection and output
             # GH#51099
             results = []

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -513,6 +513,18 @@ def test_groupby_agg_dict_with_getitem():
     tm.assert_frame_equal(result, expected)
 
 
+def test_groupby_agg_dict_dup_columns():
+    # GH#55006
+    df = DataFrame(
+        [[1, 2, 3, 4], [1, 3, 4, 5], [2, 4, 5, 6]],
+        columns=["a", "b", "c", "c"],
+    )
+    gb = df.groupby("a")
+    result = gb.agg({"b": "sum"})
+    expected = DataFrame({"b": [5, 4]}, index=Index([1, 2], name="a"))
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "op",
     [