From 8da1d39e96de8c81efa7b38df64fe1ba0739a51b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 7 Jan 2024 10:53:48 +0100 Subject: [PATCH 1/2] BUG: fix subclass metadata preservation in groupby column selection --- pandas/core/frame.py | 4 +++- pandas/tests/groupby/test_groupby_subclass.py | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8db437ccec389..021c7b74adb7f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4016,7 +4016,9 @@ def _getitem_nocopy(self, key: list): copy=False, only_slice=True, ) - return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) + result = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) + result = result.__finalize__(self) + return result def __getitem__(self, key): check_dict_or_set_indexers(key) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index bf809bd5db437..d54c41d6c4e8b 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -69,6 +69,7 @@ def test_groupby_preserves_metadata(): def func(group): assert isinstance(group, tm.SubclassedDataFrame) assert hasattr(group, "testattr") + assert group.testattr == "hello" return group.testattr msg = "DataFrameGroupBy.apply operated on the grouping columns" @@ -79,6 +80,12 @@ def func(group): expected = tm.SubclassedSeries(["hello"] * 3, index=Index([7, 8, 9], name="c")) tm.assert_series_equal(result, expected) + result = custom_df.groupby("c").apply(func, include_groups=False) + tm.assert_series_equal(result, expected) + + result = custom_df.groupby("c")[["a", "b"]].apply(func) + tm.assert_series_equal(result, expected) + def func2(group): assert isinstance(group, tm.SubclassedSeries) assert hasattr(group, "testattr") From 72346212b133af7b8720b452604fd49d0174339a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 7 Jan 2024 11:02:21 +0100 Subject: 
[PATCH 2/2] add whatsnew --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/tests/groupby/test_groupby_subclass.py | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 0b04a1d313a6d..2b436bc5d1855 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -873,6 +873,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.asfreq` and :meth:`Series.asfreq` with a :class:`DatetimeIndex` with non-nanosecond resolution incorrectly converting to nanosecond resolution (:issue:`55958`) - Bug in :meth:`DataFrame.ewm` when passed ``times`` with non-nanosecond ``datetime64`` or :class:`DatetimeTZDtype` dtype (:issue:`56262`) - Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` where grouping by a combination of ``Decimal`` and NA values would fail when ``sort=True`` (:issue:`54847`) +- Bug in :meth:`DataFrame.groupby` for DataFrame subclasses where subclass metadata was not preserved when selecting a subset of columns to apply the function to (:issue:`56761`) - Bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`) - Bug in :meth:`DataFrame.resample` when resampling on a :class:`ArrowDtype` of ``pyarrow.timestamp`` or ``pyarrow.duration`` type (:issue:`55989`) - Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index d54c41d6c4e8b..17ef6ee913463 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -83,6 +83,7 @@ def func(group): result = custom_df.groupby("c").apply(func, include_groups=False) tm.assert_series_equal(result, expected) + # https://github.com/pandas-dev/pandas/pull/56761 result = custom_df.groupby("c")[["a", "b"]].apply(func) tm.assert_series_equal(result, expected)