def test_groupby_apply_store_copy():
    """
    Copies of group chunks stored from inside ``apply`` keep their own data.

    Regression test: the function passed to ``apply`` stashes ``x.copy()`` of
    every chunk it receives in an external container.  After ``apply`` has
    finished, each stored copy must still hold exactly the rows of its own
    group.
    """
    # GH40673
    df = DataFrame(
        {
            "A": [0.5, -1.5, 2.5, 3.5],
            "B": [1, 2, 1, 2],
        }
    )

    # Chunks copied by the callback, in the order ``apply`` hands them over.
    store = []

    def addstore(x):
        store.append(x.copy())

    # Select the value column explicitly so every chunk holds only "A" and
    # the expected frames below need no ``.drop("B")`` afterwards.
    df.groupby("B")[["A"]].apply(addstore)

    # Relies on groupby's default key sorting: group 1 is visited before 2.
    group_keys = [1, 2]

    # Exactly one stored chunk per group; an extra callback invocation would
    # otherwise go unnoticed.
    assert len(store) == len(group_keys)

    for key, result in zip(group_keys, store):
        # Derive the expected rows from the grouping column itself instead
        # of a hand-written boolean mask that has to mirror "B".
        expected = df.loc[df["B"] == key, ["A"]]
        tm.assert_frame_equal(result, expected)