From 1ec3f9f527aa8be8b526aebc25f4a611288bdcdf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 8 Dec 2021 16:10:27 -0800 Subject: [PATCH] BUG: Fix regression in groupby.rolling.corr/cov when other is same size as each group --- doc/source/whatsnew/v1.3.5.rst | 2 +- pandas/core/window/rolling.py | 12 ++++++----- pandas/tests/window/test_groupby.py | 32 ++++++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.3.5.rst b/doc/source/whatsnew/v1.3.5.rst index 048cd978c4478..198d24e260872 100644 --- a/doc/source/whatsnew/v1.3.5.rst +++ b/doc/source/whatsnew/v1.3.5.rst @@ -20,7 +20,7 @@ Fixed regressions - Fixed performance regression in :func:`read_csv` (:issue:`44106`) - Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`) - Fixed regression in :meth:`.GroupBy.sum` with ``timedelta64[ns]`` dtype containing ``NaT`` failing to treat that value as NA (:issue:`42659`) -- +- Fixed regression in :meth:`.RollingGroupby.cov` and :meth:`.RollingGroupby.corr` where, when ``other`` had the same shape as each group, the result would incorrectly contain superfluous groups (:issue:`42915`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index fc3390ee6db03..defae3392bfce 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -747,8 +747,11 @@ def _apply_pairwise( target = self._create_data(target) result = super()._apply_pairwise(target, other, pairwise, func) # 1) Determine the levels + codes of the groupby levels - if other is not None: - # When we have other, we must reindex (expand) the result + if other is not None and not all( + len(group) == len(other) for group in self._grouper.indices.values() + ): + # GH 42915 + # some group has len != len(other), so must reindex (expand) the result # from flex_binary_moment to a "transform"-like result # per groupby combination old_result_len = len(result) @@ -770,10 +773,9 @@ def _apply_pairwise( codes, levels = factorize(labels) groupby_codes.append(codes) groupby_levels.append(levels) - else: - # When we evaluate the pairwise=True result, repeat the groupby - # labels by the number of columns in the original object + # pairwise=True or len(other) == len(each group), so repeat + # the groupby labels by the number of columns in the original object groupby_codes = self._grouper.codes # error: Incompatible types in assignment (expression has type # "List[Index]", variable has type "List[Union[ndarray, Index]]") diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 05f485fcc3c65..6ec19e4899d53 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -122,8 +122,33 @@ def test_rolling_quantile(self, interpolation): expected.index = expected_index tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("f, expected_val", [["corr", 1], ["cov", 0.5]]) + def test_rolling_corr_cov_other_same_size_as_groups(self, f, expected_val): + # GH 42915 + df = DataFrame( + {"value": range(10), "idx1": [1] * 5 + [2] 
* 5, "idx2": [1, 2, 3, 4, 5] * 2} + ).set_index(["idx1", "idx2"]) + other = DataFrame({"value": range(5), "idx2": [1, 2, 3, 4, 5]}).set_index( + "idx2" + ) + result = getattr(df.groupby(level=0).rolling(2), f)(other) + expected_data = ([np.nan] + [expected_val] * 4) * 2 + expected = DataFrame( + expected_data, + columns=["value"], + index=MultiIndex.from_arrays( + [ + [1] * 5 + [2] * 5, + [1] * 5 + [2] * 5, + list(range(1, 6)) * 2, + ], + names=["idx1", "idx1", "idx2"], + ), + ) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("f", ["corr", "cov"]) - def test_rolling_corr_cov(self, f): + def test_rolling_corr_cov_other_diff_size_as_groups(self, f): g = self.frame.groupby("A") r = g.rolling(window=4) @@ -138,6 +163,11 @@ def func(x): expected["A"] = np.nan tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("f", ["corr", "cov"]) + def test_rolling_corr_cov_pairwise(self, f): + g = self.frame.groupby("A") + r = g.rolling(window=4) + result = getattr(r.B, f)(pairwise=True) def func(x):