Backport PR #44824: BUG: Fix regression in groupby.rolling.corr/cov w…

…hen other is same size as each group (#44848) Co-authored-by: Matthew Roeschke <emailformattr@gmail.com>
pandas-dev · Dec 11, 2021 · 766ea00 · 766ea00
1 parent 1804780
commit 766ea00
Show file tree

Hide file tree

Showing 3 changed files with 39 additions and 7 deletions.
diff --git a/doc/source/whatsnew/v1.3.5.rst b/doc/source/whatsnew/v1.3.5.rst
@@ -21,7 +21,7 @@ Fixed regressions
 - Fixed performance regression in :func:`read_csv` (:issue:`44106`)
 - Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`)
 - Fixed regression in :meth:`.GroupBy.sum` with ``timedelta64[ns]`` dtype containing ``NaT`` failing to treat that value as NA (:issue:`42659`)
--
+- Fixed regression in :meth:`.RollingGroupby.cov` and :meth:`.RollingGroupby.corr` when ``other`` had the same shape as each group would incorrectly return superfluous groups in the result (:issue:`42915`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -658,8 +658,11 @@ def _apply_pairwise(
         target = self._create_data(target)
         result = super()._apply_pairwise(target, other, pairwise, func)
         # 1) Determine the levels + codes of the groupby levels
-        if other is not None:
-            # When we have other, we must reindex (expand) the result
+        if other is not None and not all(
+            len(group) == len(other) for group in self._grouper.indices.values()
+        ):
+            # GH 42915
+            # len(other) != len(any group), so must reindex (expand) the result
             # from flex_binary_moment to a "transform"-like result
             # per groupby combination
             old_result_len = len(result)
@@ -681,10 +684,9 @@ def _apply_pairwise(
                 codes, levels = factorize(labels)
                 groupby_codes.append(codes)
                 groupby_levels.append(levels)
-
         else:
-            # When we evaluate the pairwise=True result, repeat the groupby
-            # labels by the number of columns in the original object
+            # pairwise=True or len(other) == len(each group), so repeat
+            # the groupby labels by the number of columns in the original object
             groupby_codes = self._grouper.codes
             # error: Incompatible types in assignment (expression has type
             # "List[Index]", variable has type "List[Union[ndarray, Index]]")

diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py
@@ -122,8 +122,33 @@ def test_rolling_quantile(self, interpolation):
         expected.index = expected_index
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize("f, expected_val", [["corr", 1], ["cov", 0.5]])
+    def test_rolling_corr_cov_other_same_size_as_groups(self, f, expected_val):
+        # GH 42915
+        df = DataFrame(
+            {"value": range(10), "idx1": [1] * 5 + [2] * 5, "idx2": [1, 2, 3, 4, 5] * 2}
+        ).set_index(["idx1", "idx2"])
+        other = DataFrame({"value": range(5), "idx2": [1, 2, 3, 4, 5]}).set_index(
+            "idx2"
+        )
+        result = getattr(df.groupby(level=0).rolling(2), f)(other)
+        expected_data = ([np.nan] + [expected_val] * 4) * 2
+        expected = DataFrame(
+            expected_data,
+            columns=["value"],
+            index=MultiIndex.from_arrays(
+                [
+                    [1] * 5 + [2] * 5,
+                    [1] * 5 + [2] * 5,
+                    list(range(1, 6)) * 2,
+                ],
+                names=["idx1", "idx1", "idx2"],
+            ),
+        )
+        tm.assert_frame_equal(result, expected)
+
     @pytest.mark.parametrize("f", ["corr", "cov"])
-    def test_rolling_corr_cov(self, f):
+    def test_rolling_corr_cov_other_diff_size_as_groups(self, f):
         g = self.frame.groupby("A")
         r = g.rolling(window=4)
 
@@ -138,6 +163,11 @@ def func(x):
         expected["A"] = np.nan
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize("f", ["corr", "cov"])
+    def test_rolling_corr_cov_pairwise(self, f):
+        g = self.frame.groupby("A")
+        r = g.rolling(window=4)
+
         result = getattr(r.B, f)(pairwise=True)
 
         def func(x):