Skip to content

Commit

Permalink
Backport PR #44824: BUG: Fix regression in groupby.rolling.corr/cov w…
Browse files Browse the repository at this point in the history
…hen other is same size as each group (#44848)

Co-authored-by: Matthew Roeschke <emailformattr@gmail.com>
  • Loading branch information
meeseeksmachine and mroeschke committed Dec 11, 2021
1 parent 1804780 commit 766ea00
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 7 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.5.rst
Expand Up @@ -21,7 +21,7 @@ Fixed regressions
- Fixed performance regression in :func:`read_csv` (:issue:`44106`)
- Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`)
- Fixed regression in :meth:`.GroupBy.sum` with ``timedelta64[ns]`` dtype containing ``NaT`` failing to treat that value as NA (:issue:`42659`)
-
- Fixed regression in :meth:`.RollingGroupby.cov` and :meth:`.RollingGroupby.corr` when ``other`` had the same shape as each group would incorrectly return superfluous groups in the result (:issue:`42915`)

.. ---------------------------------------------------------------------------
Expand Down
12 changes: 7 additions & 5 deletions pandas/core/window/rolling.py
Expand Up @@ -658,8 +658,11 @@ def _apply_pairwise(
target = self._create_data(target)
result = super()._apply_pairwise(target, other, pairwise, func)
# 1) Determine the levels + codes of the groupby levels
if other is not None:
# When we have other, we must reindex (expand) the result
if other is not None and not all(
len(group) == len(other) for group in self._grouper.indices.values()
):
# GH 42915
# len(other) != len(any group), so must reindex (expand) the result
# from flex_binary_moment to a "transform"-like result
# per groupby combination
old_result_len = len(result)
Expand All @@ -681,10 +684,9 @@ def _apply_pairwise(
codes, levels = factorize(labels)
groupby_codes.append(codes)
groupby_levels.append(levels)

else:
# When we evaluate the pairwise=True result, repeat the groupby
# labels by the number of columns in the original object
# pairwise=True or len(other) == len(each group), so repeat
# the groupby labels by the number of columns in the original object
groupby_codes = self._grouper.codes
# error: Incompatible types in assignment (expression has type
# "List[Index]", variable has type "List[Union[ndarray, Index]]")
Expand Down
32 changes: 31 additions & 1 deletion pandas/tests/window/test_groupby.py
Expand Up @@ -122,8 +122,33 @@ def test_rolling_quantile(self, interpolation):
expected.index = expected_index
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("f, expected_val", [["corr", 1], ["cov", 0.5]])
def test_rolling_corr_cov_other_same_size_as_groups(self, f, expected_val):
# GH 42915
df = DataFrame(
{"value": range(10), "idx1": [1] * 5 + [2] * 5, "idx2": [1, 2, 3, 4, 5] * 2}
).set_index(["idx1", "idx2"])
other = DataFrame({"value": range(5), "idx2": [1, 2, 3, 4, 5]}).set_index(
"idx2"
)
result = getattr(df.groupby(level=0).rolling(2), f)(other)
expected_data = ([np.nan] + [expected_val] * 4) * 2
expected = DataFrame(
expected_data,
columns=["value"],
index=MultiIndex.from_arrays(
[
[1] * 5 + [2] * 5,
[1] * 5 + [2] * 5,
list(range(1, 6)) * 2,
],
names=["idx1", "idx1", "idx2"],
),
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("f", ["corr", "cov"])
def test_rolling_corr_cov(self, f):
def test_rolling_corr_cov_other_diff_size_as_groups(self, f):
g = self.frame.groupby("A")
r = g.rolling(window=4)

Expand All @@ -138,6 +163,11 @@ def func(x):
expected["A"] = np.nan
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("f", ["corr", "cov"])
def test_rolling_corr_cov_pairwise(self, f):
g = self.frame.groupby("A")
r = g.rolling(window=4)

result = getattr(r.B, f)(pairwise=True)

def func(x):
Expand Down

0 comments on commit 766ea00

Please sign in to comment.