diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 10f2b1cff73a0..0fbef57999802 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -446,6 +446,7 @@ Groupby/resample/rolling - Bug in :meth:`Rolling.count` returned ``np.nan`` with :class:`pandas.api.indexers.FixedForwardWindowIndexer` as window, ``min_periods=0`` and only missing values in window (:issue:`35579`) - Bug where :class:`pandas.core.window.Rolling` produces incorrect window sizes when using a ``PeriodIndex`` (:issue:`34225`) - Bug in :meth:`RollingGroupby.count` where a ``ValueError`` was raised when specifying the ``closed`` parameter (:issue:`35869`) +- Bug in :meth:`DataFrame.groupby.rolling` returning wrong values with partial centered window (:issue:`36040`). Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index 023f598f606f3..f2bc01438097c 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -78,30 +78,16 @@ def get_window_bounds( closed: Optional[str] = None, ) -> Tuple[np.ndarray, np.ndarray]: - start_s = np.zeros(self.window_size, dtype="int64") - start_e = ( - np.arange(self.window_size, num_values, dtype="int64") - - self.window_size - + 1 - ) - start = np.concatenate([start_s, start_e])[:num_values] - - end_s = np.arange(self.window_size, dtype="int64") + 1 - end_e = start_e + self.window_size - end = np.concatenate([end_s, end_e])[:num_values] - - if center and self.window_size > 2: - offset = min((self.window_size - 1) // 2, num_values - 1) - start_s_buffer = np.roll(start, -offset)[: num_values - offset] - end_s_buffer = np.roll(end, -offset)[: num_values - offset] + if center: + offset = (self.window_size - 1) // 2 + else: + offset = 0 - start_e_buffer = np.arange( - start[-1] + 1, start[-1] + 1 + offset, dtype="int64" - ) - end_e_buffer = np.array([end[-1]] * offset, dtype="int64") + end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64") + start = end - self.window_size - start = np.concatenate([start_s_buffer, start_e_buffer]) - end = np.concatenate([end_s_buffer, end_e_buffer]) + end = np.clip(end, 0, num_values) + start = np.clip(start, 0, num_values) return start, end diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 63bf731e95096..6b80f65c16fa6 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -297,6 +297,33 @@ def test_groupby_rolling_center_center(self): ) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("min_periods", [5, 4, 3]) + def test_groupby_rolling_center_min_periods(self, min_periods): + # GH 36040 + df = pd.DataFrame({"group": ["A"] * 10 + ["B"] * 10, "data": range(20)}) + + window_size = 5 + result = ( + df.groupby("group") + .rolling(window_size, center=True, min_periods=min_periods) + .mean() + ) + result = result.reset_index()[["group", "data"]] + + grp_A_mean = [1.0, 1.5, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 7.5, 8.0] + grp_B_mean = [x + 10.0 for x in grp_A_mean] + + num_nans = max(0, min_periods - 3) # For window_size of 5 + nans = [np.nan] * num_nans + grp_A_expected = nans + grp_A_mean[num_nans : 10 - num_nans] + nans + grp_B_expected = nans + grp_B_mean[num_nans : 10 - num_nans] + nans + + expected = pd.DataFrame( + {"group": ["A"] * 10 + ["B"] * 10, "data": grp_A_expected + grp_B_expected} + ) + + tm.assert_frame_equal(result, expected) + def test_groupby_subselect_rolling(self): # GH 35486 df = DataFrame(