Skip to content

Commit

Permalink
API: reimplement FixedWindowIndexer.get_window_bounds (pandas-dev#37035)
Browse files Browse the repository at this point in the history
  • Loading branch information
justinessert authored and Kevin D Smith committed Nov 2, 2020
1 parent faeb757 commit f44f2b1
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 22 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ Groupby/resample/rolling
- Bug in :meth:`Rolling.count` where ``np.nan`` was returned with :class:`pandas.api.indexers.FixedForwardWindowIndexer` as window, ``min_periods=0`` and only missing values in the window (:issue:`35579`)
- Bug where :class:`pandas.core.window.Rolling` produces incorrect window sizes when using a ``PeriodIndex`` (:issue:`34225`)
- Bug in :meth:`RollingGroupby.count` where a ``ValueError`` was raised when specifying the ``closed`` parameter (:issue:`35869`)
- Bug in :meth:`DataFrame.groupby.rolling` returning wrong values with a partial centered window (:issue:`36040`)

Reshaping
^^^^^^^^^
Expand Down
30 changes: 8 additions & 22 deletions pandas/core/window/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,30 +78,16 @@ def get_window_bounds(
closed: Optional[str] = None,
) -> Tuple[np.ndarray, np.ndarray]:

start_s = np.zeros(self.window_size, dtype="int64")
start_e = (
np.arange(self.window_size, num_values, dtype="int64")
- self.window_size
+ 1
)
start = np.concatenate([start_s, start_e])[:num_values]

end_s = np.arange(self.window_size, dtype="int64") + 1
end_e = start_e + self.window_size
end = np.concatenate([end_s, end_e])[:num_values]

if center and self.window_size > 2:
offset = min((self.window_size - 1) // 2, num_values - 1)
start_s_buffer = np.roll(start, -offset)[: num_values - offset]
end_s_buffer = np.roll(end, -offset)[: num_values - offset]
if center:
offset = (self.window_size - 1) // 2
else:
offset = 0

start_e_buffer = np.arange(
start[-1] + 1, start[-1] + 1 + offset, dtype="int64"
)
end_e_buffer = np.array([end[-1]] * offset, dtype="int64")
end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64")
start = end - self.window_size

start = np.concatenate([start_s_buffer, start_e_buffer])
end = np.concatenate([end_s_buffer, end_e_buffer])
end = np.clip(end, 0, num_values)
start = np.clip(start, 0, num_values)

return start, end

Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/window/test_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,33 @@ def test_groupby_rolling_center_center(self):
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("min_periods", [5, 4, 3])
def test_groupby_rolling_center_min_periods(self, min_periods):
    # GH 36040
    # Two groups of ten consecutive integers: A holds 0..9, B holds 10..19.
    labels = ["A"] * 10 + ["B"] * 10
    df = pd.DataFrame({"group": labels, "data": range(20)})

    window_size = 5
    rolled = df.groupby("group").rolling(
        window_size, center=True, min_periods=min_periods
    )
    result = rolled.mean().reset_index()[["group", "data"]]

    # Centered means with no min_periods masking; the first and last two
    # entries of each group come from truncated windows (3, then 4 values).
    full_means_a = [1.0, 1.5, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 7.5, 8.0]
    full_means_b = [mean + 10.0 for mean in full_means_a]

    # Number of positions at each group edge whose truncated window holds
    # fewer than ``min_periods`` observations.
    pad_len = max(0, min_periods - 3)  # For window_size of 5
    pad = [np.nan] * pad_len

    def mask_edges(means):
        # Replace the first/last ``pad_len`` means of one group with NaN.
        return pad + means[pad_len : 10 - pad_len] + pad

    expected_data = mask_edges(full_means_a) + mask_edges(full_means_b)
    expected = pd.DataFrame({"group": labels, "data": expected_data})

    tm.assert_frame_equal(result, expected)

def test_groupby_subselect_rolling(self):
# GH 35486
df = DataFrame(
Expand Down

0 comments on commit f44f2b1

Please sign in to comment.