Skip to content

Commit 0c52e25

Browse files
authored
BUG: fix computation for Rolling.sem (#63181)
1 parent 0894be8 commit 0c52e25

File tree

5 files changed

+23
-20
lines changed

5 files changed

+23
-20
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1262,6 +1262,7 @@ Groupby/resample/rolling
12621262
- Bug in :meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
12631263
- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
12641264
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)
1265+
- Bug in :meth:`Rolling.sem` computing incorrect results because it divided by ``sqrt((n - 1) * (n - ddof))`` instead of ``sqrt(n * (n - ddof))``. (:issue:`63180`)
12651266
- Bug in :meth:`Rolling.skew` incorrectly computing skewness for windows following outliers due to numerical instability. The calculation now properly handles catastrophic cancellation by recomputing affected windows (:issue:`47461`)
12661267
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
12671268
- Bug in :meth:`Series.resample` raising an error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`)

pandas/core/window/rolling.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1779,8 +1779,8 @@ def skew(self, numeric_only: bool = False):
17791779
def sem(self, ddof: int = 1, numeric_only: bool = False):
17801780
# Raise here so error message says sem instead of std
17811781
self._validate_numeric_only("sem", numeric_only)
1782-
return self.std(numeric_only=numeric_only) / (
1783-
self.count(numeric_only=numeric_only) - ddof
1782+
return self.std(numeric_only=numeric_only, ddof=ddof) / (
1783+
self.count(numeric_only=numeric_only)
17841784
).pow(0.5)
17851785

17861786
def kurt(self, numeric_only: bool = False):
@@ -2941,16 +2941,16 @@ def sem(self, ddof: int = 1, numeric_only: bool = False):
29412941
--------
29422942
>>> s = pd.Series([0, 1, 2, 3])
29432943
>>> s.rolling(2, min_periods=1).sem()
2944-
0 NaN
2945-
1 0.707107
2946-
2 0.707107
2947-
3 0.707107
2944+
0 NaN
2945+
1 0.5
2946+
2 0.5
2947+
3 0.5
29482948
dtype: float64
29492949
"""
29502950
# Raise here so error message says sem instead of std
29512951
self._validate_numeric_only("sem", numeric_only)
2952-
return self.std(numeric_only=numeric_only) / (
2953-
self.count(numeric_only) - ddof
2952+
return self.std(numeric_only=numeric_only, ddof=ddof) / (
2953+
self.count(numeric_only)
29542954
).pow(0.5)
29552955

29562956
def kurt(self, numeric_only: bool = False):

pandas/tests/window/test_expanding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def test_expanding_sem(frame_or_series):
216216
result = obj.expanding().sem()
217217
if isinstance(result, DataFrame):
218218
result = Series(result[0].values)
219-
expected = Series([np.nan] + [0.707107] * 2)
219+
expected = Series([np.nan, 0.5, (1 / 3) ** 0.5])
220220
tm.assert_series_equal(result, expected)
221221

222222

pandas/tests/window/test_groupby.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -673,17 +673,24 @@ def test_groupby_rolling_count_closed_on(self, unit):
673673
tm.assert_series_equal(result, expected)
674674

675675
@pytest.mark.parametrize(
676-
("func", "kwargs"),
677-
[("rolling", {"window": 2, "min_periods": 1}), ("expanding", {})],
676+
("func", "kwargs", "expected_values"),
677+
[
678+
(
679+
"rolling",
680+
{"window": 2, "min_periods": 1},
681+
[np.nan, 0.5, np.nan, 0.5, 0.5],
682+
),
683+
("expanding", {}, [np.nan, 0.5, np.nan, 0.5, (1 / 3) ** 0.5]),
684+
],
678685
)
679-
def test_groupby_rolling_sem(self, func, kwargs):
686+
def test_groupby_rolling_sem(self, func, kwargs, expected_values):
680687
# GH: 26476
681688
df = DataFrame(
682689
[["a", 1], ["a", 2], ["b", 1], ["b", 2], ["b", 3]], columns=["a", "b"]
683690
)
684691
result = getattr(df.groupby("a"), func)(**kwargs).sem()
685692
expected = DataFrame(
686-
{"a": [np.nan] * 5, "b": [np.nan, 0.70711, np.nan, 0.70711, 0.70711]},
693+
{"a": [np.nan] * 5, "b": expected_values},
687694
index=MultiIndex.from_tuples(
688695
[("a", 0), ("a", 1), ("b", 2), ("b", 3), ("b", 4)], names=["a", None]
689696
),

pandas/tests/window/test_rolling.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -225,15 +225,10 @@ def test_datetimelike_centered_selections(
225225
index=date_range("2020", periods=5),
226226
)
227227

228-
if func_name == "sem":
229-
kwargs = {"ddof": 0}
230-
else:
231-
kwargs = {}
232-
233228
result = getattr(
234229
df_time.rolling("2D", closed=closed, min_periods=1, center=True),
235230
func_name,
236-
)(**kwargs)
231+
)()
237232

238233
tm.assert_frame_equal(result, expected, check_dtype=False)
239234

@@ -1078,7 +1073,7 @@ def test_rolling_sem(frame_or_series):
10781073
result = obj.rolling(2, min_periods=1).sem()
10791074
if isinstance(result, DataFrame):
10801075
result = Series(result[0].values)
1081-
expected = Series([np.nan] + [0.7071067811865476] * 2)
1076+
expected = Series([np.nan] + [0.5] * 2)
10821077
tm.assert_series_equal(result, expected)
10831078

10841079

0 commit comments

Comments
 (0)