Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1263,6 +1263,7 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)
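- Bug in :meth:`Rolling.sem` computing incorrect results because it divided the square root of the sum of squared deviations by ``sqrt((n - 1) * (n - ddof))`` instead of ``sqrt(n * (n - ddof))``, where ``n`` is the number of observations in the window. (:issue:`63180`)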
- Bug in :meth:`Rolling.skew` incorrectly computing skewness for windows following outliers due to numerical instability. The calculation now properly handles catastrophic cancellation by recomputing affected windows. (:issue:`47461`)
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
- Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`)
Expand Down
16 changes: 8 additions & 8 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -1779,8 +1779,8 @@ def skew(self, numeric_only: bool = False):
def sem(self, ddof: int = 1, numeric_only: bool = False):
# Raise here so error message says sem instead of std
self._validate_numeric_only("sem", numeric_only)
return self.std(numeric_only=numeric_only) / (
self.count(numeric_only=numeric_only) - ddof
return self.std(numeric_only=numeric_only, ddof=ddof) / (
self.count(numeric_only=numeric_only)
).pow(0.5)

def kurt(self, numeric_only: bool = False):
Expand Down Expand Up @@ -2941,16 +2941,16 @@ def sem(self, ddof: int = 1, numeric_only: bool = False):
--------
>>> s = pd.Series([0, 1, 2, 3])
>>> s.rolling(2, min_periods=1).sem()
0 NaN
1 0.707107
2 0.707107
3 0.707107
0 NaN
1 0.5
2 0.5
3 0.5
dtype: float64
"""
# Raise here so error message says sem instead of std
self._validate_numeric_only("sem", numeric_only)
return self.std(numeric_only=numeric_only) / (
self.count(numeric_only) - ddof
return self.std(numeric_only=numeric_only, ddof=ddof) / (
self.count(numeric_only)
).pow(0.5)

def kurt(self, numeric_only: bool = False):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/window/test_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def test_expanding_sem(frame_or_series):
result = obj.expanding().sem()
if isinstance(result, DataFrame):
result = Series(result[0].values)
expected = Series([np.nan] + [0.707107] * 2)
expected = Series([np.nan, 0.5, (1 / 3) ** 0.5])
tm.assert_series_equal(result, expected)


Expand Down
15 changes: 11 additions & 4 deletions pandas/tests/window/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,17 +673,24 @@ def test_groupby_rolling_count_closed_on(self, unit):
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
("func", "kwargs"),
[("rolling", {"window": 2, "min_periods": 1}), ("expanding", {})],
("func", "kwargs", "expected_values"),
[
(
"rolling",
{"window": 2, "min_periods": 1},
[np.nan, 0.5, np.nan, 0.5, 0.5],
),
("expanding", {}, [np.nan, 0.5, np.nan, 0.5, (1 / 3) ** 0.5]),
],
)
def test_groupby_rolling_sem(self, func, kwargs):
def test_groupby_rolling_sem(self, func, kwargs, expected_values):
# GH: 26476
df = DataFrame(
[["a", 1], ["a", 2], ["b", 1], ["b", 2], ["b", 3]], columns=["a", "b"]
)
result = getattr(df.groupby("a"), func)(**kwargs).sem()
expected = DataFrame(
{"a": [np.nan] * 5, "b": [np.nan, 0.70711, np.nan, 0.70711, 0.70711]},
{"a": [np.nan] * 5, "b": expected_values},
index=MultiIndex.from_tuples(
[("a", 0), ("a", 1), ("b", 2), ("b", 3), ("b", 4)], names=["a", None]
),
Expand Down
9 changes: 2 additions & 7 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,15 +225,10 @@ def test_datetimelike_centered_selections(
index=date_range("2020", periods=5),
)

if func_name == "sem":
kwargs = {"ddof": 0}
else:
kwargs = {}

result = getattr(
df_time.rolling("2D", closed=closed, min_periods=1, center=True),
func_name,
)(**kwargs)
)()

tm.assert_frame_equal(result, expected, check_dtype=False)

Expand Down Expand Up @@ -1078,7 +1073,7 @@ def test_rolling_sem(frame_or_series):
result = obj.rolling(2, min_periods=1).sem()
if isinstance(result, DataFrame):
result = Series(result[0].values)
expected = Series([np.nan] + [0.7071067811865476] * 2)
expected = Series([np.nan] + [0.5] * 2)
tm.assert_series_equal(result, expected)


Expand Down
Loading