diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4ab20623cc561..6094c494d02ad 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1263,6 +1263,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`) - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`) - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`) +- Bug in :meth:`Rolling.sem` and :meth:`Expanding.sem` computing incorrect results: ``ddof`` was not passed to the underlying ``std`` and the result was divided by ``sqrt(n - ddof)`` instead of ``sqrt(n)``, so the sum of squared deviations was effectively scaled by ``(n - 1) * (n - ddof)`` rather than ``n * (n - ddof)``. (:issue:`63180`) - Bug in :meth:`Rolling.skew` incorrectly computing skewness for windows following outliers due to numerical instability. The calculation now properly handles catastrophic cancellation by recomputing affected windows (:issue:`47461`) - Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. 
(:issue:`58380`) - Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 17b189e222299..88d158c7fcd81 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1779,8 +1779,8 @@ def skew(self, numeric_only: bool = False): def sem(self, ddof: int = 1, numeric_only: bool = False): # Raise here so error message says sem instead of std self._validate_numeric_only("sem", numeric_only) - return self.std(numeric_only=numeric_only) / ( - self.count(numeric_only=numeric_only) - ddof + return self.std(numeric_only=numeric_only, ddof=ddof) / ( + self.count(numeric_only=numeric_only) ).pow(0.5) def kurt(self, numeric_only: bool = False): @@ -2941,16 +2941,16 @@ def sem(self, ddof: int = 1, numeric_only: bool = False): -------- >>> s = pd.Series([0, 1, 2, 3]) >>> s.rolling(2, min_periods=1).sem() - 0 NaN - 1 0.707107 - 2 0.707107 - 3 0.707107 + 0 NaN + 1 0.5 + 2 0.5 + 3 0.5 dtype: float64 """ # Raise here so error message says sem instead of std self._validate_numeric_only("sem", numeric_only) - return self.std(numeric_only=numeric_only) / ( - self.count(numeric_only) - ddof + return self.std(numeric_only=numeric_only, ddof=ddof) / ( + self.count(numeric_only) ).pow(0.5) def kurt(self, numeric_only: bool = False): diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index ddcdf07beeb4c..d0bd68214bcba 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -216,7 +216,7 @@ def test_expanding_sem(frame_or_series): result = obj.expanding().sem() if isinstance(result, DataFrame): result = Series(result[0].values) - expected = Series([np.nan] + [0.707107] * 2) + expected = Series([np.nan, 0.5, (1 / 3) ** 0.5]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_groupby.py 
b/pandas/tests/window/test_groupby.py index f9595970bdb63..543ae095b1cb4 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -673,17 +673,24 @@ def test_groupby_rolling_count_closed_on(self, unit): tm.assert_series_equal(result, expected) @pytest.mark.parametrize( - ("func", "kwargs"), - [("rolling", {"window": 2, "min_periods": 1}), ("expanding", {})], + ("func", "kwargs", "expected_values"), + [ + ( + "rolling", + {"window": 2, "min_periods": 1}, + [np.nan, 0.5, np.nan, 0.5, 0.5], + ), + ("expanding", {}, [np.nan, 0.5, np.nan, 0.5, (1 / 3) ** 0.5]), + ], ) - def test_groupby_rolling_sem(self, func, kwargs): + def test_groupby_rolling_sem(self, func, kwargs, expected_values): # GH: 26476 df = DataFrame( [["a", 1], ["a", 2], ["b", 1], ["b", 2], ["b", 3]], columns=["a", "b"] ) result = getattr(df.groupby("a"), func)(**kwargs).sem() expected = DataFrame( - {"a": [np.nan] * 5, "b": [np.nan, 0.70711, np.nan, 0.70711, 0.70711]}, + {"a": [np.nan] * 5, "b": expected_values}, index=MultiIndex.from_tuples( [("a", 0), ("a", 1), ("b", 2), ("b", 3), ("b", 4)], names=["a", None] ), diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index dff307b595a3a..71717d4259703 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -225,15 +225,10 @@ def test_datetimelike_centered_selections( index=date_range("2020", periods=5), ) - if func_name == "sem": - kwargs = {"ddof": 0} - else: - kwargs = {} - result = getattr( df_time.rolling("2D", closed=closed, min_periods=1, center=True), func_name, - )(**kwargs) + )() tm.assert_frame_equal(result, expected, check_dtype=False) @@ -1078,7 +1073,7 @@ def test_rolling_sem(frame_or_series): result = obj.rolling(2, min_periods=1).sem() if isinstance(result, DataFrame): result = Series(result[0].values) - expected = Series([np.nan] + [0.7071067811865476] * 2) + expected = Series([np.nan] + [0.5] * 2) tm.assert_series_equal(result, 
expected)