Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1260,6 +1260,7 @@ Groupby/resample/rolling
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
- Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`)
- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` computing incorrect results due to numerical instability. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)
- Bug in :meth:`Rolling.skew` and :meth:`Rolling.kurt` producing ``NaN`` for low-variance inputs due to an absolute numerical instability check. The check was removed (``skew``) or turned into a relative check (``kurt``), depending on the numerical stability of the calculation (:issue:`62946`)

Reshaping
^^^^^^^^^
Expand Down
26 changes: 10 additions & 16 deletions pandas/_libs/window/aggregations.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -506,17 +506,6 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs,
# uniform case, force result to be 0
elif num_consecutive_same_value >= nobs:
result = 0.0
# #18044: with degenerate distribution, floating issue will
# cause m2 != 0. and cause the result is a very
# large number.
#
# in core/nanops.py nanskew/nankurt call the function
# _zero_out_fperr(m2) to fix floating error.
# if the variance is less than 1e-14, it could be
# treat as zero, here we follow the original
# skew/kurt behaviour to check m2 <= n * 1e-14
elif m2 <= dnobs * 1e-14:
result = NaN
else:
moments_ratio = m3 / (m2 * sqrt(m2))
correction = dnobs * sqrt((dnobs - 1)) / (dnobs - 2)
Expand Down Expand Up @@ -688,7 +677,7 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
int64_t num_consecutive_same_value,
) noexcept nogil:
cdef:
float64_t result, dnobs
float64_t result, dnobs, variance_cutoff
float64_t A, B, C, D, R, K

if nobs >= minp:
Expand All @@ -708,16 +697,21 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
R = R * A
D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A

# Relative cutoff as introduced in #62405
# See the comment in nanops.nankurt for further explanation
variance_cutoff = EpsF64 * EpsF64 * A * A * dnobs

# #18044: with uniform distribution, floating-point error will
# cause B != 0 and make the result a very
# large number.
#
# in core/nanops.py nanskew/nankurt call the function
# _zero_out_fperr(m2) to fix floating error.
# if the variance is less than 1e-14, it could be
# treat as zero, here we follow the original
# skew/kurt behaviour to check B <= 1e-14
if B <= 1e-14:
# If the variance is less than a relative cutoff value,
# it can be treated as zero. Here we follow the original
# skew/kurt behaviour and check
# m2 <= ((float64_machine_eps * mean) ** 2) * observations
if B <= variance_cutoff:
result = NaN
else:
K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2)
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/window/test_rolling_skew_kurt.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import datetime
from functools import partial

import numpy as np
Expand All @@ -6,6 +7,7 @@
from pandas import (
DataFrame,
Series,
bdate_range,
concat,
isna,
notna,
Expand All @@ -25,6 +27,24 @@ def test_series(series, sp_func, roll_func):
tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:]))


@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_low_variance_series(sp_func, roll_func):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just add a comment linking to the issue for future contributors.

Suggested change
def test_low_variance_series(sp_func, roll_func):
def test_low_variance_series(sp_func, roll_func):
# GH#62946

sp_stats = pytest.importorskip("scipy.stats")

arr = np.random.default_rng(505).normal(loc=0e0, scale=1e-16, size=100)
locs = np.arange(20, 40)
arr[locs] = np.nan
low_variance_series = Series(
arr, index=bdate_range(datetime(2009, 1, 1), periods=100)
)

compare_func = partial(getattr(sp_stats, sp_func), bias=False)
window = low_variance_series.rolling(50)
result = getattr(window, roll_func)()
expected = window.apply(compare_func)
tm.assert_almost_equal(result, expected)


@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_frame(raw, frame, sp_func, roll_func):
sp_stats = pytest.importorskip("scipy.stats")
Expand Down
Loading