From 99a602ff85cbd716de6397bbc8e096310469132d Mon Sep 17 00:00:00 2001 From: Sebastian Goldmann Date: Wed, 29 Oct 2025 21:33:09 +0100 Subject: [PATCH 01/13] kurtosis adjustment to return NaN for constant variance --- pandas/_libs/window/aggregations.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 0c8ea28b60ce8..46df56b4fbfd6 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -694,7 +694,7 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs, # GH 42064 46431 # uniform case, force result to be -3. elif num_consecutive_same_value >= nobs: - result = -3. + result = NaN else: dnobs = nobs A = x / dnobs From 612ec5f77e1b2cbdc9e32b99caa7c5b63c231b4d Mon Sep 17 00:00:00 2001 From: Sebastian Goldmann Date: Wed, 29 Oct 2025 21:45:25 +0100 Subject: [PATCH 02/13] adjusted kurt for series to align behaviour --- pandas/core/nanops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 772f425beed96..e3d8587f83834 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1400,7 +1400,7 @@ def nankurt( if count < 4: return np.nan if denominator == 0: - return values.dtype.type(0) + return np.nan with np.errstate(invalid="ignore", divide="ignore"): result = numerator / denominator - adj From a5e46805e91a9c6b3cfceecda0004c1d05c6f1c6 Mon Sep 17 00:00:00 2001 From: Sebastian Goldmann Date: Wed, 29 Oct 2025 22:34:43 +0100 Subject: [PATCH 03/13] adjusted for other condition as well --- pandas/core/nanops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index e3d8587f83834..daa244be12ea5 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1410,7 +1410,7 @@ def nankurt( result = result.astype(dtype, copy=False) if isinstance(result, np.ndarray): - result = np.where(denominator == 0, 0, 
result) + result = np.where(denominator == 0, np.nan, result) result[count < 4] = np.nan return result From ade3f0cbfc4844e186d364ece812a3a23f265701 Mon Sep 17 00:00:00 2001 From: Sebastian Goldmann Date: Thu, 30 Oct 2025 21:48:32 +0100 Subject: [PATCH 04/13] finalized kurt and skew fix pd.series --- pandas/core/nanops.py | 4 ++-- pandas/tests/test_nanops.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index daa244be12ea5..27e0d6ba1760f 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1287,10 +1287,10 @@ def nanskew( result = result.astype(dtype, copy=False) if isinstance(result, np.ndarray): - result = np.where(m2 == 0, 0, result) + result = np.where(m2 == 0, np.nan, result) result[count < 3] = np.nan else: - result = dtype.type(0) if m2 == 0 else result + result = np.nan if m2 == 0 else result if count < 3: return np.nan diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 6788f2056bb9a..07bb93a90fbe0 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -559,10 +559,10 @@ def _skew_kurt_wrap(self, values, axis=None, func=None): result = func(values, axis=axis, bias=False) # fix for handling cases where all elements in an axis are the same if isinstance(result, np.ndarray): - result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0 + result[np.max(values, axis=axis) == np.min(values, axis=axis)] = np.nan return result elif np.max(values) == np.min(values): - return 0.0 + return np.nan return result def test_nanskew(self, skipna): @@ -1021,7 +1021,7 @@ def test_constant_series(self, val): # xref GH 11974 data = val * np.ones(300) skew = nanops.nanskew(data) - assert skew == 0.0 + assert np.isnan(skew) def test_all_finite(self): alpha, beta = 0.3, 0.1 @@ -1089,7 +1089,7 @@ def test_constant_series(self, val): # xref GH 11974 data = val * np.ones(300) kurt = nanops.nankurt(data) - tm.assert_equal(kurt, 0.0) + 
tm.assert_equal(kurt, np.nan) def test_all_finite(self): alpha, beta = 0.3, 0.1 From b74c98a9db56b8a5d0d24b22fa51ddc16fd453ac Mon Sep 17 00:00:00 2001 From: Sebastian Goldmann Date: Thu, 30 Oct 2025 22:04:36 +0100 Subject: [PATCH 05/13] adjusted window tests --- pandas/_libs/window/aggregations.pyx | 4 ++-- pandas/tests/window/test_rolling_skew_kurt.py | 12 +++++------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 46df56b4fbfd6..917b62227e687 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -498,9 +498,9 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs, if nobs < 3: result = NaN # GH 42064 46431 - # uniform case, force result to be 0 + # uniform case, force result to be NaN elif num_consecutive_same_value >= nobs: - result = 0.0 + result = NaN # #18044: with uniform distribution, floating issue will # cause B != 0. and cause the result is a very # large number. 
diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py index 79c14f243e7cc..35d0a88d292ac 100644 --- a/pandas/tests/window/test_rolling_skew_kurt.py +++ b/pandas/tests/window/test_rolling_skew_kurt.py @@ -170,7 +170,7 @@ def test_center_reindex_frame(frame, roll_func): def test_rolling_skew_edge_cases(step): - expected = Series([np.nan] * 4 + [0.0])[::step] + expected = Series([np.nan] * 5)[::step] # yields all NaN (0 variance) d = Series([1] * 5) x = d.rolling(window=5, step=step).skew() @@ -191,7 +191,7 @@ def test_rolling_skew_edge_cases(step): def test_rolling_kurt_edge_cases(step): - expected = Series([np.nan] * 4 + [-3.0])[::step] + expected = Series([np.nan] * 5)[::step] # yields all NaN (0 variance) d = Series([1] * 5) @@ -212,16 +212,14 @@ def test_rolling_kurt_edge_cases(step): def test_rolling_skew_eq_value_fperr(step): - # #18804 all rolling skew for all equal values should return Nan + # #18804 all rolling skew for all equal values should return NaN # #46717 update: all equal values should return 0 instead of NaN a = Series([1.1] * 15).rolling(window=10, step=step).skew() - assert (a[a.index >= 9] == 0).all() - assert a[a.index < 9].isna().all() + assert a.isna().all() def test_rolling_kurt_eq_value_fperr(step): # #18804 all rolling kurt for all equal values should return Nan # #46717 update: all equal values should return -3 instead of NaN a = Series([1.1] * 15).rolling(window=10, step=step).kurt() - assert (a[a.index >= 9] == -3).all() - assert a[a.index < 9].isna().all() + assert a.isna().all() From 7e1ac6a3bebcbe8f6d806f03890ea163f126a49f Mon Sep 17 00:00:00 2001 From: Sebastian Goldmann Date: Thu, 30 Oct 2025 22:17:35 +0100 Subject: [PATCH 06/13] updated whatsnew docstring --- doc/source/whatsnew/v2.3.3.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/whatsnew/v2.3.3.rst b/doc/source/whatsnew/v2.3.3.rst index 2e99e0fc8c47c..fe94e0f847562 100644 --- 
a/doc/source/whatsnew/v2.3.3.rst +++ b/doc/source/whatsnew/v2.3.3.rst @@ -53,6 +53,10 @@ Bug fixes characters as digits for :class:`StringDtype` backed by PyArrow (:issue:`61466`) - Fix comparing a :class:`StringDtype` Series with mixed objects raising an error (:issue:`60228`) - Fix error being raised when using a numpy ufunc with a Python-backed string array (:issue:`40800`) +- Fix bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` ``skew()`` and ``kurt()`` calculations + that produced incorrect values for windows containing variance of zero. + The implementation has been updated to return ``NaN`` in these cases, matching scipy.stats behavior + and the non-rolling :meth:`Series.skew` and :meth:`Series.kurt` received the same behavior ensuring consistency. (:issue:`62864`) Other changes ~~~~~~~~~~~~~ From 6275874bf9998e3fbbfa260e30db4533489b69ca Mon Sep 17 00:00:00 2001 From: Sebastian Goldmann Date: Sun, 2 Nov 2025 13:20:31 +0100 Subject: [PATCH 07/13] removed depreciated kurtosis functionality --- pandas/tests/reductions/test_stat_reductions.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index 4af1ca1d4800a..77ac6c3a47ae8 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -257,18 +257,3 @@ def test_kurt(self): alt = lambda x: sp_stats.kurtosis(x, bias=False) self._check_stat_op("kurt", alt, string_series) - - def test_kurt_corner(self): - # test corner cases, kurt() returns NaN unless there's at least 4 - # values - min_N = 4 - for i in range(1, min_N + 1): - s = Series(np.ones(i)) - df = DataFrame(np.ones((i, i))) - if i < min_N: - assert np.isnan(s.kurt()) - assert np.isnan(df.kurt()).all() - else: - assert 0 == s.kurt() - assert isinstance(s.kurt(), np.float64) # GH53482 - assert (df.kurt() == 0).all() From 86f416d43a5ba8194647e2af606d8c8dccd7b514 Mon Sep 17 00:00:00
2001 From: Sebastian Goldmann Date: Sun, 2 Nov 2025 13:24:48 +0100 Subject: [PATCH 08/13] removed depreciated skew corner case --- .../tests/reductions/test_stat_reductions.py | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index 77ac6c3a47ae8..5750946e9904d 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -9,7 +9,6 @@ import pandas as pd from pandas import ( - DataFrame, Series, date_range, ) @@ -228,28 +227,6 @@ def test_sem(self): result = s.sem(ddof=1) assert pd.isna(result) - def test_skew(self): - sp_stats = pytest.importorskip("scipy.stats") - - string_series = Series(range(20), dtype=np.float64, name="series") - - alt = lambda x: sp_stats.skew(x, bias=False) - self._check_stat_op("skew", alt, string_series) - - # test corner cases, skew() returns NaN unless there's at least 3 - # values - min_N = 3 - for i in range(1, min_N + 1): - s = Series(np.ones(i)) - df = DataFrame(np.ones((i, i))) - if i < min_N: - assert np.isnan(s.skew()) - assert np.isnan(df.skew()).all() - else: - assert 0 == s.skew() - assert isinstance(s.skew(), np.float64) # GH53482 - assert (df.skew() == 0).all() - def test_kurt(self): sp_stats = pytest.importorskip("scipy.stats") From 33695069c322decad12ba26dc94faac89067e642 Mon Sep 17 00:00:00 2001 From: Sebastian Goldmann Date: Sun, 2 Nov 2025 13:36:00 +0100 Subject: [PATCH 09/13] adjusted test on rolling kurt and skew --- pandas/tests/window/test_rolling.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 18aafa0d7b71e..a65d3607ff533 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1833,10 +1833,12 @@ def test_rolling_skew_kurt_floating_artifacts(): sr = Series([1 / 3, 4, 0, 0, 0, 0, 0]) r = sr.rolling(4) + 
result = r.skew() - assert (result[-2:] == 0).all() + assert np.isnan(result[-2:]).all() + result = r.kurt() - assert (result[-2:] == -3).all() + assert np.isnan(result[-2:]).all() def test_numeric_only_frame(arithmetic_win_operators, numeric_only): From e2b8b7f3f619d284f2959d5d43e2a38fda0f1b9b Mon Sep 17 00:00:00 2001 From: SebastianGoldmann <112079303+SebastianGoldmann@users.noreply.github.com> Date: Sun, 2 Nov 2025 20:13:32 +0100 Subject: [PATCH 10/13] Apply suggestion from @Alvaro-Kothe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Álvaro Kothe --- pandas/_libs/window/aggregations.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 917b62227e687..1280ba7b8fab4 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -498,7 +498,7 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs, if nobs < 3: result = NaN # GH 42064 46431 - # uniform case, force result to be NaN + # degenerate case, force result to be NaN elif num_consecutive_same_value >= nobs: result = NaN # #18044: with uniform distribution, floating issue will From 98356d67539619883d495f6252c02d335da9e698 Mon Sep 17 00:00:00 2001 From: SebastianGoldmann <112079303+SebastianGoldmann@users.noreply.github.com> Date: Sun, 2 Nov 2025 20:13:41 +0100 Subject: [PATCH 11/13] Apply suggestion from @Alvaro-Kothe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Álvaro Kothe --- pandas/_libs/window/aggregations.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 1280ba7b8fab4..30fb3946060db 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -501,7 +501,7 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs, # degenerate 
case, force result to be NaN elif num_consecutive_same_value >= nobs: result = NaN - # #18044: with uniform distribution, floating issue will + # #18044: with degenerate distribution, floating issue will # cause B != 0. and cause the result is a very # large number. # From 2dd44a8798a91a52479cfbf74fb3b3609a23a66b Mon Sep 17 00:00:00 2001 From: SebastianGoldmann <112079303+SebastianGoldmann@users.noreply.github.com> Date: Sun, 2 Nov 2025 20:14:49 +0100 Subject: [PATCH 12/13] Apply suggestion from @Alvaro-Kothe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Álvaro Kothe --- pandas/tests/window/test_rolling_skew_kurt.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py index 35d0a88d292ac..7c136b1011ef7 100644 --- a/pandas/tests/window/test_rolling_skew_kurt.py +++ b/pandas/tests/window/test_rolling_skew_kurt.py @@ -213,7 +213,6 @@ def test_rolling_kurt_edge_cases(step): def test_rolling_skew_eq_value_fperr(step): # #18804 all rolling skew for all equal values should return NaN - # #46717 update: all equal values should return 0 instead of NaN a = Series([1.1] * 15).rolling(window=10, step=step).skew() assert a.isna().all() From 251d809964ee08400b089905afd6c5c204e12d4e Mon Sep 17 00:00:00 2001 From: SebastianGoldmann <112079303+SebastianGoldmann@users.noreply.github.com> Date: Sun, 2 Nov 2025 20:15:05 +0100 Subject: [PATCH 13/13] Apply suggestion from @Alvaro-Kothe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Álvaro Kothe --- pandas/tests/window/test_rolling_skew_kurt.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py index 7c136b1011ef7..370cd6113a504 100644 --- a/pandas/tests/window/test_rolling_skew_kurt.py +++ b/pandas/tests/window/test_rolling_skew_kurt.py @@ 
-219,6 +219,5 @@ def test_rolling_skew_eq_value_fperr(step): def test_rolling_kurt_eq_value_fperr(step): # #18804 all rolling kurt for all equal values should return Nan - # #46717 update: all equal values should return -3 instead of NaN a = Series([1.1] * 15).rolling(window=10, step=step).kurt() assert a.isna().all()