Skip to content
Open
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v2.3.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ Bug fixes
characters as digits for :class:`StringDtype` backed by PyArrow (:issue:`61466`)
- Fix comparing a :class:`StringDtype` Series with mixed objects raising an error (:issue:`60228`)
- Fix error being raised when using a numpy ufunc with a Python-backed string array (:issue:`40800`)
- Fix bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` ``skew()`` and ``kurt()`` calculations
that produced incorrect values for windows containing variance of zero.
The implementation has been updated to return ``NaN`` in these cases, matching skipy.stats behavior
and the non-rolling :meth:`Series.skew` and :meth:`Series.kurt` received the same behavior ensuring consistency. (:issue:`62864`)

Other changes
~~~~~~~~~~~~~
Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/window/aggregations.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -498,10 +498,10 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs,
if nobs < 3:
result = NaN
# GH 42064 46431
# uniform case, force result to be 0
# degenerate case, force result to be NaN
elif num_consecutive_same_value >= nobs:
result = 0.0
# #18044: with uniform distribution, floating issue will
result = NaN
# #18044: with degenerate distribution, floating issue will
# cause B != 0. and cause the result is a very
# large number.
#
Expand Down Expand Up @@ -694,7 +694,7 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
# GH 42064 46431
# uniform case, force result to be -3.
elif num_consecutive_same_value >= nobs:
result = -3.
result = NaN
else:
dnobs = <float64_t>nobs
A = x / dnobs
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1287,10 +1287,10 @@ def nanskew(
result = result.astype(dtype, copy=False)

if isinstance(result, np.ndarray):
result = np.where(m2 == 0, 0, result)
result = np.where(m2 == 0, np.nan, result)
result[count < 3] = np.nan
else:
result = dtype.type(0) if m2 == 0 else result
result = np.nan if m2 == 0 else result
if count < 3:
return np.nan

Expand Down Expand Up @@ -1400,7 +1400,7 @@ def nankurt(
if count < 4:
return np.nan
if denominator == 0:
return values.dtype.type(0)
return np.nan

with np.errstate(invalid="ignore", divide="ignore"):
result = numerator / denominator - adj
Expand All @@ -1410,7 +1410,7 @@ def nankurt(
result = result.astype(dtype, copy=False)

if isinstance(result, np.ndarray):
result = np.where(denominator == 0, 0, result)
result = np.where(denominator == 0, np.nan, result)
result[count < 4] = np.nan

return result
Expand Down
38 changes: 0 additions & 38 deletions pandas/tests/reductions/test_stat_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import pandas as pd
from pandas import (
DataFrame,
Series,
date_range,
)
Expand Down Expand Up @@ -228,47 +227,10 @@ def test_sem(self):
result = s.sem(ddof=1)
assert pd.isna(result)

def test_skew(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are you removing this test?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HI Alvaro,

Thank you for the review.

In my previous commit I thought these were duplicated tests, but looking at it again these should not be removed. I will revert and update the tests.

sp_stats = pytest.importorskip("scipy.stats")

string_series = Series(range(20), dtype=np.float64, name="series")

alt = lambda x: sp_stats.skew(x, bias=False)
self._check_stat_op("skew", alt, string_series)

# test corner cases, skew() returns NaN unless there's at least 3
# values
min_N = 3
for i in range(1, min_N + 1):
s = Series(np.ones(i))
df = DataFrame(np.ones((i, i)))
if i < min_N:
assert np.isnan(s.skew())
assert np.isnan(df.skew()).all()
else:
assert 0 == s.skew()
assert isinstance(s.skew(), np.float64) # GH53482
assert (df.skew() == 0).all()

def test_kurt(self):
sp_stats = pytest.importorskip("scipy.stats")

string_series = Series(range(20), dtype=np.float64, name="series")

alt = lambda x: sp_stats.kurtosis(x, bias=False)
self._check_stat_op("kurt", alt, string_series)

def test_kurt_corner(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are you removing this test?

# test corner cases, kurt() returns NaN unless there's at least 4
# values
min_N = 4
for i in range(1, min_N + 1):
s = Series(np.ones(i))
df = DataFrame(np.ones((i, i)))
if i < min_N:
assert np.isnan(s.kurt())
assert np.isnan(df.kurt()).all()
else:
assert 0 == s.kurt()
assert isinstance(s.kurt(), np.float64) # GH53482
assert (df.kurt() == 0).all()
8 changes: 4 additions & 4 deletions pandas/tests/test_nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,10 +559,10 @@ def _skew_kurt_wrap(self, values, axis=None, func=None):
result = func(values, axis=axis, bias=False)
# fix for handling cases where all elements in an axis are the same
if isinstance(result, np.ndarray):
result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0
result[np.max(values, axis=axis) == np.min(values, axis=axis)] = np.nan
return result
elif np.max(values) == np.min(values):
return 0.0
return np.nan
Comment on lines 560 to +565
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these branches can be removed, since SciPy already assigns NaN.

Suggested change
# fix for handling cases where all elements in an axis are the same
if isinstance(result, np.ndarray):
result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0
result[np.max(values, axis=axis) == np.min(values, axis=axis)] = np.nan
return result
elif np.max(values) == np.min(values):
return 0.0
return np.nan

return result

def test_nanskew(self, skipna):
Expand Down Expand Up @@ -1021,7 +1021,7 @@ def test_constant_series(self, val):
# xref GH 11974
data = val * np.ones(300)
skew = nanops.nanskew(data)
assert skew == 0.0
assert np.isnan(skew)

def test_all_finite(self):
alpha, beta = 0.3, 0.1
Expand Down Expand Up @@ -1089,7 +1089,7 @@ def test_constant_series(self, val):
# xref GH 11974
data = val * np.ones(300)
kurt = nanops.nankurt(data)
tm.assert_equal(kurt, 0.0)
tm.assert_equal(kurt, np.nan)

def test_all_finite(self):
alpha, beta = 0.3, 0.1
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -1833,10 +1833,12 @@ def test_rolling_skew_kurt_floating_artifacts():

sr = Series([1 / 3, 4, 0, 0, 0, 0, 0])
r = sr.rolling(4)

result = r.skew()
assert (result[-2:] == 0).all()
assert np.isnan(result[-2:]).all()

result = r.kurt()
assert (result[-2:] == -3).all()
assert np.isnan(result[-2:]).all()


def test_numeric_only_frame(arithmetic_win_operators, numeric_only):
Expand Down
14 changes: 5 additions & 9 deletions pandas/tests/window/test_rolling_skew_kurt.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def test_center_reindex_frame(frame, roll_func):


def test_rolling_skew_edge_cases(step):
expected = Series([np.nan] * 4 + [0.0])[::step]
expected = Series([np.nan] * 5)[::step]
# yields all NaN (0 variance)
d = Series([1] * 5)
x = d.rolling(window=5, step=step).skew()
Expand All @@ -191,7 +191,7 @@ def test_rolling_skew_edge_cases(step):


def test_rolling_kurt_edge_cases(step):
expected = Series([np.nan] * 4 + [-3.0])[::step]
expected = Series([np.nan] * 5)[::step]

# yields all NaN (0 variance)
d = Series([1] * 5)
Expand All @@ -212,16 +212,12 @@ def test_rolling_kurt_edge_cases(step):


def test_rolling_skew_eq_value_fperr(step):
# #18804 all rolling skew for all equal values should return Nan
# #46717 update: all equal values should return 0 instead of NaN
# #18804 all rolling skew for all equal values should return NaN
a = Series([1.1] * 15).rolling(window=10, step=step).skew()
assert (a[a.index >= 9] == 0).all()
assert a[a.index < 9].isna().all()
assert a.isna().all()


def test_rolling_kurt_eq_value_fperr(step):
# #18804 all rolling kurt for all equal values should return Nan
# #46717 update: all equal values should return -3 instead of NaN
a = Series([1.1] * 15).rolling(window=10, step=step).kurt()
assert (a[a.index >= 9] == -3).all()
assert a[a.index < 9].isna().all()
assert a.isna().all()
Loading