Skip to content

Commit

Permalink
BUG: better floating point robustness in moving window functions. close #2114
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Nov 4, 2012
1 parent 022e630 commit 86ab2e9
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 28 deletions.
1 change: 1 addition & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ pandas 0.9.1
- Fix variety of cut/qcut string-bin formatting bugs (#1978, #1979)
- Raise Exception when xs view not possible of MultiIndex'd DataFrame (#2117)
- Fix groupby(...).first() issue with datetime64 (#2133)
- Better floating point error robustness in some rolling_* functions (#2114)
pandas 0.9.0
============
Expand Down
58 changes: 31 additions & 27 deletions pandas/src/moments.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -175,16 +175,16 @@ def roll_sum(ndarray[double_t] input, int win, int minp):
for i from minp - 1 <= i < N:
val = input[i]

if val == val:
nobs += 1
sum_x += val

if i > win - 1:
prev = input[i - win]
if prev == prev:
sum_x -= prev
nobs -= 1

if val == val:
nobs += 1
sum_x += val

if nobs >= minp:
output[i] = sum_x
else:
Expand Down Expand Up @@ -218,16 +218,16 @@ def roll_mean(ndarray[double_t] input,
for i from minp - 1 <= i < N:
val = input[i]

if val == val:
nobs += 1
sum_x += val

if i > win - 1:
prev = input[i - win]
if prev == prev:
sum_x -= prev
nobs -= 1

if val == val:
nobs += 1
sum_x += val

if nobs >= minp:
output[i] = sum_x / nobs
else:
Expand Down Expand Up @@ -371,25 +371,29 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
for i from minp - 1 <= i < N:
val = input[i]

if val == val:
nobs += 1
sum_x += val
sum_xx += val * val

if i > win - 1:
prev = input[i - win]
if prev == prev:
sum_x -= prev
sum_xx -= prev * prev
nobs -= 1

if val == val:
nobs += 1
sum_x += val
sum_xx += val * val

if nobs >= minp:
# pathological case
if nobs == 1:
output[i] = 0
continue

output[i] = (nobs * sum_xx - sum_x * sum_x) / (nobs * (nobs - ddof))
val = (nobs * sum_xx - sum_x * sum_x) / (nobs * (nobs - ddof))
if val < 0:
val = 0

output[i] = val
else:
output[i] = NaN

Expand Down Expand Up @@ -426,6 +430,12 @@ def roll_skew(ndarray[double_t] input, int win, int minp):
for i from minp - 1 <= i < N:
val = input[i]

if val == val:
nobs += 1
x += val
xx += val * val
xxx += val * val * val

if i > win - 1:
prev = input[i - win]
if prev == prev:
Expand All @@ -435,12 +445,6 @@ def roll_skew(ndarray[double_t] input, int win, int minp):

nobs -= 1

if val == val:
nobs += 1
x += val
xx += val * val
xxx += val * val * val

if nobs >= minp:
A = x / nobs
B = xx / nobs - A * A
Expand Down Expand Up @@ -491,6 +495,13 @@ def roll_kurt(ndarray[double_t] input,
for i from minp - 1 <= i < N:
val = input[i]

if val == val:
nobs += 1
x += val
xx += val * val
xxx += val * val * val
xxxx += val * val * val * val

if i > win - 1:
prev = input[i - win]
if prev == prev:
Expand All @@ -501,13 +512,6 @@ def roll_kurt(ndarray[double_t] input,

nobs -= 1

if val == val:
nobs += 1
x += val
xx += val * val
xxx += val * val * val
xxxx += val * val * val * val

if nobs >= minp:
A = x / nobs
R = A * A
Expand Down
20 changes: 20 additions & 0 deletions pandas/stats/tests/test_moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from pandas import Series, DataFrame, bdate_range, isnull, notnull
from pandas.util.testing import assert_almost_equal, assert_series_equal
from pandas.util.py3compat import PY3
import pandas.core.datetools as datetools
import pandas.stats.moments as mom
import pandas.util.testing as tm
Expand Down Expand Up @@ -161,6 +162,25 @@ def test_rolling_kurt(self):
self._check_moment_func(mom.rolling_kurt,
lambda x: kurtosis(x, bias=False))

# Regression test for issue #2114: on the fixture below, rolling_sum,
# rolling_mean and rolling_var with a window of 2 must never yield a
# negative value from the second element onward.
def test_fperr_robustness(self):
# TODO: remove this once python 2.5 out of picture
# NOTE(review): the skip itself fires on Python 3 only. Presumably `data`
# would have to be a bytes (b'...') literal for np.frombuffer there, and
# that syntax is unavailable on Python 2.5 -- confirm before removing.
if PY3:
raise nose.SkipTest

# #2114
# Raw fixture bytes; decoded below as little-endian float64 values.
data = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f<UUUUUU\x13@q\x1c\xc7q\x1c\xc7\xf9?\xf6\x12\xdaKh/\xe1?\xf2\xc3"e\xe0\xe9\xc6?\xed\xaf\x831+\x8d\xae?\xf3\x1f\xad\xcb\x1c^\x94?\x15\x1e\xdd\xbd>\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? \x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7<pj\xa0>m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>'

arr = np.frombuffer(data, dtype='<f8')

# Each check below ignores result[0]; presumably that slot is NaN because
# a 2-wide window is not yet full at the first element -- TODO confirm.
result = mom.rolling_sum(arr, 2)
self.assertTrue((result[1:] >= 0).all())

result = mom.rolling_mean(arr, 2)
self.assertTrue((result[1:] >= 0).all())

result = mom.rolling_var(arr, 2)
self.assertTrue((result[1:] >= 0).all())

def _check_moment_func(self, func, static_comp, window=50,
has_min_periods=True,
has_time_rule=True,
Expand Down
3 changes: 2 additions & 1 deletion pandas/tseries/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ def _resample_timestamps(self, obj):
result = grouped.aggregate(self._agg_method)

if self.fill_method is not None:
result = result.fillna(method=self.fill_method, limit=self.limit)
result = result.fillna(method=self.fill_method,
limit=self.limit)

loffset = self.loffset
if isinstance(loffset, basestring):
Expand Down

0 comments on commit 86ab2e9

Please sign in to comment.