Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Added nan_policy to circ functions in stats #10710

Merged
merged 18 commits into from Oct 20, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions THANKS.txt
Expand Up @@ -217,6 +217,7 @@ Christian Brueffer for improvements to code readability/style and documentation.
Timothy C. Willard for contributions to x-value requirements in scipy.interpolate.
Andrew Knyazev, the original author of LOBPCG, for advice on and maintenance of
sparse.linalg.lobpcg
Angeline G. Burrell for implementing nan_policy in the circular statistics


Institutions
Expand Down
42 changes: 35 additions & 7 deletions scipy/stats/morestats.py
Expand Up @@ -3147,16 +3147,32 @@ def median_test(*args, **kwds):
return stat, p, grand_median, table


def _circfuncs_common(samples, high, low):
def _circfuncs_common(samples, high, low, **kwds):
# Ensure samples are array-like
samples = np.asarray(samples)

# Read and apply the NaN policy, defaulting to propagate (previous behavior)
nan_policy = kwds.pop('nan_policy', 'propagate')
aburrell marked this conversation as resolved.
Show resolved Hide resolved
if len(kwds) > 0:
raise TypeError("unexpected keyword argument %r" % kwds.keys()[0])

aburrell marked this conversation as resolved.
Show resolved Hide resolved
contains_nan, nan_policy = _contains_nan(samples, nan_policy)
if contains_nan:
if nan_policy == 'propagate':
return np.nan, np.nan
aburrell marked this conversation as resolved.
Show resolved Hide resolved
else:
samples = samples[~np.isnan(samples)]

# Test the sample size
if samples.size == 0:
return np.nan, np.nan

# Recast samples as radians that range between 0 and 2 pi
ang = (samples - low)*2.*pi / (high - low)
return samples, ang


def circmean(samples, high=2*pi, low=0, axis=None):
def circmean(samples, high=2*pi, low=0, axis=None, **kwds):
"""
Compute the circular mean for samples in a range.

Expand All @@ -3171,6 +3187,10 @@ def circmean(samples, high=2*pi, low=0, axis=None):
axis : int, optional
Axis along which means are computed. The default is to compute
the mean of the flattened array.
nan_policy : {'propagate', 'raise', 'omit'}, optional
Defines how to handle when input contains nan. 'propagate' returns nan,
'raise' throws an error, 'omit' performs the calculations ignoring nan
values. Default is 'propagate'.

Returns
-------
Expand All @@ -3188,7 +3208,7 @@ def circmean(samples, high=2*pi, low=0, axis=None):
0.4

"""
samples, ang = _circfuncs_common(samples, high, low)
samples, ang = _circfuncs_common(samples, high, low, **kwds)
aburrell marked this conversation as resolved.
Show resolved Hide resolved
S = sin(ang).sum(axis=axis)
C = cos(ang).sum(axis=axis)
res = arctan2(S, C)
Expand All @@ -3200,7 +3220,7 @@ def circmean(samples, high=2*pi, low=0, axis=None):
return res*(high - low)/2.0/pi + low


def circvar(samples, high=2*pi, low=0, axis=None):
def circvar(samples, high=2*pi, low=0, axis=None, **kwds):
"""
Compute the circular variance for samples assumed to be in a range

Expand All @@ -3215,6 +3235,10 @@ def circvar(samples, high=2*pi, low=0, axis=None):
axis : int, optional
Axis along which variances are computed. The default is to compute
the variance of the flattened array.
nan_policy : {'propagate', 'raise', 'omit'}, optional
Defines how to handle when input contains nan. 'propagate' returns nan,
'raise' throws an error, 'omit' performs the calculations ignoring nan
values. Default is 'propagate'.

Returns
-------
Expand All @@ -3233,14 +3257,14 @@ def circvar(samples, high=2*pi, low=0, axis=None):
2.19722457734

"""
samples, ang = _circfuncs_common(samples, high, low)
samples, ang = _circfuncs_common(samples, high, low, **kwds)
S = sin(ang).mean(axis=axis)
C = cos(ang).mean(axis=axis)
R = hypot(S, C)
return ((high - low)/2.0/pi)**2 * 2 * log(1/R)


def circstd(samples, high=2*pi, low=0, axis=None):
def circstd(samples, high=2*pi, low=0, axis=None, **kwds):
"""
Compute the circular standard deviation for samples assumed to be in the
range [low to high].
Expand All @@ -3257,6 +3281,10 @@ def circstd(samples, high=2*pi, low=0, axis=None):
axis : int, optional
Axis along which standard deviations are computed. The default is
to compute the standard deviation of the flattened array.
nan_policy : {'propagate', 'raise', 'omit'}, optional
Defines how to handle when input contains nan. 'propagate' returns nan,
'raise' throws an error, 'omit' performs the calculations ignoring nan
values. Default is 'propagate'.

Returns
-------
Expand All @@ -3275,7 +3303,7 @@ def circstd(samples, high=2*pi, low=0, axis=None):
0.063564063306

"""
samples, ang = _circfuncs_common(samples, high, low)
samples, ang = _circfuncs_common(samples, high, low, **kwds)
S = sin(ang).mean(axis=axis)
C = cos(ang).mean(axis=axis)
R = hypot(S, C)
Expand Down
45 changes: 45 additions & 0 deletions scipy/stats/tests/test_morestats.py
Expand Up @@ -1621,6 +1621,51 @@ def test_empty(self):
assert_(np.isnan(stats.circstd([])))
assert_(np.isnan(stats.circvar([])))

def test_nan_propagate(self):
x = [355, 5, 2, 359, 10, 350, np.nan]
assert_(np.isnan(stats.circmean(x, high=360)))
assert_(np.isnan(stats.circstd(x, high=360)))
assert_(np.isnan(stats.circvar(x, high=360)))

def test_nan_omit(self):
x = [355, 5, 2, 359, 10, 350, np.nan]
assert_allclose(stats.circmean(x, high=360, nan_policy='omit'),
0.167690146, rtol=1e-7)
assert_allclose(stats.circvar(x, high=360, nan_policy='omit'),
42.51955609, rtol=1e-7)
assert_allclose(stats.circstd(x, high=360, nan_policy='omit'),
6.520702116, rtol=1e-7)

def test_nan_omit_all(self):
x = [np.nan, np.nan, np.nan, np.nan, np.nan]
assert_(np.isnan(stats.circmean(x, nan_policy='omit')))
assert_(np.isnan(stats.circvar(x, nan_policy='omit')))
assert_(np.isnan(stats.circstd(x, nan_policy='omit')))

def test_nan_raise(self):
x = [355, 5, 2, 359, 10, 350, np.nan]
assert_raises(ValueError, stats.circmean, x, high=360,
nan_policy='raise')
assert_raises(ValueError, stats.circvar, x, high=360,
nan_policy='raise')
assert_raises(ValueError, stats.circstd, x, high=360,
nan_policy='raise')

def test_bad_nan_policy(self):
x = [355, 5, 2, 359, 10, 350, np.nan]
assert_raises(ValueError, stats.circmean, x, high=360,
nan_policy='foobar')
assert_raises(ValueError, stats.circvar, x, high=360,
nan_policy='foobar')
assert_raises(ValueError, stats.circstd, x, high=360,
nan_policy='foobar')

def test_bad_keyword(self):
x = [355, 5, 2, 359, 10, 350, np.nan]
assert_raises(TypeError, stats.circmean, x, high=360, foo="foo")
assert_raises(TypeError, stats.circvar, x, high=360, foo="foo")
assert_raises(TypeError, stats.circstd, x, high=360, foo="foo")
aburrell marked this conversation as resolved.
Show resolved Hide resolved

def test_circmean_scalar(self):
x = 1.
M1 = x
Expand Down