Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

497 lines (426 sloc) 13.585 kb
import numpy as np
__all__ = ['median', 'nanmedian', 'nansum', 'nanmean', 'nanvar', 'nanstd',
'nanmin', 'nanmax', 'nanargmin', 'nanargmax', 'rankdata',
'nanrankdata', 'ss', 'nn', 'partsort', 'argpartsort', 'replace',
'anynan', 'allnan']
def median(arr, axis=None):
"Slow median function used for unaccelerated ndim/dtype combinations."
arr = np.asarray(arr)
y = np.median(arr, axis=axis)
if y.dtype != arr.dtype:
if issubclass(arr.dtype.type, np.inexact):
y = y.astype(arr.dtype)
return y
def nansum(arr, axis=None):
"Slow nansum function used for unaccelerated ndim/dtype combinations."
arr = np.asarray(arr)
y = np.nansum(arr, axis=axis)
if not hasattr(y, "dtype"):
y = arr.dtype.type(y)
if y.dtype != arr.dtype:
if issubclass(arr.dtype.type, np.inexact):
y = y.astype(arr.dtype)
return y
def nanmedian(arr, axis=None):
"Slow nanmedian function used for unaccelerated ndim/dtype combinations."
arr = np.asarray(arr)
y = scipy_nanmedian(arr, axis=axis)
if not hasattr(y, "dtype"):
if issubclass(arr.dtype.type, np.inexact):
y = arr.dtype.type(y)
else:
y = np.float64(y)
if y.dtype != arr.dtype:
if issubclass(arr.dtype.type, np.inexact):
y = y.astype(arr.dtype)
if (y.size == 1) and (y.ndim == 0):
y = y[()]
return y
def nanmean(arr, axis=None):
"Slow nanmean function used for unaccelerated ndim/dtype combinations."
arr = np.asarray(arr)
y = scipy_nanmean(arr, axis=axis)
if y.dtype != arr.dtype:
if issubclass(arr.dtype.type, np.inexact):
y = y.astype(arr.dtype)
return y
def nanvar(arr, axis=None, ddof=0):
"Slow nanvar function used for unaccelerated ndim/dtype combinations."
arr = np.asarray(arr)
y = nanstd(arr, axis=axis, ddof=ddof)
return y * y
def nanstd(arr, axis=None, ddof=0):
"Slow nanstd function used for unaccelerated ndim/dtype combinations."
arr = np.asarray(arr)
if ddof == 0:
bias = True
elif ddof == 1:
bias = False
else:
raise ValueError("With NaNs ddof must be 0 or 1.")
if axis != None:
# Older versions of scipy can't handle negative axis?
if axis < 0:
axis += arr.ndim
if (axis < 0) or (axis >= arr.ndim):
raise ValueError("axis(=%d) out of bounds" % axis)
else:
# Older versions of scipy choke on axis=None
arr = arr.ravel()
axis = 0
y = scipy_nanstd(arr, axis=axis, bias=bias)
if y.dtype != arr.dtype:
if issubclass(arr.dtype.type, np.inexact):
y = y.astype(arr.dtype)
return y
def nanmin(arr, axis=None):
"Slow nanmin function used for unaccelerated ndim/dtype combinations."
y = np.nanmin(arr, axis=axis)
if not hasattr(y, "dtype"):
# Numpy 1.5.1 doesn't return object with dtype when input is all NaN
y = arr.dtype.type(y)
return y
def nanmax(arr, axis=None):
"Slow nanmax function used for unaccelerated ndim/dtype combinations."
y = np.nanmax(arr, axis=axis)
if not hasattr(y, "dtype"):
# Numpy 1.5.1 doesn't return object with dtype when input is all NaN
y = arr.dtype.type(y)
return y
def nanargmin(arr, axis=None):
"Slow nanargmin function used for unaccelerated ndim/dtype combinations."
return np.nanargmin(arr, axis=axis)
def nanargmax(arr, axis=None):
"Slow nanargmax function used for unaccelerated ndim/dtype combinations."
return np.nanargmax(arr, axis=axis)
def rankdata(arr, axis=None):
"Slow rankdata function used for unaccelerated ndim/dtype combinations."
arr = np.asarray(arr)
if axis is None:
arr = arr.ravel()
axis = 0
elif axis < 0:
axis = range(arr.ndim)[axis]
y = np.empty(arr.shape)
itshape = list(arr.shape)
itshape.pop(axis)
for ij in np.ndindex(*itshape):
ijslice = list(ij[:axis]) + [slice(None)] + list(ij[axis:])
y[ijslice] = scipy_rankdata(arr[ijslice].astype('float'))
return y
def nanrankdata(arr, axis=None):
"Slow nanrankdata function used for unaccelerated ndim/dtype combinations."
arr = np.asarray(arr)
if axis is None:
arr = arr.ravel()
axis = 0
elif axis < 0:
axis = range(arr.ndim)[axis]
y = np.empty(arr.shape)
y.fill(np.nan)
itshape = list(arr.shape)
itshape.pop(axis)
for ij in np.ndindex(*itshape):
ijslice = list(ij[:axis]) + [slice(None)] + list(ij[axis:])
x1d = arr[ijslice].astype(float)
mask1d = ~np.isnan(x1d)
x1d[mask1d] = scipy_rankdata(x1d[mask1d])
y[ijslice] = x1d
return y
def ss(arr, axis=0):
"Slow sum of squares used for unaccelerated ndim/dtype combinations."
return scipy_ss(arr, axis)
def nn(arr, arr0, axis=1):
"Slow nearest neighbor used for unaccelerated ndim/dtype combinations."
if arr.ndim != 2:
raise ValueError("`arr` must be 2d")
if arr0.ndim != 1:
raise ValueError("`arr0` must be 1d")
if axis == 1:
d = (arr - arr0) ** 2
elif axis == 0:
d = (arr - arr0.reshape(-1,1)) ** 2
else:
raise ValueError("`axis` must be 0 or 1.")
d = d.sum(axis)
idx = np.argmin(d)
return np.sqrt(d[idx]), idx
def partsort(arr, n, axis=-1):
"Slow partial sort used for unaccelerated ndim/dtype combinations."
return np.sort(arr, axis)
def argpartsort(arr, n, axis=-1):
"Slow partial argsort used for unaccelerated ndim/dtype combinations."
return np.argsort(arr, axis)
def replace(arr, old, new):
"Slow replace (inplace) used for unaccelerated ndim/dtype combinations."
if type(arr) is not np.ndarray:
raise TypeError("`arr` must be a numpy array.")
if not issubclass(arr.dtype.type, np.inexact):
if int(old) != old:
raise ValueError("Cannot safely cast `old` to int.")
if int(new) != new:
raise ValueError("Cannot safely cast `new` to int.")
if old != old:
mask = np.isnan(arr)
else:
mask = arr == old
np.putmask(arr, mask, new)
def anynan(arr, axis=None):
"Slow check for Nans used for unaccelerated ndim/dtype combinations."
return np.isnan(arr).any(axis)
def allnan(arr, axis=None):
"Slow check for all Nans used for unaccelerated ndim/dtype combinations."
return np.isnan(arr).all(axis)
# ---------------------------------------------------------------------------
#
# SciPy
#
# Local copy of scipy.stats functions to avoid (by popular demand) a SciPy
# dependency. The SciPy license is included in the Bottleneck license file,
# which is distributed with Bottleneck.
#
# Code taken from scipy trunk on Dec 16, 2010.
# nanmedian taken from scipy trunk on Dec 17, 2010.
# rankdata taken from scipy HEAD on Mar 16, 2011.
def scipy_nanmean(x, axis=0):
"""
Compute the mean over the given axis ignoring nans.
Parameters
----------
x : ndarray
Input array.
axis : int, optional
Axis along which the mean is computed. Default is 0, i.e. the
first axis.
Returns
-------
m : float
The mean of `x`, ignoring nans.
See Also
--------
nanstd, nanmedian
Examples
--------
>>> from scipy import stats
>>> a = np.linspace(0, 4, 3)
>>> a
array([ 0., 2., 4.])
>>> a[-1] = np.nan
>>> stats.nanmean(a)
1.0
"""
x, axis = _chk_asarray(x,axis)
x = x.copy()
Norig = x.shape[axis]
factor = 1.0-np.sum(np.isnan(x),axis)*1.0/Norig
x[np.isnan(x)] = 0
return np.mean(x,axis)/factor
def scipy_nanstd(x, axis=0, bias=False):
"""
Compute the standard deviation over the given axis, ignoring nans.
Parameters
----------
x : array_like
Input array.
axis : int or None, optional
Axis along which the standard deviation is computed. Default is 0.
If None, compute over the whole array `x`.
bias : bool, optional
If True, the biased (normalized by N) definition is used. If False
(default), the unbiased definition is used.
Returns
-------
s : float
The standard deviation.
See Also
--------
nanmean, nanmedian
Examples
--------
>>> from scipy import stats
>>> a = np.arange(10, dtype=float)
>>> a[1:3] = np.nan
>>> np.std(a)
nan
>>> stats.nanstd(a)
2.9154759474226504
>>> stats.nanstd(a.reshape(2, 5), axis=1)
array([ 2.0817, 1.5811])
>>> stats.nanstd(a.reshape(2, 5), axis=None)
2.9154759474226504
"""
x, axis = _chk_asarray(x,axis)
x = x.copy()
Norig = x.shape[axis]
Nnan = np.sum(np.isnan(x),axis)*1.0
n = Norig - Nnan
x[np.isnan(x)] = 0.
m1 = np.sum(x,axis)/n
if axis:
d = (x - np.expand_dims(m1, axis))**2.0
else:
d = (x - m1)**2.0
m2 = np.sum(d,axis)-(m1*m1)*Nnan
if bias:
m2c = m2 / n
else:
m2c = m2 / (n - 1.)
return np.sqrt(m2c)
def _nanmedian(arr1d): # This only works on 1d arrays
"""Private function for rank a arrays. Compute the median ignoring Nan.
Parameters
----------
arr1d : ndarray
Input array, of rank 1.
Results
-------
m : float
The median.
"""
cond = 1-np.isnan(arr1d)
x = np.sort(np.compress(cond,arr1d,axis=-1))
if x.size == 0:
return np.nan
return np.median(x)
# Feb 2011: patched nanmedian to handle nanmedian(a, 1) with a = np.ones((2,0))
def scipy_nanmedian(x, axis=0):
"""
Compute the median along the given axis ignoring nan values.
Parameters
----------
x : array_like
Input array.
axis : int, optional
Axis along which the median is computed. Default is 0, i.e. the
first axis.
Returns
-------
m : float
The median of `x` along `axis`.
See Also
--------
nanstd, nanmean
Examples
--------
>>> from scipy import stats
>>> a = np.array([0, 3, 1, 5, 5, np.nan])
>>> stats.nanmedian(a)
array(3.0)
>>> b = np.array([0, 3, 1, 5, 5, np.nan, 5])
>>> stats.nanmedian(b)
array(4.0)
Example with axis:
>>> c = np.arange(30.).reshape(5,6)
>>> idx = np.array([False, False, False, True, False] * 6).reshape(5,6)
>>> c[idx] = np.nan
>>> c
array([[ 0., 1., 2., nan, 4., 5.],
[ 6., 7., nan, 9., 10., 11.],
[ 12., nan, 14., 15., 16., 17.],
[ nan, 19., 20., 21., 22., nan],
[ 24., 25., 26., 27., nan, 29.]])
>>> stats.nanmedian(c, axis=1)
array([ 2. , 9. , 15. , 20.5, 26. ])
"""
x, axis = _chk_asarray(x, axis)
if x.ndim == 0:
return float(x.item())
shape = list(x.shape)
shape.pop(axis)
if 0 in shape:
x = np.empty(shape)
else:
x = x.copy()
x = np.apply_along_axis(_nanmedian, axis, x)
if x.ndim == 0:
x = float(x.item())
return x
def _chk_asarray(a, axis):
if axis is None:
a = np.ravel(a)
outaxis = 0
else:
a = np.asarray(a)
outaxis = axis
return a, outaxis
def fastsort(a):
"""
Sort an array and provide the argsort.
Parameters
----------
a : array_like
Input array.
Returns
-------
fastsort : ndarray of type int
sorted indices into the original array
"""
# TODO: the wording in the docstring is nonsense.
it = np.argsort(a)
as_ = a[it]
return as_, it
def scipy_rankdata(a):
"""
Ranks the data, dealing with ties appropriately.
Equal values are assigned a rank that is the average of the ranks that
would have been otherwise assigned to all of the values within that set.
Ranks begin at 1, not 0.
Parameters
----------
a : array_like
This array is first flattened.
Returns
-------
rankdata : ndarray
An array of length equal to the size of `a`, containing rank scores.
Examples
--------
>>> stats.rankdata([0, 2, 2, 3])
array([ 1. , 2.5, 2.5, 4. ])
"""
a = np.ravel(a)
n = len(a)
svec, ivec = fastsort(a)
sumranks = 0
dupcount = 0
newarray = np.zeros(n, float)
for i in range(n):
sumranks += i
dupcount += 1
if i==n-1 or svec[i] != svec[i+1]:
averank = sumranks / float(dupcount) + 1
for j in range(i-dupcount+1,i+1):
newarray[ivec[j]] = averank
sumranks = 0
dupcount = 0
return newarray
def scipy_ss(a, axis=0):
"""
Squares each element of the input array, and returns the square(s) of that.
Parameters
----------
a : array_like
Input array.
axis : int or None, optional
The axis along which to calculate. If None, use whole array.
Default is 0, i.e. along the first axis.
Returns
-------
ss : ndarray
The sum along the given axis for (a**2).
See also
--------
square_of_sums : The square(s) of the sum(s) (the opposite of `ss`).
Examples
--------
>>> from scipy import stats
>>> a = np.array([1., 2., 5.])
>>> stats.ss(a)
30.0
And calculating along an axis:
>>> b = np.array([[1., 2., 5.], [2., 5., 6.]])
>>> stats.ss(b, axis=1)
array([ 30., 65.])
"""
a, axis = _chk_asarray(a, axis)
return np.sum(a*a, axis)
Jump to Line
Something went wrong with that request. Please try again.