From a9eff966d87b28185b68ba15f7ffe0ade0194a07 Mon Sep 17 00:00:00 2001 From: empeeu Date: Wed, 12 Feb 2014 19:10:31 -0500 Subject: [PATCH 01/17] ENH: added proper handling of nans for numpy.lib.function_base.median. This closes ticket# 2126 (issue #586) --- numpy/lib/function_base.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 63b191b07b9e..ec0bb7ca5e66 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -2770,7 +2770,15 @@ def median(a, axis=None, out=None, overwrite_input=False): if axis is not None and axis >= a.ndim: raise IndexError( "axis %d out of bounds (%d)" % (axis, a.ndim)) - + + #Check if the array contains any nan's + ids = None + if axis is None or a.ndim == 1: + if np.any(np.isnan(a)): + return np.array([np.nan]) + else: #continue the calculation but replace results with nans where needed + ids = np.any(np.isnan(a), axis=axis) + if overwrite_input: if axis is None: part = a.ravel() @@ -2809,9 +2817,16 @@ def median(a, axis=None, out=None, overwrite_input=False): indexer[axis] = slice(index, index+1) else: indexer[axis] = slice(index-1, index+1) - # Use mean in odd and even case to coerce data type - # and check, use out array. - return mean(part[indexer], axis=axis, out=out) + + if ids == None: #if there are no nans + # Use mean in odd and even case to coerce data type + # and check, use out array. + return mean(part[indexer], axis=axis, out=out) + else: #replace results where needed with nans + out = mean(part[indexer], axis=axis, out=out) + out[ids] = np.nan + return out + def percentile(a, q, axis=None, out=None, From 54aff25fac5186767f023f8ef7a44af7bc9a3721 Mon Sep 17 00:00:00 2001 From: empeeu Date: Wed, 12 Feb 2014 19:40:00 -0500 Subject: [PATCH 02/17] ENH: added unit test for previous commit which updates median's handling of nans in reference to ticket #586 --- numpy/lib/tests/test_function_base.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 003d3e541844..1926dd610a94 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1835,6 +1835,27 @@ def mean(self, axis=None, dtype=None, out=None): a = MySubClass([1,2,3]) assert_equal(np.median(a), -7) + + def test_nan_behavior(self): + a = np.arange(24, dtype=float) + a[2] = np.nan + assert_equal(np.median(a), np.nan) + assert_equal(np.median(a, axis=0), np.nan) + a = np.arange(24, dtype=float).reshape(2, 3, 4) + a[1, 2, 3] = np.nan + a[1, 1, 2] = np.nan + + #no axis + assert_equal(np.median(a), np.nan) + #axis0 + b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 0) + b[2, 3] = np.nan; b[1, 2] = np.nan + assert_equal(np.median(a, 0), b) + #axis1 + b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 1) + b[1, 3] = np.nan; b[1, 2] = np.nan + assert_equal(np.median(a, 1), b) + class TestAdd_newdoc_ufunc(TestCase): From 10902b56eec6675b41c33a7985428c0117ff8de2 Mon Sep 17 00:00:00 2001 From: empeeu Date: Sun, 16 Feb 2014 19:44:06 -0500 Subject: [PATCH 03/17] ENH: #586. Improved performance of nan handling in median function. Also added a flag "check_nan" that can be turned off to regain original performance. Also added nanmedian function to calculate the median while ignorning nans. --- numpy/lib/function_base.py | 75 ++++++------- numpy/lib/nanfunctions.py | 149 +++++++++++++++++++++++++- numpy/lib/tests/test_function_base.py | 9 ++ 3 files changed, 194 insertions(+), 39 deletions(-) diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index ec0bb7ca5e66..9df1080c1e3e 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -2692,7 +2692,7 @@ def msort(a): return b -def median(a, axis=None, out=None, overwrite_input=False): +def median(a, axis=None, out=None, overwrite_input=False, check_nan=True): """ Compute the median along the specified axis. @@ -2717,6 +2717,10 @@ def median(a, axis=None, out=None, overwrite_input=False): but it will probably be fully or partially sorted. Default is False. Note that, if `overwrite_input` is True and the input is not already an ndarray, an error will be raised. + check_nan : bool, optional + If True, then the input array will be tested for the presence of NaNs. + If the input array does not contain NaNs, this flag can be set to False + for a 1%-70% performance increase. Returns ------- @@ -2729,7 +2733,7 @@ def median(a, axis=None, out=None, overwrite_input=False): See Also -------- - mean, percentile + mean, percentile, nanmedian Notes ----- @@ -2771,40 +2775,30 @@ def median(a, axis=None, out=None, overwrite_input=False): raise IndexError( "axis %d out of bounds (%d)" % (axis, a.ndim)) + #Set the partition indexes + if axis is None: + sz = a.size + else: + sz = a.shape[axis] + if sz % 2 == 0: + szh = sz // 2 + kth = [szh - 1, szh] + else: + kth = [(sz - 1) // 2] #Check if the array contains any nan's - ids = None - if axis is None or a.ndim == 1: - if np.any(np.isnan(a)): - return np.array([np.nan]) - else: #continue the calculation but replace results with nans where needed - ids = np.any(np.isnan(a), axis=axis) - + if check_nan and np.issubdtype(a.dtype, np.inexact): + kth.append(-1) + if overwrite_input: if axis is None: part = a.ravel() - sz = part.size - if sz % 2 == 0: - szh = sz // 2 - part.partition((szh - 1, szh)) - else: - part.partition((sz - 1) // 2) + part.partition(kth) else: - sz = a.shape[axis] - if sz % 2 == 0: - szh = sz // 2 - a.partition((szh - 1, szh), axis=axis) - else: - a.partition((sz - 1) // 2, axis=axis) part = a + a.partition(kth, axis=axis) else: - if axis is None: - sz = a.size - else: - sz = a.shape[axis] - if sz % 2 == 0: - part = partition(a, ((sz // 2) - 1, sz // 2), axis=axis) - else: - part = partition(a, (sz - 1) // 2, axis=axis) + part = partition(a, kth, axis=axis) + if part.shape == (): # make 0-D arrays work return part.item() @@ -2817,17 +2811,24 @@ def median(a, axis=None, out=None, overwrite_input=False): indexer[axis] = slice(index, index+1) else: indexer[axis] = slice(index-1, index+1) - - if ids == None: #if there are no nans + #Check if the array contains any nan's + if check_nan and np.issubdtype(a.dtype, np.inexact): + if part.ndim <= 1: + if np.isnan(part[-1]): + return np.nan + else: + return mean(part[indexer], axis=axis, out=out) + else: + nan_indexer = [slice(None)] * part.ndim + nan_indexer[axis] = slice(-1, None) + ids = np.isnan(part[nan_indexer].squeeze(axis)) + out = np.asanyarray(mean(part[indexer], axis=axis, out=out)) + out[ids] = np.nan + return out + else: #if there are no nans # Use mean in odd and even case to coerce data type # and check, use out array. return mean(part[indexer], axis=axis, out=out) - else: #replace results where needed with nans - out = mean(part[indexer], axis=axis, out=out) - out[ids] = np.nan - return out - - def percentile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear'): diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 5766084abfae..a948fe1e0110 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -18,11 +18,11 @@ import warnings import numpy as np - +from numpy.core.fromnumeric import partition __all__ = [ 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', - 'nanvar', 'nanstd' + 'nanmedian', 'nanvar', 'nanstd' ] @@ -601,6 +601,151 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): return avg +def _nanmedian1d(arr1d, overwrite_input=False): # This only works on 1d arrays + """Private function for rank 1 arrays. Compute the median ignoring NaNs. + + Parameters + ---------- + arr1d : ndarray + Input array, of rank 1. + overwrite_input : bool, optional + If True, then allow use of memory of input array (a) for + calculations. The input array will be modified by the call to + median. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. Note that, if `overwrite_input` is True and the input + is not already an ndarray, an error will be raised. + + Results + ------- + m : float + The median. + """ + + if overwrite_input: + part = arr1d.ravel() + #filter out the NaNs + sz = part.size - np.sum(np.isnan(part)) + if sz == 0: + warnings.warn("All-NaN slice encountered", RuntimeWarning) + return np.nan + if sz % 2 == 0: + szh = sz // 2 + part.partition((szh - 1, szh)) + else: + part.partition((sz - 1) // 2) + else: + sz = arr1d.size - np.sum(np.isnan(arr1d)) + if sz == 0: + warnings.warn("All-NaN slice encountered", RuntimeWarning) + return np.nan + if sz % 2 == 0: + part = partition(arr1d, ((sz // 2) - 1, sz // 2), axis=None) + else: + part = partition(arr1d, (sz - 1) // 2, axis=None) + if part.shape == (): + # make 0-D arrays work + return part.item() + axis = 0 + index = sz // 2 + if sz % 2 == 1: + # index with slice to allow mean (below) to work + indexer = slice(index, index+1) + else: + indexer = slice(index-1, index+1) + + # Use mean in odd and even case to coerce data type + # and check, use out array. + return np.mean(part[indexer], axis=axis) + + +def nanmedian(a, axis=None, overwrite_input=False): + """ + Compute the median along the specified axis, while ignoring NaNs. + + Returns the median of the array elements. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : int, optional + Axis along which the medians are computed. The default (axis=None) + is to compute the median along a flattened version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow use of memory of input array (a) for + calculations. The input array will be modified by the call to + median. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. Note that, if `overwrite_input` is True and the input + is not already an ndarray, an error will be raised. + + Returns + ------- + median : ndarray + A new array holding the result (unless `out` is specified, in + which case that array is returned instead). If the input contains + integers, or floats of smaller precision than 64, then the output + data-type is float64. Otherwise, the output data-type is the same + as that of the input. + + See Also + -------- + mean, percentile + + Notes + ----- + Given a vector V of length N, the median of V is the middle value of + a sorted copy of V, ``V_sorted`` - i.e., ``V_sorted[(N-1)/2]``, when N is + odd. When N is even, it is the average of the two middle values of + ``V_sorted``. + + Examples + -------- + >>> a = np.array([[10, 7, 4], [3, 2, 1]]) + >>> a + array([[10, 7, 4], + [ 3, 2, 1]]) + >>> np.median(a) + 3.5 + >>> np.median(a, axis=0) + array([ 6.5, 4.5, 2.5]) + >>> np.median(a, axis=1) + array([ 7., 2.]) + >>> m = np.median(a, axis=0) + >>> out = np.zeros_like(m) + >>> np.median(a, axis=0, out=m) + array([ 6.5, 4.5, 2.5]) + >>> m + array([ 6.5, 4.5, 2.5]) + >>> b = a.copy() + >>> np.median(b, axis=1, overwrite_input=True) + array([ 7., 2.]) + >>> assert not np.all(a==b) + >>> b = a.copy() + >>> np.median(b, axis=None, overwrite_input=True) + 3.5 + >>> assert not np.all(a==b) + + """ + a = np.asanyarray(a) + if axis is not None and axis >= a.ndim: + raise IndexError( + "axis %d out of bounds (%d)" % (axis, a.ndim)) + + if axis is None: + part = a.ravel() + return _nanmedian1d(part, overwrite_input) + else: + return np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input) + + def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ Compute the variance along the specified axis, while ignoring NaNs. diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 1926dd610a94..b322be072687 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1837,6 +1837,15 @@ def mean(self, axis=None, dtype=None, out=None): assert_equal(np.median(a), -7) def test_nan_behavior(self): + #Complex + a = np.arange(24, dtype=complex) + a[2] = complex(np.nan, 0) + assert_equal(np.median(a), np.nan) + a[2] = complex(0, np.nan) + assert_equal(np.median(a), np.nan) + a[2] = complex(np.nan, np.nan) + assert_equal(np.median(a), np.nan) + #floats a = np.arange(24, dtype=float) a[2] = np.nan assert_equal(np.median(a), np.nan) From 8c2f007f5d3d0f7516e557864f81d965dbc25047 Mon Sep 17 00:00:00 2001 From: empeeu Date: Sun, 16 Feb 2014 21:03:04 -0500 Subject: [PATCH 04/17] DOC: Updated doc string for nanmedian. --- numpy/lib/nanfunctions.py | 42 ++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index a948fe1e0110..48a001bf6652 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -673,10 +673,6 @@ def nanmedian(a, axis=None, overwrite_input=False): axis : int, optional Axis along which the medians are computed. The default (axis=None) is to compute the median along a flattened version of the array. - out : ndarray, optional - Alternative output array in which to place the result. It must - have the same shape and buffer length as the expected output, - but the type (of the output) will be cast if necessary. overwrite_input : bool, optional If True, then allow use of memory of input array (a) for calculations. The input array will be modified by the call to @@ -689,15 +685,14 @@ def nanmedian(a, axis=None, overwrite_input=False): Returns ------- median : ndarray - A new array holding the result (unless `out` is specified, in - which case that array is returned instead). If the input contains - integers, or floats of smaller precision than 64, then the output - data-type is float64. Otherwise, the output data-type is the same - as that of the input. + A new array holding the result. If the input contains integers, or + floats of smaller precision than 64, then the output data-type is + float64. Otherwise, the output data-type is the same as that of the + input. See Also -------- - mean, percentile + mean, median, percentile Notes ----- @@ -708,29 +703,26 @@ def nanmedian(a, axis=None, overwrite_input=False): Examples -------- - >>> a = np.array([[10, 7, 4], [3, 2, 1]]) + >>> a = np.array([[10.0, 7, 4], [3, 2, 1]]) + >>> a[0, 1] = np.nan >>> a - array([[10, 7, 4], - [ 3, 2, 1]]) + array([[ 10., nan, 4.], + [ 3., 2., 1.]]) >>> np.median(a) - 3.5 - >>> np.median(a, axis=0) - array([ 6.5, 4.5, 2.5]) + nan + >>> np.nanmedian(a) + 3.0 + >>> np.nanmedian(a, axis=0) + array([ 6.5, 2., 2.5]) >>> np.median(a, axis=1) array([ 7., 2.]) - >>> m = np.median(a, axis=0) - >>> out = np.zeros_like(m) - >>> np.median(a, axis=0, out=m) - array([ 6.5, 4.5, 2.5]) - >>> m - array([ 6.5, 4.5, 2.5]) >>> b = a.copy() - >>> np.median(b, axis=1, overwrite_input=True) + >>> np.nanmedian(b, axis=1, overwrite_input=True) array([ 7., 2.]) >>> assert not np.all(a==b) >>> b = a.copy() - >>> np.median(b, axis=None, overwrite_input=True) - 3.5 + >>> np.nanmedian(b, axis=None, overwrite_input=True) + 3.0 >>> assert not np.all(a==b) """ From d8e39576f3b058645dd401d75d74c37b33d3facd Mon Sep 17 00:00:00 2001 From: empeeu Date: Mon, 17 Feb 2014 21:12:57 -0500 Subject: [PATCH 05/17] REV: Reverting nanmedian from this PR. Now deals exclusively with handling of nans in median. --- numpy/lib/nanfunctions.py | 141 +------------------------------------- 1 file changed, 2 insertions(+), 139 deletions(-) diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 48a001bf6652..5766084abfae 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -18,11 +18,11 @@ import warnings import numpy as np -from numpy.core.fromnumeric import partition + __all__ = [ 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', - 'nanmedian', 'nanvar', 'nanstd' + 'nanvar', 'nanstd' ] @@ -601,143 +601,6 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): return avg -def _nanmedian1d(arr1d, overwrite_input=False): # This only works on 1d arrays - """Private function for rank 1 arrays. Compute the median ignoring NaNs. - - Parameters - ---------- - arr1d : ndarray - Input array, of rank 1. - overwrite_input : bool, optional - If True, then allow use of memory of input array (a) for - calculations. The input array will be modified by the call to - median. This will save memory when you do not need to preserve - the contents of the input array. Treat the input as undefined, - but it will probably be fully or partially sorted. Default is - False. Note that, if `overwrite_input` is True and the input - is not already an ndarray, an error will be raised. - - Results - ------- - m : float - The median. - """ - - if overwrite_input: - part = arr1d.ravel() - #filter out the NaNs - sz = part.size - np.sum(np.isnan(part)) - if sz == 0: - warnings.warn("All-NaN slice encountered", RuntimeWarning) - return np.nan - if sz % 2 == 0: - szh = sz // 2 - part.partition((szh - 1, szh)) - else: - part.partition((sz - 1) // 2) - else: - sz = arr1d.size - np.sum(np.isnan(arr1d)) - if sz == 0: - warnings.warn("All-NaN slice encountered", RuntimeWarning) - return np.nan - if sz % 2 == 0: - part = partition(arr1d, ((sz // 2) - 1, sz // 2), axis=None) - else: - part = partition(arr1d, (sz - 1) // 2, axis=None) - if part.shape == (): - # make 0-D arrays work - return part.item() - axis = 0 - index = sz // 2 - if sz % 2 == 1: - # index with slice to allow mean (below) to work - indexer = slice(index, index+1) - else: - indexer = slice(index-1, index+1) - - # Use mean in odd and even case to coerce data type - # and check, use out array. - return np.mean(part[indexer], axis=axis) - - -def nanmedian(a, axis=None, overwrite_input=False): - """ - Compute the median along the specified axis, while ignoring NaNs. - - Returns the median of the array elements. - - Parameters - ---------- - a : array_like - Input array or object that can be converted to an array. - axis : int, optional - Axis along which the medians are computed. The default (axis=None) - is to compute the median along a flattened version of the array. - overwrite_input : bool, optional - If True, then allow use of memory of input array (a) for - calculations. The input array will be modified by the call to - median. This will save memory when you do not need to preserve - the contents of the input array. Treat the input as undefined, - but it will probably be fully or partially sorted. Default is - False. Note that, if `overwrite_input` is True and the input - is not already an ndarray, an error will be raised. - - Returns - ------- - median : ndarray - A new array holding the result. If the input contains integers, or - floats of smaller precision than 64, then the output data-type is - float64. Otherwise, the output data-type is the same as that of the - input. - - See Also - -------- - mean, median, percentile - - Notes - ----- - Given a vector V of length N, the median of V is the middle value of - a sorted copy of V, ``V_sorted`` - i.e., ``V_sorted[(N-1)/2]``, when N is - odd. When N is even, it is the average of the two middle values of - ``V_sorted``. - - Examples - -------- - >>> a = np.array([[10.0, 7, 4], [3, 2, 1]]) - >>> a[0, 1] = np.nan - >>> a - array([[ 10., nan, 4.], - [ 3., 2., 1.]]) - >>> np.median(a) - nan - >>> np.nanmedian(a) - 3.0 - >>> np.nanmedian(a, axis=0) - array([ 6.5, 2., 2.5]) - >>> np.median(a, axis=1) - array([ 7., 2.]) - >>> b = a.copy() - >>> np.nanmedian(b, axis=1, overwrite_input=True) - array([ 7., 2.]) - >>> assert not np.all(a==b) - >>> b = a.copy() - >>> np.nanmedian(b, axis=None, overwrite_input=True) - 3.0 - >>> assert not np.all(a==b) - - """ - a = np.asanyarray(a) - if axis is not None and axis >= a.ndim: - raise IndexError( - "axis %d out of bounds (%d)" % (axis, a.ndim)) - - if axis is None: - part = a.ravel() - return _nanmedian1d(part, overwrite_input) - else: - return np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input) - - def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ Compute the variance along the specified axis, while ignoring NaNs. From aa615ad086989a7fe3900dd6921e83ff5ab02814 Mon Sep 17 00:00:00 2001 From: jspreston Date: Thu, 20 Feb 2014 14:32:20 -0700 Subject: [PATCH 06/17] numpy.i bugfix: fortran ordering check for ARGOUTVIEWM?_FARRAY[34] typemaps, the included require_fortran check was inverted -- failing if the resulting array had fortran ordering, not if it did not. This modification inverts these checks. --- doc/swig/numpy.i | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/swig/numpy.i b/doc/swig/numpy.i index f2714cc34613..5297254799dc 100644 --- a/doc/swig/numpy.i +++ b/doc/swig/numpy.i @@ -2246,7 +2246,7 @@ PyObject* obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$1)); PyArrayObject* array = (PyArrayObject*) obj; - if (!array || require_fortran(array)) SWIG_fail; + if (!array || !require_fortran(array)) SWIG_fail; $result = SWIG_Python_AppendOutput($result,obj); } @@ -2270,7 +2270,7 @@ PyObject* obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$4)); PyArrayObject* array = (PyArrayObject*) obj; - if (!array || require_fortran(array)) SWIG_fail; + if (!array || !require_fortran(array)) SWIG_fail; $result = SWIG_Python_AppendOutput($result,obj); } @@ -2345,7 +2345,7 @@ PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$1)); PyArrayObject* array = (PyArrayObject*) obj; - if (!array || require_fortran(array)) SWIG_fail; + if (!array || !require_fortran(array)) SWIG_fail; $result = SWIG_Python_AppendOutput($result,obj); } @@ -2370,7 +2370,7 @@ PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$5)); PyArrayObject* array = (PyArrayObject*) obj; - if (!array || require_fortran(array)) SWIG_fail; + if (!array || !require_fortran(array)) SWIG_fail; $result = SWIG_Python_AppendOutput($result,obj); } @@ -2680,7 +2680,7 @@ PyObject* obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$1)); PyArrayObject* array = (PyArrayObject*) obj; - if (!array || require_fortran(array)) SWIG_fail; + if (!array || !require_fortran(array)) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); @@ -2717,7 +2717,7 @@ PyObject* obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$4)); PyArrayObject* array = (PyArrayObject*) obj; - if (!array || require_fortran(array)) SWIG_fail; + if (!array || !require_fortran(array)) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); @@ -2831,7 +2831,7 @@ PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$1)); PyArrayObject* array = (PyArrayObject*) obj; - if (!array || require_fortran(array)) SWIG_fail; + if (!array || !require_fortran(array)) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); @@ -2869,7 +2869,7 @@ PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$5)); PyArrayObject* array = (PyArrayObject*) obj; - if (!array || require_fortran(array)) SWIG_fail; + if (!array || !require_fortran(array)) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); @@ -2983,7 +2983,7 @@ PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$1)); PyArrayObject* array = (PyArrayObject*) obj; - if (!array || require_fortran(array)) SWIG_fail; + if (!array || !require_fortran(array)) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); @@ -3021,7 +3021,7 @@ PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$5)); PyArrayObject* array = (PyArrayObject*) obj; - if (!array || require_fortran(array)) SWIG_fail; + if (!array || !require_fortran(array)) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); From 89d4d1eb75ca2e15de7ab6a6a5712a7674d1d520 Mon Sep 17 00:00:00 2001 From: empeeu Date: Tue, 25 Feb 2014 19:28:30 -0500 Subject: [PATCH 07/17] ENH: #586 Pulls in juliantaylor's specialization. New median with nan checking is now pretty much as fast as the old one. Hence, removed the check_nan flag from previous commits. --- numpy/core/src/npysort/selection.c.src | 13 +++++++++++++ numpy/lib/function_base.py | 12 ++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/numpy/core/src/npysort/selection.c.src b/numpy/core/src/npysort/selection.c.src index 073b5847fe50..dc8d16d2c7f9 100644 --- a/numpy/core/src/npysort/selection.c.src +++ b/numpy/core/src/npysort/selection.c.src @@ -322,6 +322,19 @@ int store_pivot(kth, kth, pivots, npiv); return 0; } + else if (kth == num - 1) { + npy_intp k; + npy_intp maxidx = low; + @type@ maxval = v[IDX(low)]; + for (k = low + 1; k < num; k++) { + if (!@TYPE@_LT(v[IDX(k)], maxval)) { + maxidx = k; + maxval = v[IDX(k)]; + } + } + SWAP(SORTEE(kth), SORTEE(maxidx)); + return 0; + } /* dumb integer msb, float npy_log2 too slow for small parititions */ { diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 9df1080c1e3e..d446badec52b 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -2692,7 +2692,7 @@ def msort(a): return b -def median(a, axis=None, out=None, overwrite_input=False, check_nan=True): +def median(a, axis=None, out=None, overwrite_input=False): """ Compute the median along the specified axis. @@ -2717,11 +2717,7 @@ def median(a, axis=None, out=None, overwrite_input=False, check_nan=True): but it will probably be fully or partially sorted. Default is False. Note that, if `overwrite_input` is True and the input is not already an ndarray, an error will be raised. - check_nan : bool, optional - If True, then the input array will be tested for the presence of NaNs. - If the input array does not contain NaNs, this flag can be set to False - for a 1%-70% performance increase. - + Returns ------- median : ndarray @@ -2786,7 +2782,7 @@ def median(a, axis=None, out=None, overwrite_input=False, check_nan=True): else: kth = [(sz - 1) // 2] #Check if the array contains any nan's - if check_nan and np.issubdtype(a.dtype, np.inexact): + if np.issubdtype(a.dtype, np.inexact): kth.append(-1) if overwrite_input: @@ -2812,7 +2808,7 @@ def median(a, axis=None, out=None, overwrite_input=False, check_nan=True): else: indexer[axis] = slice(index-1, index+1) #Check if the array contains any nan's - if check_nan and np.issubdtype(a.dtype, np.inexact): + if np.issubdtype(a.dtype, np.inexact): if part.ndim <= 1: if np.isnan(part[-1]): return np.nan From ea8975718f7d275dfa2c91bf1de8b3fe6d792d9c Mon Sep 17 00:00:00 2001 From: Julian Taylor Date: Wed, 5 Mar 2014 21:54:39 +0100 Subject: [PATCH 08/17] BUG: restore api for file npy_PyFile_Dup and npy_PyFile_DupClose breaking the api breaks matplotlib build and pip installation. Introduce npy_PyFile_Dup2 and npy_PyFile_DupClose2 as replacements --- doc/release/1.9.0-notes.rst | 7 ++- numpy/core/include/numpy/npy_3kcompat.h | 54 ++++++++++++++++++-- numpy/core/src/multiarray/methods.c | 4 +- numpy/core/src/multiarray/multiarraymodule.c | 4 +- 4 files changed, 59 insertions(+), 10 deletions(-) diff --git a/doc/release/1.9.0-notes.rst b/doc/release/1.9.0-notes.rst index cf33cdb1c655..e7d4a7406629 100644 --- a/doc/release/1.9.0-notes.rst +++ b/doc/release/1.9.0-notes.rst @@ -242,7 +242,12 @@ For example ``np.float_(2) * [1]`` will be an error in the future. C-API ~~~~~ -None +The utility function npy_PyFile_Dup and npy_PyFile_DupClose are broken by the +internal buffering python 3 applies to its file objects. +To fix this two new functions npy_PyFile_Dup2 and npy_PyFile_DupClose2 are +declared in npy_3kcompat.h and the old functions are deprecated. +Due to the fragile nature of these functions it is recommended to instead use +the python API when possible. New Features diff --git a/numpy/core/include/numpy/npy_3kcompat.h b/numpy/core/include/numpy/npy_3kcompat.h index 0d33de17a83c..36b1def4bcfa 100644 --- a/numpy/core/include/numpy/npy_3kcompat.h +++ b/numpy/core/include/numpy/npy_3kcompat.h @@ -141,12 +141,11 @@ PyUnicode_Concat2(PyObject **left, PyObject *right) * PyFile_* compatibility */ #if defined(NPY_PY3K) - /* * Get a FILE* handle to the file represented by the Python object */ static NPY_INLINE FILE* -npy_PyFile_Dup(PyObject *file, char *mode, npy_off_t *orig_pos) +npy_PyFile_Dup2(PyObject *file, char *mode, npy_off_t *orig_pos) { int fd, fd2; PyObject *ret, *os; @@ -221,7 +220,7 @@ npy_PyFile_Dup(PyObject *file, char *mode, npy_off_t *orig_pos) * Close the dup-ed file handle, and seek the Python one to the current position */ static NPY_INLINE int -npy_PyFile_DupClose(PyObject *file, FILE* handle, npy_off_t orig_pos) +npy_PyFile_DupClose2(PyObject *file, FILE* handle, npy_off_t orig_pos) { int fd; PyObject *ret; @@ -269,10 +268,55 @@ npy_PyFile_Check(PyObject *file) return 1; } +/* + * DEPRECATED DO NOT USE + * use npy_PyFile_Dup2 instead + * this function will mess ups python3 internal file object buffering + * Get a FILE* handle to the file represented by the Python object + */ +static NPY_INLINE FILE* +npy_PyFile_Dup(PyObject *file, char *mode) +{ + npy_off_t orig; + if (DEPRECATE("npy_PyFile_Dup is deprecated, use " + "npy_PyFile_Dup2") < 0) { + return NULL; + } + + return npy_PyFile_Dup2(file, mode, &orig); +} + +/* + * DEPRECATED DO NOT USE + * use npy_PyFile_DupClose2 instead + * this function will mess ups python3 internal file object buffering + * Close the dup-ed file handle, and seek the Python one to the current position + */ +static NPY_INLINE int +npy_PyFile_DupClose(PyObject *file, FILE* handle) +{ + PyObject *ret; + Py_ssize_t position; + position = npy_ftell(handle); + fclose(handle); + + ret = PyObject_CallMethod(file, "seek", NPY_SSIZE_T_PYFMT "i", position, 0); + if (ret == NULL) { + return -1; + } + Py_DECREF(ret); + return 0; +} + + #else -#define npy_PyFile_Dup(file, mode, orig_pos_p) PyFile_AsFile(file) -#define npy_PyFile_DupClose(file, handle, orig_pos) (0) +/* DEPRECATED DO NOT USE */ +#define npy_PyFile_Dup(file, mode) PyFile_AsFile(file) +#define npy_PyFile_DupClose(file, handle) (0) +/* use these */ +#define npy_PyFile_Dup2(file, mode, orig_pos_p) PyFile_AsFile(file) +#define npy_PyFile_DupClose2(file, handle, orig_pos) (0) #define npy_PyFile_Check PyFile_Check #endif diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 66d68e5b322e..91f6396509ff 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -588,7 +588,7 @@ array_tofile(PyArrayObject *self, PyObject *args, PyObject *kwds) own = 0; } - fd = npy_PyFile_Dup(file, "wb", &orig_pos); + fd = npy_PyFile_Dup2(file, "wb", &orig_pos); if (fd == NULL) { PyErr_SetString(PyExc_IOError, "first argument must be a string or open file"); @@ -597,7 +597,7 @@ array_tofile(PyArrayObject *self, PyObject *args, PyObject *kwds) if (PyArray_ToFile(self, fd, sep, format) < 0) { goto fail; } - if (npy_PyFile_DupClose(file, fd, orig_pos) < 0) { + if (npy_PyFile_DupClose2(file, fd, orig_pos) < 0) { goto fail; } if (own && npy_PyFile_CloseFile(file) < 0) { diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index b3fdd040fdd5..cfaf7799ad85 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -1999,7 +1999,7 @@ array_fromfile(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *keywds) Py_INCREF(file); own = 0; } - fp = npy_PyFile_Dup(file, "rb", &orig_pos); + fp = npy_PyFile_Dup2(file, "rb", &orig_pos); if (fp == NULL) { PyErr_SetString(PyExc_IOError, "first argument must be an open file"); @@ -2011,7 +2011,7 @@ array_fromfile(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *keywds) } ret = PyArray_FromFile(fp, type, (npy_intp) nin, sep); - if (npy_PyFile_DupClose(file, fp, orig_pos) < 0) { + if (npy_PyFile_DupClose2(file, fp, orig_pos) < 0) { goto fail; } if (own && npy_PyFile_CloseFile(file) < 0) { From 4b2b77e0b50b3b3ffdb0ea17f38e14d3b8e82ebd Mon Sep 17 00:00:00 2001 From: Daniel da Silva Date: Wed, 5 Mar 2014 20:06:00 -0500 Subject: [PATCH 09/17] Revert "Merge pull request #4421 from meltingwax/meltingwax/4382" Caused SciPy tests to fail when built with this NumPy. --- numpy/distutils/fcompiler/__init__.py | 10 ++- numpy/distutils/misc_util.py | 8 +-- numpy/distutils/npy_pkg_config.py | 14 ++-- numpy/distutils/tests/test_npy_pkg_config.py | 75 +++----------------- numpy/distutils/unixccompiler.py | 5 +- 5 files changed, 23 insertions(+), 89 deletions(-) diff --git a/numpy/distutils/fcompiler/__init__.py b/numpy/distutils/fcompiler/__init__.py index 07d5e506ea83..0b1b1ee6d9a8 100644 --- a/numpy/distutils/fcompiler/__init__.py +++ b/numpy/distutils/fcompiler/__init__.py @@ -38,7 +38,7 @@ from numpy.distutils.ccompiler import CCompiler, gen_lib_options from numpy.distutils import log from numpy.distutils.misc_util import is_string, all_strings, is_sequence, \ - make_temp_file, get_shared_lib_extension, quote + make_temp_file, get_shared_lib_extension from numpy.distutils.environment import EnvironmentConfig from numpy.distutils.exec_command import find_executable from numpy.distutils.compat import get_exception @@ -582,12 +582,12 @@ def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts): % (self.__class__.__name__, src)) extra_compile_args = self.extra_f90_compile_args or [] if self.object_switch[-1]==' ': - o_args = [self.object_switch.strip(), quote(obj)] + o_args = [self.object_switch.strip(), obj] else: - o_args = [self.object_switch.strip() + quote(obj)] + o_args = [self.object_switch.strip()+obj] assert self.compile_switch.strip() - s_args = [self.compile_switch, quote(src)] + s_args = [self.compile_switch, src] if extra_compile_args: log.info('extra %s options: %r' \ @@ -659,7 +659,6 @@ def link(self, target_desc, objects, else: ld_args = objects + self.objects ld_args = ld_args + lib_opts + o_args - ld_args = [quote(ld_arg) for ld_arg in ld_args] if debug: ld_args[:0] = ['-g'] if extra_preargs: @@ -988,4 +987,3 @@ def get_f77flags(src): if __name__ == '__main__': show_fcompilers() - diff --git a/numpy/distutils/misc_util.py b/numpy/distutils/misc_util.py index a9ca12832b2e..c146178f0647 100644 --- a/numpy/distutils/misc_util.py +++ b/numpy/distutils/misc_util.py @@ -14,11 +14,6 @@ import distutils from distutils.errors import DistutilsError -try: - from pipes import quote -except ImportError: - from shlex import quote - try: set except NameError: @@ -36,8 +31,7 @@ 'get_script_files', 'get_lib_source_files', 'get_data_files', 'dot_join', 'get_frame', 'minrelpath', 'njoin', 'is_sequence', 'is_string', 'as_list', 'gpaths', 'get_language', - 'quote_args', 'quote', 'get_build_architecture', 'get_info', - 'get_pkg_info'] + 'quote_args', 'get_build_architecture', 'get_info', 'get_pkg_info'] class InstallableLib(object): """ diff --git a/numpy/distutils/npy_pkg_config.py b/numpy/distutils/npy_pkg_config.py index 3192bfa32fe5..ceab906a4edf 100644 --- a/numpy/distutils/npy_pkg_config.py +++ b/numpy/distutils/npy_pkg_config.py @@ -10,8 +10,6 @@ else: from configparser import ConfigParser, SafeConfigParser, NoOptionError -from numpy.distutils.misc_util import quote - __all__ = ['FormatError', 'PkgNotFound', 'LibraryInfo', 'VariableSet', 'read_config', 'parse_flags'] @@ -58,7 +56,7 @@ def parse_flags(line): * 'ignored' """ - lexer = shlex.shlex(line, posix=True) + lexer = shlex.shlex(line) lexer.whitespace_split = True d = {'include_dirs': [], 'library_dirs': [], 'libraries': [], @@ -90,6 +88,8 @@ def next_token(t): return d +def _escape_backslash(val): + return val.replace('\\', '\\\\') class LibraryInfo(object): """ @@ -147,11 +147,11 @@ def sections(self): def cflags(self, section="default"): val = self.vars.interpolate(self._sections[section]['cflags']) - return quote(val) + return _escape_backslash(val) def libs(self, section="default"): val = self.vars.interpolate(self._sections[section]['libs']) - return quote(val) + return _escape_backslash(val) def __str__(self): m = ['Name: %s' % self.name] @@ -289,7 +289,7 @@ def parse_config(filename, dirs=None): vars = {} if config.has_section('variables'): for name, value in config.items("variables"): - vars[name] = quote(value) + vars[name] = _escape_backslash(value) # Parse "normal" sections secs = [s for s in config.sections() if not s in ['meta', 'variables']] @@ -338,7 +338,7 @@ def _read_config(f): (pkgname, meta["name"])) mod = sys.modules[pkgname] - vars["pkgdir"] = quote(os.path.dirname(mod.__file__)) + vars["pkgdir"] = _escape_backslash(os.path.dirname(mod.__file__)) return LibraryInfo(name=meta["name"], description=meta["description"], version=meta["version"], sections=sections, vars=VariableSet(vars)) diff --git a/numpy/distutils/tests/test_npy_pkg_config.py b/numpy/distutils/tests/test_npy_pkg_config.py index e703edbd091f..5443ece485b2 100644 --- a/numpy/distutils/tests/test_npy_pkg_config.py +++ b/numpy/distutils/tests/test_npy_pkg_config.py @@ -1,7 +1,6 @@ from __future__ import division, absolute_import, print_function import os -import shlex from tempfile import mkstemp from numpy.testing import * @@ -39,12 +38,6 @@ 'version': '0.1', 'name': 'foo'} class TestLibraryInfo(TestCase): - - def assertLexEqual(self, str1, str2): - # Use shlex.split for comparison because it is above quotes - # eg: shlex.split("'abc'") == shlex.split("abc") - return shlex.split(str1) == shlex.split(str2) - def test_simple(self): fd, filename = mkstemp('foo.ini') try: @@ -55,10 +48,10 @@ def test_simple(self): os.close(fd) out = read_config(pkg) - self.assertLexEqual(out.cflags(), simple_d['cflags']) - self.assertLexEqual(out.libs(), simple_d['libflags']) - self.assertEqual(out.name, simple_d['name']) - self.assertEqual(out.version, simple_d['version']) + self.assertTrue(out.cflags() == simple_d['cflags']) + self.assertTrue(out.libs() == simple_d['libflags']) + self.assertTrue(out.name == simple_d['name']) + self.assertTrue(out.version == simple_d['version']) finally: os.remove(filename) @@ -72,13 +65,13 @@ def test_simple_variable(self): os.close(fd) out = read_config(pkg) - self.assertLexEqual(out.cflags(), simple_variable_d['cflags']) - self.assertLexEqual(out.libs(), simple_variable_d['libflags']) - self.assertEqual(out.name, simple_variable_d['name']) - self.assertEqual(out.version, simple_variable_d['version']) + self.assertTrue(out.cflags() == simple_variable_d['cflags']) + self.assertTrue(out.libs() == simple_variable_d['libflags']) + self.assertTrue(out.name == simple_variable_d['name']) + self.assertTrue(out.version == simple_variable_d['version']) out.vars['prefix'] = '/Users/david' - self.assertLexEqual(out.cflags(), '-I/Users/david/include') + self.assertTrue(out.cflags() == '-I/Users/david/include') finally: os.remove(filename) @@ -95,28 +88,6 @@ def test_simple_cflags(self): self.assertTrue(d['include_dirs'] == ['/usr/include']) self.assertTrue(d['macros'] == ['FOO']) - def test_quotes_cflags(self): - d = parse_flags("-I'/usr/foo bar/include' -DFOO") - self.assertTrue(d['include_dirs'] == ['/usr/foo bar/include']) - self.assertTrue(d['macros'] == ['FOO']) - - d = parse_flags("-I/usr/'foo bar'/include -DFOO") - self.assertTrue(d['include_dirs'] == ['/usr/foo bar/include']) - self.assertTrue(d['macros'] == ['FOO']) - - d = parse_flags("'-I/usr/foo bar'/include -DFOO") - self.assertTrue(d['include_dirs'] == ['/usr/foo bar/include']) - self.assertTrue(d['macros'] == ['FOO']) - - def test_escaping_cflags(self): - d = parse_flags("-I/usr/foo\\ bar/include -DFOO") - self.assertTrue(d['include_dirs'] == ['/usr/foo bar/include']) - self.assertTrue(d['macros'] == ['FOO']) - - d = parse_flags(r"-I/usr/foo\ bar/include -DFOO") - self.assertTrue(d['include_dirs'] == ['/usr/foo bar/include']) - self.assertTrue(d['macros'] == ['FOO']) - def test_simple_lflags(self): d = parse_flags("-L/usr/lib -lfoo -L/usr/lib -lbar") self.assertTrue(d['library_dirs'] == ['/usr/lib', '/usr/lib']) @@ -125,31 +96,3 @@ def test_simple_lflags(self): d = parse_flags("-L /usr/lib -lfoo -L/usr/lib -lbar") self.assertTrue(d['library_dirs'] == ['/usr/lib', '/usr/lib']) self.assertTrue(d['libraries'] == ['foo', 'bar']) - - def test_quotes_lflags(self): - d = parse_flags("-L'/usr/foo bar' -lfoo -L/usr/lib -lbar") - self.assertTrue(d['library_dirs'] == ['/usr/foo bar', '/usr/lib']) - - d = parse_flags("-L/usr/'foo bar' -lfoo -L/usr/lib -lbar") - self.assertTrue(d['library_dirs'] == ['/usr/foo bar', '/usr/lib']) - - d = parse_flags("\"-L/usr/foo bar\" -lfoo -L/usr/lib -lbar") - self.assertTrue(d['library_dirs'] == ['/usr/foo bar', '/usr/lib']) - - d = parse_flags("\"-L/usr/foo bar/baz buz\" -lfoo -L/usr/lib -lbar") - self.assertTrue(d['library_dirs'] == ['/usr/foo bar/baz buz', '/usr/lib']) - - def test_escaping_lflags(self): - d = parse_flags("-L/usr/foo\\ bar -lfoo -L/usr/lib -lbar") - self.assertTrue(d['library_dirs'] == ['/usr/foo bar', '/usr/lib']) - - d = parse_flags(r"-L/usr/foo\ bar -lfoo -L/usr/lib -lbar") - self.assertTrue(d['library_dirs'] == ['/usr/foo bar', '/usr/lib']) - - def test_odd_characters_lflags(self): - # tab in directory name - d = parse_flags('-L/usr/"foo\tbar" -lfoo -L/usr/lib -lbar') - self.assertTrue(d['library_dirs'] == ['/usr/foo\tbar', '/usr/lib']) - - d = parse_flags("-L/usr/foo\\\tbar -lfoo -L/usr/lib -lbar") - self.assertTrue(d['library_dirs'] == ['/usr/foo\tbar', '/usr/lib']) diff --git a/numpy/distutils/unixccompiler.py b/numpy/distutils/unixccompiler.py index a198acb8bcfd..955407aa0384 100644 --- a/numpy/distutils/unixccompiler.py +++ b/numpy/distutils/unixccompiler.py @@ -10,7 +10,6 @@ from distutils.unixccompiler import * from numpy.distutils.ccompiler import replace_method from numpy.distutils.compat import get_exception -from numpy.distutils.misc_util import quote_args, quote if sys.version_info[0] < 3: from . import log @@ -89,8 +88,8 @@ def UnixCCompiler_create_static_lib(self, objects, output_libname, display = '%s: adding %d object files to %s' % ( os.path.basename(self.archiver[0]), len(objects), output_filename) - command = self.archiver + [quote(output_filename)] + quote_args(objects) - self.spawn(command, display = display) + self.spawn(self.archiver + [output_filename] + objects, + display = display) # Not many Unices required ranlib anymore -- SunOS 4.x is, I # think the only major Unix that does. Maybe we need some From bf2a953362b7725e999662b1f5d0ba63f16b28b5 Mon Sep 17 00:00:00 2001 From: Julian Taylor Date: Thu, 6 Mar 2014 22:59:39 +0100 Subject: [PATCH 10/17] TST: work around outdated travis boxes run manual apt-get update to pick up the latest py3 security update --- tools/travis-test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/travis-test.sh b/tools/travis-test.sh index 4783428c142e..dfe244dca920 100755 --- a/tools/travis-test.sh +++ b/tools/travis-test.sh @@ -103,6 +103,7 @@ PYTHON=${PYTHON:-python} PIP=${PIP:-pip} if [ -n "$USE_DEBUG" ]; then + sudo apt-get update # 06.03.2014, temporary until travis boxes are resynced sudo apt-get install -qq -y --force-yes python3-dbg python3-dev python3-nose PYTHON=python3-dbg fi From ea75479ce8b43a14ac8aed23525c767d4e09f97a Mon Sep 17 00:00:00 2001 From: Cimarron Mittelsteadt Date: Thu, 6 Mar 2014 19:00:17 -0800 Subject: [PATCH 11/17] DOC: Fixed documentation on lstsq function on when it return an empty residuals array --- numpy/linalg/linalg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py index 102c6aec2eae..e5d13efc8682 100644 --- a/numpy/linalg/linalg.py +++ b/numpy/linalg/linalg.py @@ -1763,7 +1763,7 @@ def lstsq(a, b, rcond=-1): residuals : {(), (1,), (K,)} ndarray Sums of residuals; squared Euclidean 2-norm for each column in ``b - a*x``. - If the rank of `a` is < N or > M, this is an empty array. + If the rank of `a` is < N or M <= N, this is an empty array. If `b` is 1-dimensional, this is a (1,) shape array. Otherwise the shape is (K,). rank : int From c35542c18469b63cd4556da6f99c296899f7b5b1 Mon Sep 17 00:00:00 2001 From: empeeu Date: Wed, 12 Feb 2014 19:10:31 -0500 Subject: [PATCH 12/17] ENH: added proper handling of nans for numpy.lib.function_base.median. This closes ticket# 2126 (issue #586) --- numpy/lib/function_base.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 62ced0509dc3..e4db8a4a942e 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -2773,7 +2773,15 @@ def median(a, axis=None, out=None, overwrite_input=False): if axis is not None and axis >= a.ndim: raise IndexError( "axis %d out of bounds (%d)" % (axis, a.ndim)) - + + #Check if the array contains any nan's + ids = None + if axis is None or a.ndim == 1: + if np.any(np.isnan(a)): + return np.array([np.nan]) + else: #continue the calculation but replace results with nans where needed + ids = np.any(np.isnan(a), axis=axis) + if overwrite_input: if axis is None: part = a.ravel() @@ -2812,9 +2820,16 @@ def median(a, axis=None, out=None, overwrite_input=False): indexer[axis] = slice(index, index+1) else: indexer[axis] = slice(index-1, index+1) - # Use mean in odd and even case to coerce data type - # and check, use out array. - return mean(part[indexer], axis=axis, out=out) + + if ids == None: #if there are no nans + # Use mean in odd and even case to coerce data type + # and check, use out array. + return mean(part[indexer], axis=axis, out=out) + else: #replace results where needed with nans + out = mean(part[indexer], axis=axis, out=out) + out[ids] = np.nan + return out + def percentile(a, q, axis=None, out=None, From 210f534a40f9ec62d7a142726ea95a379d3fe9d8 Mon Sep 17 00:00:00 2001 From: empeeu Date: Wed, 12 Feb 2014 19:40:00 -0500 Subject: [PATCH 13/17] ENH: added unit test for previous commit which updates median's handling of nans in reference to ticket #586 --- numpy/lib/tests/test_function_base.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 3e102cf6ac99..a18b721c3589 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1865,6 +1865,27 @@ def mean(self, axis=None, dtype=None, out=None): a = MySubClass([1,2,3]) assert_equal(np.median(a), -7) + + def test_nan_behavior(self): + a = np.arange(24, dtype=float) + a[2] = np.nan + assert_equal(np.median(a), np.nan) + assert_equal(np.median(a, axis=0), np.nan) + a = np.arange(24, dtype=float).reshape(2, 3, 4) + a[1, 2, 3] = np.nan + a[1, 1, 2] = np.nan + + #no axis + assert_equal(np.median(a), np.nan) + #axis0 + b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 0) + b[2, 3] = np.nan; b[1, 2] = np.nan + assert_equal(np.median(a, 0), b) + #axis1 + b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 1) + b[1, 3] = np.nan; b[1, 2] = np.nan + assert_equal(np.median(a, 1), b) + class TestAdd_newdoc_ufunc(TestCase): From 22eeef0bd129ae7d1ca34cc00e36b4ae79e67694 Mon Sep 17 00:00:00 2001 From: empeeu Date: Sun, 16 Feb 2014 19:44:06 -0500 Subject: [PATCH 14/17] ENH: #586. Improved performance of nan handling in median function. Also added a flag "check_nan" that can be turned off to regain original performance. Also added nanmedian function to calculate the median while ignorning nans. --- numpy/lib/function_base.py | 116 ++++++++++---------- numpy/lib/nanfunctions.py | 149 +++++++++++++++++++++++++- numpy/lib/tests/test_function_base.py | 9 ++ 3 files changed, 217 insertions(+), 57 deletions(-) diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index e4db8a4a942e..72ae0b3cf3cf 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -1101,7 +1101,7 @@ def interp(x, xp, fp, left=None, right=None): ----- Does not check that the x-coordinate sequence `xp` is increasing. If `xp` is not increasing, the results are nonsense. - A simple check for increasing is:: + A simple check for increasingness is:: np.all(np.diff(xp) > 0) @@ -1578,16 +1578,15 @@ class vectorize(object): The `vectorize` function is provided primarily for convenience, not for performance. The implementation is essentially a for loop. - If `otypes` is not specified, then a call to the function with the - first argument will be used to determine the number of outputs. The - results of this call will be cached if `cache` is `True` to prevent - calling the function twice. However, to implement the cache, the - original function must be wrapped which will slow down subsequent - calls, so only do this if your function is expensive. - - The new keyword argument interface and `excluded` argument support - further degrades performance. + If `otypes` is not specified, then a call to the function with the first + argument will be used to determine the number of outputs. The results of + this call will be cached if `cache` is `True` to prevent calling the + function twice. However, to implement the cache, the original function + must be wrapped which will slow down subsequent calls, so only do this if + your function is expensive. + The new keyword argument interface and `excluded` argument support further + degrades performance. """ def __init__(self, pyfunc, otypes='', doc=None, excluded=None, cache=False): @@ -1811,11 +1810,9 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None): "ddof must be integer") # Handles complex arrays too - m = np.asarray(m) if y is None: dtype = np.result_type(m, np.float64) else: - y = np.asarray(y) dtype = np.result_type(m, y, np.float64) X = array(m, ndmin=2, dtype=dtype) @@ -1912,7 +1909,7 @@ def blackman(M): """ Return the Blackman window. - The Blackman window is a taper formed by using the first three + The Blackman window is a taper formed by using the the first three terms of a summation of cosines. It was designed to have close to the minimal leakage possible. It is close to optimal, only slightly worse than a Kaiser window. @@ -2142,10 +2139,9 @@ def hanning(M): .. math:: w(n) = 0.5 - 0.5cos\\left(\\frac{2\\pi{n}}{M-1}\\right) \\qquad 0 \\leq n \\leq M-1 - The Hanning was named for Julius van Hann, an Austrian meteorologist. - It is also known as the Cosine Bell. Some authors prefer that it be - called a Hann window, to help avoid confusion with the very similar - Hamming window. + The Hanning was named for Julius van Hann, an Austrian meterologist. It is + also known as the Cosine Bell. Some authors prefer that it be called a + Hann window, to help avoid confusion with the very similar Hamming window. Most references to the Hanning window come from the signal processing literature, where it is used as one of many windowing functions for @@ -2242,9 +2238,9 @@ def hamming(M): .. math:: w(n) = 0.54 - 0.46cos\\left(\\frac{2\\pi{n}}{M-1}\\right) \\qquad 0 \\leq n \\leq M-1 - The Hamming was named for R. W. Hamming, an associate of J. W. Tukey - and is described in Blackman and Tukey. It was recommended for - smoothing the truncated autocovariance function in the time domain. + The Hamming was named for R. W. Hamming, an associate of J. W. Tukey and + is described in Blackman and Tukey. It was recommended for smoothing the + truncated autocovariance function in the time domain. Most references to the Hamming window come from the signal processing literature, where it is used as one of many windowing functions for smoothing values. It is also known as an apodization (which means @@ -2424,11 +2420,11 @@ def i0(x): Notes ----- We use the algorithm published by Clenshaw [1]_ and referenced by - Abramowitz and Stegun [2]_, for which the function domain is - partitioned into the two intervals [0,8] and (8,inf), and Chebyshev - polynomial expansions are employed in each interval. Relative error on - the domain [0,30] using IEEE arithmetic is documented [3]_ as having a - peak of 5.8e-16 with an rms of 1.4e-16 (n = 30000). + Abramowitz and Stegun [2]_, for which the function domain is partitioned + into the two intervals [0,8] and (8,inf), and Chebyshev polynomial + expansions are employed in each interval. Relative error on the domain + [0,30] using IEEE arithmetic is documented [3]_ as having a peak of 5.8e-16 + with an rms of 1.4e-16 (n = 30000). References ---------- @@ -2498,11 +2494,12 @@ def kaiser(M, beta): where :math:`I_0` is the modified zeroth-order Bessel function. - The Kaiser was named for Jim Kaiser, who discovered a simple - approximation to the DPSS window based on Bessel functions. The Kaiser - window is a very good approximation to the Digital Prolate Spheroidal - Sequence, or Slepian window, which is the transform which maximizes the - energy in the main lobe of the window relative to total energy. + The Kaiser was named for Jim Kaiser, who discovered a simple approximation + to the DPSS window based on Bessel functions. + The Kaiser window is a very good approximation to the Digital Prolate + Spheroidal Sequence, or Slepian window, which is the transform which + maximizes the energy in the main lobe of the window relative to total + energy. The Kaiser can approximate many other windows by varying the beta parameter. @@ -2612,8 +2609,8 @@ def sinc(x): The name sinc is short for "sine cardinal" or "sinus cardinalis". The sinc function is used in various signal processing applications, - including in anti-aliasing, in the construction of a Lanczos resampling - filter, and in interpolation. + including in anti-aliasing, in the construction of a + Lanczos resampling filter, and in interpolation. For bandlimited interpolation of discrete-time signals, the ideal interpolation kernel is proportional to the sinc function. @@ -2774,23 +2771,24 @@ def median(a, axis=None, out=None, overwrite_input=False): raise IndexError( "axis %d out of bounds (%d)" % (axis, a.ndim)) + #Set the partition indexes + if axis is None: + sz = a.size + else: + sz = a.shape[axis] + if sz % 2 == 0: + szh = sz // 2 + kth = [szh - 1, szh] + else: + kth = [(sz - 1) // 2] #Check if the array contains any nan's - ids = None - if axis is None or a.ndim == 1: - if np.any(np.isnan(a)): - return np.array([np.nan]) - else: #continue the calculation but replace results with nans where needed - ids = np.any(np.isnan(a), axis=axis) - + if check_nan and np.issubdtype(a.dtype, np.inexact): + kth.append(-1) + if overwrite_input: if axis is None: part = a.ravel() - sz = part.size - if sz % 2 == 0: - szh = sz // 2 - part.partition((szh - 1, szh)) - else: - part.partition((sz - 1) // 2) + part.partition(kth) else: sz = a.shape[axis] if sz % 2 == 0: @@ -2799,15 +2797,10 @@ def median(a, axis=None, out=None, overwrite_input=False): else: a.partition((sz - 1) // 2, axis=axis) part = a + a.partition(kth, axis=axis) else: - if axis is None: - sz = a.size - else: - sz = a.shape[axis] - if sz % 2 == 0: - part = partition(a, ((sz // 2) - 1, sz // 2), axis=axis) - else: - part = partition(a, (sz - 1) // 2, axis=axis) + part = partition(a, kth, axis=axis) + if part.shape == (): # make 0-D arrays work return part.item() @@ -2820,8 +2813,21 @@ def median(a, axis=None, out=None, overwrite_input=False): indexer[axis] = slice(index, index+1) else: indexer[axis] = slice(index-1, index+1) - - if ids == None: #if there are no nans + #Check if the array contains any nan's + if check_nan and np.issubdtype(a.dtype, np.inexact): + if part.ndim <= 1: + if np.isnan(part[-1]): + return np.nan + else: + return mean(part[indexer], axis=axis, out=out) + else: + nan_indexer = [slice(None)] * part.ndim + nan_indexer[axis] = slice(-1, None) + ids = np.isnan(part[nan_indexer].squeeze(axis)) + out = np.asanyarray(mean(part[indexer], axis=axis, out=out)) + out[ids] = np.nan + return out + else: #if there are no nans # Use mean in odd and even case to coerce data type # and check, use out array. return mean(part[indexer], axis=axis, out=out) diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 5766084abfae..a948fe1e0110 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -18,11 +18,11 @@ import warnings import numpy as np - +from numpy.core.fromnumeric import partition __all__ = [ 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', - 'nanvar', 'nanstd' + 'nanmedian', 'nanvar', 'nanstd' ] @@ -601,6 +601,151 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): return avg +def _nanmedian1d(arr1d, overwrite_input=False): # This only works on 1d arrays + """Private function for rank 1 arrays. Compute the median ignoring NaNs. + + Parameters + ---------- + arr1d : ndarray + Input array, of rank 1. + overwrite_input : bool, optional + If True, then allow use of memory of input array (a) for + calculations. The input array will be modified by the call to + median. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. Note that, if `overwrite_input` is True and the input + is not already an ndarray, an error will be raised. + + Results + ------- + m : float + The median. + """ + + if overwrite_input: + part = arr1d.ravel() + #filter out the NaNs + sz = part.size - np.sum(np.isnan(part)) + if sz == 0: + warnings.warn("All-NaN slice encountered", RuntimeWarning) + return np.nan + if sz % 2 == 0: + szh = sz // 2 + part.partition((szh - 1, szh)) + else: + part.partition((sz - 1) // 2) + else: + sz = arr1d.size - np.sum(np.isnan(arr1d)) + if sz == 0: + warnings.warn("All-NaN slice encountered", RuntimeWarning) + return np.nan + if sz % 2 == 0: + part = partition(arr1d, ((sz // 2) - 1, sz // 2), axis=None) + else: + part = partition(arr1d, (sz - 1) // 2, axis=None) + if part.shape == (): + # make 0-D arrays work + return part.item() + axis = 0 + index = sz // 2 + if sz % 2 == 1: + # index with slice to allow mean (below) to work + indexer = slice(index, index+1) + else: + indexer = slice(index-1, index+1) + + # Use mean in odd and even case to coerce data type + # and check, use out array. + return np.mean(part[indexer], axis=axis) + + +def nanmedian(a, axis=None, overwrite_input=False): + """ + Compute the median along the specified axis, while ignoring NaNs. + + Returns the median of the array elements. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : int, optional + Axis along which the medians are computed. The default (axis=None) + is to compute the median along a flattened version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow use of memory of input array (a) for + calculations. The input array will be modified by the call to + median. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. Note that, if `overwrite_input` is True and the input + is not already an ndarray, an error will be raised. + + Returns + ------- + median : ndarray + A new array holding the result (unless `out` is specified, in + which case that array is returned instead). If the input contains + integers, or floats of smaller precision than 64, then the output + data-type is float64. Otherwise, the output data-type is the same + as that of the input. + + See Also + -------- + mean, percentile + + Notes + ----- + Given a vector V of length N, the median of V is the middle value of + a sorted copy of V, ``V_sorted`` - i.e., ``V_sorted[(N-1)/2]``, when N is + odd. When N is even, it is the average of the two middle values of + ``V_sorted``. + + Examples + -------- + >>> a = np.array([[10, 7, 4], [3, 2, 1]]) + >>> a + array([[10, 7, 4], + [ 3, 2, 1]]) + >>> np.median(a) + 3.5 + >>> np.median(a, axis=0) + array([ 6.5, 4.5, 2.5]) + >>> np.median(a, axis=1) + array([ 7., 2.]) + >>> m = np.median(a, axis=0) + >>> out = np.zeros_like(m) + >>> np.median(a, axis=0, out=m) + array([ 6.5, 4.5, 2.5]) + >>> m + array([ 6.5, 4.5, 2.5]) + >>> b = a.copy() + >>> np.median(b, axis=1, overwrite_input=True) + array([ 7., 2.]) + >>> assert not np.all(a==b) + >>> b = a.copy() + >>> np.median(b, axis=None, overwrite_input=True) + 3.5 + >>> assert not np.all(a==b) + + """ + a = np.asanyarray(a) + if axis is not None and axis >= a.ndim: + raise IndexError( + "axis %d out of bounds (%d)" % (axis, a.ndim)) + + if axis is None: + part = a.ravel() + return _nanmedian1d(part, overwrite_input) + else: + return np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input) + + def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ Compute the variance along the specified axis, while ignoring NaNs. diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index a18b721c3589..b5622d6bfaf5 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1867,6 +1867,15 @@ def mean(self, axis=None, dtype=None, out=None): assert_equal(np.median(a), -7) def test_nan_behavior(self): + #Complex + a = np.arange(24, dtype=complex) + a[2] = complex(np.nan, 0) + assert_equal(np.median(a), np.nan) + a[2] = complex(0, np.nan) + assert_equal(np.median(a), np.nan) + a[2] = complex(np.nan, np.nan) + assert_equal(np.median(a), np.nan) + #floats a = np.arange(24, dtype=float) a[2] = np.nan assert_equal(np.median(a), np.nan) From 2d8f3ed32a8e47bd7c8b3843bd1aad8da77c4a1a Mon Sep 17 00:00:00 2001 From: empeeu Date: Sun, 16 Feb 2014 21:03:04 -0500 Subject: [PATCH 15/17] DOC: Updated doc string for nanmedian. --- numpy/lib/nanfunctions.py | 42 ++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index a948fe1e0110..48a001bf6652 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -673,10 +673,6 @@ def nanmedian(a, axis=None, overwrite_input=False): axis : int, optional Axis along which the medians are computed. The default (axis=None) is to compute the median along a flattened version of the array. - out : ndarray, optional - Alternative output array in which to place the result. It must - have the same shape and buffer length as the expected output, - but the type (of the output) will be cast if necessary. overwrite_input : bool, optional If True, then allow use of memory of input array (a) for calculations. The input array will be modified by the call to @@ -689,15 +685,14 @@ def nanmedian(a, axis=None, overwrite_input=False): Returns ------- median : ndarray - A new array holding the result (unless `out` is specified, in - which case that array is returned instead). If the input contains - integers, or floats of smaller precision than 64, then the output - data-type is float64. Otherwise, the output data-type is the same - as that of the input. + A new array holding the result. If the input contains integers, or + floats of smaller precision than 64, then the output data-type is + float64. Otherwise, the output data-type is the same as that of the + input. See Also -------- - mean, percentile + mean, median, percentile Notes ----- @@ -708,29 +703,26 @@ def nanmedian(a, axis=None, overwrite_input=False): Examples -------- - >>> a = np.array([[10, 7, 4], [3, 2, 1]]) + >>> a = np.array([[10.0, 7, 4], [3, 2, 1]]) + >>> a[0, 1] = np.nan >>> a - array([[10, 7, 4], - [ 3, 2, 1]]) + array([[ 10., nan, 4.], + [ 3., 2., 1.]]) >>> np.median(a) - 3.5 - >>> np.median(a, axis=0) - array([ 6.5, 4.5, 2.5]) + nan + >>> np.nanmedian(a) + 3.0 + >>> np.nanmedian(a, axis=0) + array([ 6.5, 2., 2.5]) >>> np.median(a, axis=1) array([ 7., 2.]) - >>> m = np.median(a, axis=0) - >>> out = np.zeros_like(m) - >>> np.median(a, axis=0, out=m) - array([ 6.5, 4.5, 2.5]) - >>> m - array([ 6.5, 4.5, 2.5]) >>> b = a.copy() - >>> np.median(b, axis=1, overwrite_input=True) + >>> np.nanmedian(b, axis=1, overwrite_input=True) array([ 7., 2.]) >>> assert not np.all(a==b) >>> b = a.copy() - >>> np.median(b, axis=None, overwrite_input=True) - 3.5 + >>> np.nanmedian(b, axis=None, overwrite_input=True) + 3.0 >>> assert not np.all(a==b) """ From 52ccb4efea98bb5bac7343c5a5e12b23d4ef8b9d Mon Sep 17 00:00:00 2001 From: empeeu Date: Mon, 17 Feb 2014 21:12:57 -0500 Subject: [PATCH 16/17] REV: Reverting nanmedian from this PR. Now deals exclusively with handling of nans in median. --- numpy/lib/nanfunctions.py | 141 +------------------------------------- 1 file changed, 2 insertions(+), 139 deletions(-) diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 48a001bf6652..5766084abfae 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -18,11 +18,11 @@ import warnings import numpy as np -from numpy.core.fromnumeric import partition + __all__ = [ 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', - 'nanmedian', 'nanvar', 'nanstd' + 'nanvar', 'nanstd' ] @@ -601,143 +601,6 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): return avg -def _nanmedian1d(arr1d, overwrite_input=False): # This only works on 1d arrays - """Private function for rank 1 arrays. Compute the median ignoring NaNs. - - Parameters - ---------- - arr1d : ndarray - Input array, of rank 1. - overwrite_input : bool, optional - If True, then allow use of memory of input array (a) for - calculations. The input array will be modified by the call to - median. This will save memory when you do not need to preserve - the contents of the input array. Treat the input as undefined, - but it will probably be fully or partially sorted. Default is - False. Note that, if `overwrite_input` is True and the input - is not already an ndarray, an error will be raised. - - Results - ------- - m : float - The median. - """ - - if overwrite_input: - part = arr1d.ravel() - #filter out the NaNs - sz = part.size - np.sum(np.isnan(part)) - if sz == 0: - warnings.warn("All-NaN slice encountered", RuntimeWarning) - return np.nan - if sz % 2 == 0: - szh = sz // 2 - part.partition((szh - 1, szh)) - else: - part.partition((sz - 1) // 2) - else: - sz = arr1d.size - np.sum(np.isnan(arr1d)) - if sz == 0: - warnings.warn("All-NaN slice encountered", RuntimeWarning) - return np.nan - if sz % 2 == 0: - part = partition(arr1d, ((sz // 2) - 1, sz // 2), axis=None) - else: - part = partition(arr1d, (sz - 1) // 2, axis=None) - if part.shape == (): - # make 0-D arrays work - return part.item() - axis = 0 - index = sz // 2 - if sz % 2 == 1: - # index with slice to allow mean (below) to work - indexer = slice(index, index+1) - else: - indexer = slice(index-1, index+1) - - # Use mean in odd and even case to coerce data type - # and check, use out array. - return np.mean(part[indexer], axis=axis) - - -def nanmedian(a, axis=None, overwrite_input=False): - """ - Compute the median along the specified axis, while ignoring NaNs. - - Returns the median of the array elements. - - Parameters - ---------- - a : array_like - Input array or object that can be converted to an array. - axis : int, optional - Axis along which the medians are computed. The default (axis=None) - is to compute the median along a flattened version of the array. - overwrite_input : bool, optional - If True, then allow use of memory of input array (a) for - calculations. The input array will be modified by the call to - median. This will save memory when you do not need to preserve - the contents of the input array. Treat the input as undefined, - but it will probably be fully or partially sorted. Default is - False. Note that, if `overwrite_input` is True and the input - is not already an ndarray, an error will be raised. - - Returns - ------- - median : ndarray - A new array holding the result. If the input contains integers, or - floats of smaller precision than 64, then the output data-type is - float64. Otherwise, the output data-type is the same as that of the - input. - - See Also - -------- - mean, median, percentile - - Notes - ----- - Given a vector V of length N, the median of V is the middle value of - a sorted copy of V, ``V_sorted`` - i.e., ``V_sorted[(N-1)/2]``, when N is - odd. When N is even, it is the average of the two middle values of - ``V_sorted``. - - Examples - -------- - >>> a = np.array([[10.0, 7, 4], [3, 2, 1]]) - >>> a[0, 1] = np.nan - >>> a - array([[ 10., nan, 4.], - [ 3., 2., 1.]]) - >>> np.median(a) - nan - >>> np.nanmedian(a) - 3.0 - >>> np.nanmedian(a, axis=0) - array([ 6.5, 2., 2.5]) - >>> np.median(a, axis=1) - array([ 7., 2.]) - >>> b = a.copy() - >>> np.nanmedian(b, axis=1, overwrite_input=True) - array([ 7., 2.]) - >>> assert not np.all(a==b) - >>> b = a.copy() - >>> np.nanmedian(b, axis=None, overwrite_input=True) - 3.0 - >>> assert not np.all(a==b) - - """ - a = np.asanyarray(a) - if axis is not None and axis >= a.ndim: - raise IndexError( - "axis %d out of bounds (%d)" % (axis, a.ndim)) - - if axis is None: - part = a.ravel() - return _nanmedian1d(part, overwrite_input) - else: - return np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input) - - def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ Compute the variance along the specified axis, while ignoring NaNs. From cbca46c66f4ddfb2f0725a5d7475cae6d0524d80 Mon Sep 17 00:00:00 2001 From: empeeu Date: Tue, 25 Feb 2014 19:28:30 -0500 Subject: [PATCH 17/17] ENH: #586 Pulls in juliantaylor's specialization. New median with nan checking is now pretty much as fast as the old one. Hence, removed the check_nan flag from previous commits. --- numpy/core/src/npysort/selection.c.src | 4 +- numpy/lib/function_base.py | 54 +++++++++----------------- 2 files changed, 20 insertions(+), 38 deletions(-) diff --git a/numpy/core/src/npysort/selection.c.src b/numpy/core/src/npysort/selection.c.src index 920c07ec6485..dc8d16d2c7f9 100644 --- a/numpy/core/src/npysort/selection.c.src +++ b/numpy/core/src/npysort/selection.c.src @@ -69,7 +69,6 @@ static NPY_INLINE void store_pivot(npy_intp pivot, npy_intp kth, * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, * npy_cdouble, npy_clongdouble# - * #inexact = 0*11, 1*7# */ static npy_intp @@ -323,8 +322,7 @@ int store_pivot(kth, kth, pivots, npiv); return 0; } - else if (@inexact@ && kth == num - 1) { - /* useful to check if NaN present via partition(d, (x, -1)) */ + else if (kth == num - 1) { npy_intp k; npy_intp maxidx = low; @type@ maxval = v[IDX(low)]; diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 72ae0b3cf3cf..53ae4d82e558 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -373,10 +373,10 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): if not np.isinf(mindiff): decimal = int(-log10(mindiff)) + 6 # Find which points are on the rightmost edge. - not_smaller_than_edge = (sample[:, i] >= edges[i][-1]) - on_edge = (around(sample[:, i], decimal) == around(edges[i][-1], decimal)) + on_edge = where(around(sample[:, i], decimal) == + around(edges[i][-1], decimal))[0] # Shift these points one bin to the left. - Ncount[i][where(on_edge & not_smaller_than_edge)[0]] -= 1 + Ncount[i][on_edge] -= 1 # Flattened histogram matrix (1D) # Reshape is used so that overlarge arrays @@ -2692,7 +2692,7 @@ def msort(a): return b -def median(a, axis=None, out=None, overwrite_input=False): +def median(a, axis=None, out=None, overwrite_input=False, check_nan=True): """ Compute the median along the specified axis. @@ -2706,23 +2706,23 @@ def median(a, axis=None, out=None, overwrite_input=False): Axis along which the medians are computed. The default (axis=None) is to compute the median along a flattened version of the array. out : ndarray, optional - Alternative output array in which to place the result. It must have - the same shape and buffer length as the expected output, but the - type (of the output) will be cast if necessary. + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. overwrite_input : bool, optional If True, then allow use of memory of input array (a) for calculations. The input array will be modified by the call to - median. This will save memory when you do not need to preserve the - contents of the input array. Treat the input as undefined, but it - will probably be fully or partially sorted. Default is False. Note - that, if `overwrite_input` is True and the input is not already an - ndarray, an error will be raised. - + median. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. Note that, if `overwrite_input` is True and the input + is not already an ndarray, an error will be raised. + Returns ------- median : ndarray - A new array holding the result (unless `out` is specified, in which - case that array is returned instead). If the input contains + A new array holding the result (unless `out` is specified, in + which case that array is returned instead). If the input contains integers, or floats of smaller precision than 64, then the output data-type is float64. Otherwise, the output data-type is the same as that of the input. @@ -2782,7 +2782,7 @@ def median(a, axis=None, out=None, overwrite_input=False): else: kth = [(sz - 1) // 2] #Check if the array contains any nan's - if check_nan and np.issubdtype(a.dtype, np.inexact): + if np.issubdtype(a.dtype, np.inexact): kth.append(-1) if overwrite_input: @@ -2790,12 +2790,6 @@ def median(a, axis=None, out=None, overwrite_input=False): part = a.ravel() part.partition(kth) else: - sz = a.shape[axis] - if sz % 2 == 0: - szh = sz // 2 - a.partition((szh - 1, szh), axis=axis) - else: - a.partition((sz - 1) // 2, axis=axis) part = a a.partition(kth, axis=axis) else: @@ -2814,7 +2808,7 @@ def median(a, axis=None, out=None, overwrite_input=False): else: indexer[axis] = slice(index-1, index+1) #Check if the array contains any nan's - if check_nan and np.issubdtype(a.dtype, np.inexact): + if np.issubdtype(a.dtype, np.inexact): if part.ndim <= 1: if np.isnan(part[-1]): return np.nan @@ -2831,12 +2825,6 @@ def median(a, axis=None, out=None, overwrite_input=False): # Use mean in odd and even case to coerce data type # and check, use out array. return mean(part[indexer], axis=axis, out=out) - else: #replace results where needed with nans - out = mean(part[indexer], axis=axis, out=out) - out[ids] = np.nan - return out - - def percentile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear'): @@ -3207,8 +3195,7 @@ def meshgrid(*xi, **kwargs): for j in range(ny): # treat xv[j,i], yv[j,i] - In the 1-D and 0-D case, the indexing and sparse keywords have no - effect. + In the 1-D and 0-D case, the indexing and sparse keywords have no effect. See Also -------- @@ -3284,8 +3271,7 @@ def meshgrid(*xi, **kwargs): def delete(arr, obj, axis=None): """ Return a new array with sub-arrays along an axis deleted. For a one - dimensional array, this returns those entries not returned by - `arr[obj]`. + dimensional array, this returns those entries not returned by `arr[obj]`. Parameters ---------- @@ -3312,11 +3298,9 @@ def delete(arr, obj, axis=None): Notes ----- Often it is preferable to use a boolean mask. For example: - >>> mask = np.ones(len(arr), dtype=bool) >>> mask[[0,2,4]] = False >>> result = arr[mask,...] - Is equivalent to `np.delete(arr, [0,2,4], axis=0)`, but allows further use of `mask`.