Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Remove copy of scipy functions; use Bottleneck instead.

This makes several functions faster and removes code
from la.
  • Loading branch information...
commit 8bdf1fea96a4a80600b4b269dc3c9ca602b55848 1 parent 0e28deb
@kwgoodman authored
View
6 README.rst
@@ -70,7 +70,7 @@ Install
Requirements:
======================== ====================================================
-la Python, NumPy 1.5.1, Bottleneck 0.4.2
+la Python, NumPy 1.5.1, Bottleneck 0.4.3
Unit tests nose
======================== ====================================================
@@ -118,9 +118,9 @@ After you have installed ``la``, run the suite of unit tests::
>>> import la
>>> la.test()
<snip>
- Ran 2998 tests in 2.406s
+ Ran 2975 tests in 2.017s
OK
- <nose.result.TextTestResult run=2998 errors=0 failures=0>
+ <nose.result.TextTestResult run=2975 errors=0 failures=0>
The ``la`` package contains C extensions that speed up common alignment
operations such as adding two unaligned larrys. If the C extensions don't
View
3  RELEASE.rst
@@ -17,7 +17,8 @@ Bottleneck package (http://pypi.python.org/pypi/Bottleneck).
**Faster**
-- sum, mean, std, var, min, max, median, ranking
+- sum, mean, std, var, min, max, median, ranking
+- correlation, group_median, demean, demedian, zscore
**Moving window**
View
227 la/external/scipy.py
@@ -1,227 +0,0 @@
-"""
-The functions in this module were copied from SciPy to avoid making la
-depend on SciPy. See the la LICENSE file for the SciPy license.
-
-In the SciPy project, these functions can be found in scipy/stats/stats.py
-
-One change was made to the SciPy version of nanstd. The default for nanstd
-was changed from bias=False (N-1 normalization) to bias=True (N
-normalization). That makes it match the defaults for np.std and scipy.std.
-
-nanmedian has been modified. See http://projects.scipy.org/scipy/ticket/1098
-
-"""
-
-import numpy as np
-
-
-def _chk_asarray(a, axis):
- if axis is None:
- a = np.ravel(a)
- outaxis = 0
- else:
- a = np.asarray(a)
- outaxis = axis
- return a, outaxis
-
-def nanmean(x, axis=0):
- """Compute the mean over the given axis ignoring nans.
-
- :Parameters:
- x : ndarray
- input array
- axis : int
- axis along which the mean is computed.
-
- :Results:
- m : float
- the mean."""
- x, axis = _chk_asarray(x,axis)
- x = x.copy()
- Norig = x.shape[axis]
- factor = 1.0-np.sum(np.isnan(x),axis)*1.0/Norig
-
- x[np.isnan(x)] = 0
- return np.mean(x,axis)/factor
-
-# Three changes were made to the SciPy version of nanstd:
-# 1: The default for nanstd was changed from bias=False (N-1 normalization)
-# to bias=True (N normalization). That makes it match the defaults for
-# np.std and scipy.std.
-# 2: array was changed to np.array.
-# 3: Bug fix to allow negative axis
-# http://projects.scipy.org/scipy/ticket/1161
-def nanstd(x, axis=0, bias=True):
- """Compute the standard deviation over the given axis ignoring nans
-
- :Parameters:
- x : ndarray
- input array
- axis : int
- axis along which the standard deviation is computed.
- bias : boolean
- If true, the biased (normalized by N, default) definition is used.
- If false, the unbiased (N-1) is used.
-
- :Results:
- s : float
- the standard deviation."""
- x, axis = _chk_asarray(x,axis)
- x = x.copy()
- Norig = x.shape[axis]
-
- Nnan = np.sum(np.isnan(x),axis)*1.0
- n = Norig - Nnan
-
- x[np.isnan(x)] = 0.
- m1 = np.sum(x,axis)/n
-
- if axis:
- d = (x - np.expand_dims(m1, axis))**2.0
- else:
- d = (x - m1)**2.0
-
- m2 = np.sum(d,axis)-(m1*m1)*Nnan
- if bias:
- m2c = m2 / n
- else:
- m2c = m2 / (n - 1.)
- return np.sqrt(m2c)
-
-# nanvar doesn't exist in scipy. I just copied nanstd above and removed
-# np.sqrt
-def nanvar(x, axis=0, bias=True):
- """Compute the variance over the given axis ignoring nans
-
- :Parameters:
- x : ndarray
- input array
- axis : int
- axis along which the variance is computed.
- bias : boolean
- If true, the biased (normalized by N, default) definition is used.
- If false, the unbiased (N-1) is used.
-
- :Results:
- s : float
- the variance."""
- x, axis = _chk_asarray(x,axis)
- x = x.copy()
- Norig = x.shape[axis]
-
- Nnan = np.sum(np.isnan(x),axis)*1.0
- n = Norig - Nnan
-
- x[np.isnan(x)] = 0.
- m1 = np.sum(x,axis)/n
-
- if axis:
- d = (x - np.expand_dims(m1, axis))**2.0
- else:
- d = (x - m1)**2.0
-
- m2 = np.sum(d,axis)-(m1*m1)*Nnan
- if bias:
- m2c = m2 / n
- else:
- m2c = m2 / (n - 1.)
- return m2c
-
-def _nanmedian(arr1d): # This only works on 1d arrays
- """Private function for rank 1 arrays. Compute the median ignoring NaN.
-
- :Parameters:
- arr1d : rank 1 ndarray
- input array
-
- :Results:
- m : float
- the median."""
- cond = 1-np.isnan(arr1d)
- x = np.sort(np.compress(cond,arr1d,axis=-1))
- if x.size == 0:
- return np.nan
- return np.median(x)
-
-# A change was made from the scipy version to handle scalar input and to
-# return a scalar when a 1d array is passed in or when axis is None.
-def nanmedian(x, axis=0):
- """ Compute the median along the given axis ignoring nan values
-
- Note: This function has been modified from the original scipy function.
- See http://projects.scipy.org/scipy/ticket/1098
-
- :Parameters:
- x : ndarray
- input array
- axis : int
- axis along which the median is computed.
-
- :Results:
- m : float
- the median."""
- x, axis = _chk_asarray(x,axis)
- if x.ndim == 0:
- return np.float(x)
- x = x.copy()
- x = np.apply_along_axis(_nanmedian,axis,x)
- if x.ndim == 0:
- x = np.float(x)
- return x
-
-def rankdata(a):
- """Ranks the data in a, dealing with ties appropriately.
-
- Equal values are assigned a rank that is the average of the ranks that
- would have been otherwise assigned to all of the values within that set.
- Ranks begin at 1, not 0.
-
- Example
- -------
- In [15]: stats.rankdata([0, 2, 2, 3])
- Out[15]: array([ 1. , 2.5, 2.5, 4. ])
-
- Parameters
- ----------
- a : array
- This array is first flattened.
-
- Returns
- -------
- An array of length equal to the size of a, containing rank scores.
- """
- a = np.ravel(a)
- n = len(a)
- svec, ivec = fastsort(a)
- sumranks = 0
- dupcount = 0
- newarray = np.zeros(n, float)
- for i in xrange(n):
- sumranks += i
- dupcount += 1
- if i==n-1 or svec[i] != svec[i+1]:
- averank = sumranks / float(dupcount) + 1
- for j in xrange(i-dupcount+1,i+1):
- newarray[ivec[j]] = averank
- sumranks = 0
- dupcount = 0
- return newarray
-
-def fastsort(a):
- # fixme: the wording in the docstring is nonsense.
- """Sort an array and provide the argsort.
-
- Parameters
- ----------
- a : array
-
- Returns
- -------
- (sorted array,
- indices into the original array,
- )
- """
- it = np.argsort(a)
- as_ = a[it]
- return as_, it
-
View
106 la/external/tests/scipy_test.py
@@ -1,106 +0,0 @@
-"""
-The functions in this module were copied from SciPy to avoid making la
-depend on SciPy. See the la LICENSE file for the SciPy license.
-
-In the SciPy project, these functions can be found in
-scipy/stats/tests/test_stats.py and scipy/stats/tests/test_mstats_basic.py
-
-Some modifications were made.
-
-"""
-
-import numpy as np
-from numpy.testing import *
-
-from la.external.scipy import nanmean, nanmedian, nanstd, nanvar, rankdata
-
-
-X = np.array([1,2,3,4,5,6,7,8,9],float)
-
-class TestNanFunc(TestCase):
- def __init__(self, *args, **kw):
- TestCase.__init__(self, *args, **kw)
- self.X = X.copy()
-
- self.Xall = X.copy()
- self.Xall[:] = np.nan
-
- self.Xsome = X.copy()
- self.Xsomet = X.copy()
- self.Xsome[0] = np.nan
- self.Xsomet = self.Xsomet[1:]
-
- def test_nanmean_none(self):
- """Check nanmean when no values are nan."""
- m = nanmean(X)
- assert_approx_equal(m, X[4])
-
- def test_nanmean_some(self):
- """Check nanmean when some values only are nan."""
- m = nanmean(self.Xsome)
- assert_approx_equal(m, 5.5)
-
- def test_nanmean_all(self):
- """Check nanmean when all values are nan."""
- m = nanmean(self.Xall)
- assert np.isnan(m)
-
- def test_nanstd_none(self):
- """Check nanstd when no values are nan."""
- s = nanstd(self.X)
- assert_approx_equal(s, np.std(self.X, ddof=0))
-
- def test_nanstd_some(self):
- """Check nanstd when some values only are nan."""
- s = nanstd(self.Xsome)
- assert_approx_equal(s, np.std(self.Xsomet, ddof=0))
-
- def test_nanstd_all(self):
- """Check nanstd when all values are nan."""
- s = nanstd(self.Xall)
- assert np.isnan(s)
-
- def test_nanvar_none(self):
- """Check nanvar when no values are nan."""
- s = nanvar(self.X)
- assert_approx_equal(s, np.var(self.X, ddof=0))
-
- def test_nanvar_some(self):
- """Check nanvar when some values only are nan."""
- s = nanvar(self.Xsome)
- assert_approx_equal(s, np.var(self.Xsomet, ddof=0))
-
- def test_nanvar_all(self):
- """Check nanvar when all values are nan."""
- s = nanvar(self.Xall)
- assert np.isnan(s)
-
- def test_nanmedian_none(self):
- """Check nanmedian when no values are nan."""
- m = nanmedian(self.X)
- assert_approx_equal(m, np.median(self.X))
-
- def test_nanmedian_some(self):
- """Check nanmedian when some values only are nan."""
- m = nanmedian(self.Xsome)
- assert_approx_equal(m, np.median(self.Xsomet))
-
- def test_nanmedian_all(self):
- """Check nanmedian when all values are nan."""
- m = nanmedian(self.Xall)
- assert np.isnan(m)
-
-class TestRanking(TestCase):
-
- def __init__(self, *args, **kwargs):
- TestCase.__init__(self, *args, **kwargs)
-
- def test_ranking(self):
- x = np.array([0,1,1,1,2,3,4,5,5,6,])
- assert_almost_equal(rankdata(x),[1,3,3,3,5,6,7,8.5,8.5,10])
- x = np.array([0,1,5,1,2,4,3,5,1,6,])
- assert_almost_equal(rankdata(x),[1,3,8.5,3,5,7,6,8.5,3,10])
-
-
-if __name__ == "__main__":
- run_module_suite()
View
5 la/farray/group.py
@@ -1,7 +1,8 @@
"NaN-aware numpy array functions for group by operations."
import numpy as np
-from la.farray import ranking, nanmedian
+import bottleneck as bn
+from la.farray import ranking
__all__ = ['group_ranking', 'group_mean', 'group_median', 'unique_group']
@@ -129,7 +130,7 @@ def group_median(x, groups, axis=0):
idxall = [slice(None)] * x.ndim
idxall[axis] = idx
if idx.sum() > 0:
- ns = nanmedian(x[idxall], axis=axis)
+ ns = bn.nanmedian(x[idxall], axis=axis)
xmedian[idxall] = np.expand_dims(ns, axis)
return xmedian
View
13 la/farray/misc.py
@@ -1,11 +1,10 @@
"NaN-aware numpy array functions for miscellaneous operations."
import numpy as np
+import bottleneck as bn
-from la.external.scipy import nanmedian, rankdata, nanstd, nanvar, nanmean
+__all__ = ['geometric_mean', 'correlation', 'covMissing', 'shuffle']
-__all__ = ['geometric_mean', 'correlation', 'covMissing', 'shuffle', 'nanmean',
- 'nanmedian', 'nanstd', 'nanvar']
def geometric_mean(x, axis=-1, check_for_greater_than_zero=True):
"""
@@ -101,13 +100,13 @@ def correlation(arr1, arr2, axis=None):
x1[mask] = np.nan
x2[mask] = np.nan
if axis == 0:
- x1 = x1 - nanmean(x1, axis)
- x2 = x2 - nanmean(x2, axis)
+ x1 = x1 - bn.nanmean(x1, axis)
+ x2 = x2 - bn.nanmean(x2, axis)
else:
idx = [slice(None)] * x1.ndim
idx[axis] = None
- x1 = x1 - nanmean(x1, axis)[idx]
- x2 = x2 - nanmean(x2, axis)[idx]
+ x1 = x1 - bn.nanmean(x1, axis)[idx]
+ x2 = x2 - bn.nanmean(x2, axis)[idx]
num = np.nansum(x1 * x2, axis)
den = np.sqrt(np.nansum(x1 * x1, axis) * np.nansum(x2 * x2, axis))
else:
View
7 la/farray/move.py
@@ -1,9 +1,10 @@
"Moving (rolling) statistics on numpy arrays."
import numpy as np
+import bottleneck as bn
from la.missing import nans, ismissing
-from la.farray import nanmean, nanstd, nanvar, lastrank, nanmedian
+from la.farray import lastrank
__all__ = ['move_median', 'move_nanmedian', 'move_func', 'move_nanranking',
'movingsum', 'movingsum_forward', 'movingrank'] #Last row deprecated
@@ -88,9 +89,9 @@ def move_nanmedian(arr, window, axis=-1, method='loop'):
"""
if method == 'strides':
- y = move_func_strides(nanmedian, arr, window, axis=axis)
+ y = move_func_strides(bn.nanmedian, arr, window, axis=axis)
elif method == 'loop':
- y = move_func_loop(nanmedian, arr, window, axis=axis)
+ y = move_func_loop(bn.nanmedian, arr, window, axis=axis)
else:
msg = "`method` must be 'strides' or 'loop'."
raise ValueError, msg
View
7 la/farray/normalize.py
@@ -9,7 +9,6 @@
import bottleneck as bn
from la.missing import nans
-from la.external.scipy import nanmedian, nanstd, nanmean
__all__ = ['lastrank', 'ranking', 'push', 'quantile', 'demean',
'demedian', 'zscore']
@@ -314,7 +313,7 @@ def demean(arr, axis=None):
array([ -1., NaN, 0., 1.])
"""
- marr = nanmean(arr, axis)
+ marr = bn.nanmean(arr, axis)
if (axis != 0) and (not axis is None) and (not np.isscalar(marr)):
ind = [slice(None)] * arr.ndim
ind[axis] = np.newaxis
@@ -345,7 +344,7 @@ def demedian(arr, axis=None):
array([ -1., NaN, 0., 8.])
"""
- marr = nanmedian(arr, axis)
+ marr = bn.nanmedian(arr, axis)
if (axis != 0) and (not axis is None) and (not np.isscalar(marr)):
ind = [slice(None)] * arr.ndim
ind[axis] = np.newaxis
@@ -377,7 +376,7 @@ def zscore(arr, axis=None):
"""
arr = demean(arr, axis)
- norm = nanstd(arr, axis)
+ norm = bn.nanstd(arr, axis)
if (axis != 0) and (not axis is None) and (not np.isscalar(norm)):
ind = [slice(None)] * arr.ndim
ind[axis] = np.newaxis
View
5 la/farray/tests/move_test.py
@@ -3,8 +3,9 @@
import numpy as np
from numpy.testing import assert_array_almost_equal
nan = np.nan
+import bottleneck as bn
-from la.farray import move_median, move_nanmedian, move_func, nanmedian
+from la.farray import move_median, move_nanmedian, move_func
def move_unit_maker(func, arrfunc, methods):
@@ -46,4 +47,4 @@ def test_move_median():
def test_move_nanmedian():
"Test move_nanmedian."
methods = ('strides', 'func_loop', 'func_strides')
- yield move_unit_maker, move_nanmedian, nanmedian, methods
+ yield move_unit_maker, move_nanmedian, bn.nanmedian, methods
View
8 la/tests/deflarry_nose_test.py
@@ -324,28 +324,28 @@ def test_grouprank(self):
self.check_function(t, label, p, self.lar)
def test_groupmean(self):
- "larry.groumean" #not in deflarry_test
+ "larry.group_mean" #not in deflarry_test
t = self.tmean1
label = self.label
p = self.lar.group_mean(self.sectors)
self.check_function(t, label, p, self.lar)
def test_groupmedian(self):
- "larry.groumedian" #not in deflarry_test
+ "larry.group_median" #not in deflarry_test
t = self.tmedian1
label = self.label
p = self.lar.group_median(self.sectors)
self.check_function(t, label, p, self.lar)
def test_groupmean3(self):
- "larry.groupmean_3d" #not in deflarry_test
+ "larry.group_mean_3d" #not in deflarry_test
t = self.tmean3
label = self.label3
p = self.lar3.group_mean(self.sectors)
self.check_function(t, label, p, self.lar3)
def test_groupmedian3(self):
- "larry.groupmedian_3d" #not in deflarry_test
+ "larry.group_median_3d" #not in deflarry_test
t = self.tmedian3
label = self.label3
p = self.lar3.group_median(self.sectors)
View
4 la/tests/more_test.py
@@ -7,10 +7,10 @@
from la.farray import (push, geometric_mean, lastrank,
movingrank, movingsum_forward,
quantile, ranking, group_mean, group_median,
- group_ranking, nanmedian, nanstd)
+ group_ranking)
# Functions to test
-funcs_one = [geometric_mean, lastrank, nanstd, ranking, nanmedian]
+funcs_one = [geometric_mean, lastrank, ranking]
funcs_oneint = [movingrank, movingsum_forward, quantile, push]
funcs_onefrac = [lastrank]
funcs_sect = [group_mean, group_median, group_ranking]
View
3  la/tests/test_3d.py
@@ -11,9 +11,6 @@ def getfuncs(argint, argfrac, argsector):
funcs = [('geometric_mean' , (), ()),
('lastrank' , (), ()),
('ranking' , (), ()),
- ('nanmedian' , (), ()),
- ('nanmean' , (), ()),
- ('nanstd' , (), ()),
('movingrank' , (argint,), ()),
('movingsum_forward' , (argint,), ()),
('quantile' , (argint,), ()),
Please sign in to comment.
Something went wrong with that request. Please try again.