Skip to content

Commit

Permalink
Yet another speed up of the selector functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
kwgoodman committed Jan 19, 2011
1 parent 0dde7e1 commit c2163dd
Show file tree
Hide file tree
Showing 48 changed files with 756 additions and 747 deletions.
38 changes: 19 additions & 19 deletions README.rst
Expand Up @@ -46,17 +46,17 @@ Bottleneck is fast::

>>> arr = np.random.rand(100, 100)
>>> timeit np.nanmax(arr)
10000 loops, best of 3: 93.9 us per loop
10000 loops, best of 3: 91.5 us per loop
>>> timeit bn.nanmax(arr)
100000 loops, best of 3: 14.3 us per loop
100000 loops, best of 3: 13.3 us per loop

Let's not forget to add some NaNs::

>>> arr[arr > 0.5] = np.nan
>>> timeit np.nanmax(arr)
10000 loops, best of 3: 139 us per loop
10000 loops, best of 3: 140 us per loop
>>> timeit bn.nanmax(arr)
100000 loops, best of 3: 14.3 us per loop
100000 loops, best of 3: 13.2 us per loop

Bottleneck comes with a benchmark suite. To run the benchmark::
Expand All @@ -71,21 +71,21 @@ Bottleneck comes with a benchmark suite. To run the benchmark::

no NaN no NaN no NaN NaN NaN NaN
(10,10) (100,100) (1000,1000) (10,10) (100,100) (1000,1000)
median 6.59 13.83 7.22 5.83 3.50 2.83
nanmedian 160.86 135.68 8.27 163.28 179.70 8.05
nansum 7.51 5.85 1.71 7.56 6.74 1.70
nanmax 8.40 5.89 1.67 8.66 9.37 1.68
nanmean 16.02 13.48 2.98 16.77 27.22 4.97
nanstd 20.37 9.33 2.62 21.01 17.10 3.64
nanargmax 7.79 5.72 2.57 7.78 8.43 2.76
move_sum 7.82 7.86 14.41 7.82 8.20 13.95
move_nansum 18.89 18.73 29.44 18.31 23.83 29.83
move_mean 7.14 4.17 14.47 7.17 8.13 14.28
move_nanmean 22.34 11.56 29.87 22.77 14.09 30.91
move_std 12.26 3.31 22.94 14.45 19.95 29.92
move_nanstd 25.12 6.14 34.99 27.98 6.97 36.13
move_max 3.07 3.64 9.33 3.64 5.44 11.76
move_nanmax 16.04 6.28 19.38 16.72 14.56 27.44
median 9.71 13.75 7.24 8.23 3.67 2.82
nanmedian 238.04 128.73 8.11 247.20 174.31 8.02
nansum 13.40 6.50 1.70 13.34 7.43 1.69
nanmax 13.08 6.35 1.67 14.08 10.42 1.69
nanmean 23.88 13.87 2.97 24.78 28.81 4.95
nanstd 33.04 9.75 2.62 33.33 17.96 3.66
nanargmax 12.10 5.85 2.59 12.48 9.02 2.79
move_sum 11.95 8.27 14.53 11.73 8.63 14.11
move_nansum 31.44 20.06 29.41 30.16 25.53 29.80
move_mean 10.99 4.28 14.46 10.94 8.45 14.26
move_nanmean 32.77 11.76 29.87 33.38 14.43 30.80
move_std 17.28 3.33 22.95 21.63 20.66 29.86
move_nanstd 34.08 6.18 35.02 39.70 7.01 36.18
move_max 4.21 3.64 9.36 4.89 6.07 11.81
move_nanmax 22.19 6.28 19.66 24.07 14.71 27.19

Reference functions:
median np.median
Expand Down
5 changes: 3 additions & 2 deletions RELEASE.rst
Expand Up @@ -11,11 +11,12 @@ Bottleneck 0.3.0

*Release date: Not yet released, in development*

The third release of Bottleneck is faster and contains 10 new function.
The third release of Bottleneck is twice as fast for small input arrays and
contains 10 new functions.

**Faster**

- All functions are faster (less overhead in high-level functions)
- All functions are faster (less overhead in selector functions)

**New functions**

Expand Down
5 changes: 4 additions & 1 deletion bottleneck/src/func/func_header.pyx
Expand Up @@ -5,7 +5,8 @@ from numpy cimport NPY_INT32 as NPY_int32
from numpy cimport NPY_INT64 as NPY_int64
from numpy cimport NPY_FLOAT32 as NPY_float32
from numpy cimport NPY_FLOAT64 as NPY_float64
from numpy cimport PyArray_EMPTY, import_array
from numpy cimport (PyArray_EMPTY, PyArray_TYPE, PyArray_NDIM,
PyArray_SIZE, import_array)
import_array()
import bottleneck as bn

Expand All @@ -28,8 +29,10 @@ float64 = np.dtype(np.float64)

if np.int_ == np.int32:
NPINT = 'int32'
NPY_int_ = NPY_int32
elif np.int_ == np.int64:
NPINT = 'int64'
NPY_int_ = NPY_int64
else:
raise RuntimeError('Expecting default NumPy int to be 32 or 64 bit.')

Expand Down
54 changes: 27 additions & 27 deletions bottleneck/src/func/median.pyx
Expand Up @@ -116,8 +116,8 @@ def median_selector(arr, axis):
else:
a = np.array(arr, copy=False)
cdef tuple key
cdef int ndim = a.ndim
cdef np.dtype dtype = a.dtype
cdef int ndim = PyArray_NDIM(a)
cdef int dtype = PyArray_TYPE(a)
if axis is not None:
if axis < 0:
axis += ndim
Expand All @@ -134,7 +134,7 @@ def median_selector(arr, axis):
try:
func = median_slow_dict[axis]
except KeyError:
tup = (str(ndim), str(dtype), str(axis))
tup = (str(ndim), str(a.dtype), str(axis))
raise TypeError, "Unsupported ndim/dtype/axis (%s/%s/%s)." % tup
return func, a

Expand Down Expand Up @@ -1175,30 +1175,30 @@ def median_3d_float64_axis2(np.ndarray[np.float64_t, ndim=3] a):
return y

cdef dict median_dict = {}
median_dict[(1, int32, 0)] = median_1d_int32_axis0
median_dict[(1, int64, 0)] = median_1d_int64_axis0
median_dict[(2, int32, 0)] = median_2d_int32_axis0
median_dict[(2, int32, 1)] = median_2d_int32_axis1
median_dict[(2, int64, 0)] = median_2d_int64_axis0
median_dict[(2, int64, 1)] = median_2d_int64_axis1
median_dict[(3, int32, 0)] = median_3d_int32_axis0
median_dict[(3, int32, 1)] = median_3d_int32_axis1
median_dict[(3, int32, 2)] = median_3d_int32_axis2
median_dict[(3, int64, 0)] = median_3d_int64_axis0
median_dict[(3, int64, 1)] = median_3d_int64_axis1
median_dict[(3, int64, 2)] = median_3d_int64_axis2
median_dict[(1, float32, 0)] = median_1d_float32_axis0
median_dict[(1, float64, 0)] = median_1d_float64_axis0
median_dict[(2, float32, 0)] = median_2d_float32_axis0
median_dict[(2, float32, 1)] = median_2d_float32_axis1
median_dict[(2, float64, 0)] = median_2d_float64_axis0
median_dict[(2, float64, 1)] = median_2d_float64_axis1
median_dict[(3, float32, 0)] = median_3d_float32_axis0
median_dict[(3, float32, 1)] = median_3d_float32_axis1
median_dict[(3, float32, 2)] = median_3d_float32_axis2
median_dict[(3, float64, 0)] = median_3d_float64_axis0
median_dict[(3, float64, 1)] = median_3d_float64_axis1
median_dict[(3, float64, 2)] = median_3d_float64_axis2
median_dict[(1, NPY_int32, 0)] = median_1d_int32_axis0
median_dict[(1, NPY_int64, 0)] = median_1d_int64_axis0
median_dict[(2, NPY_int32, 0)] = median_2d_int32_axis0
median_dict[(2, NPY_int32, 1)] = median_2d_int32_axis1
median_dict[(2, NPY_int64, 0)] = median_2d_int64_axis0
median_dict[(2, NPY_int64, 1)] = median_2d_int64_axis1
median_dict[(3, NPY_int32, 0)] = median_3d_int32_axis0
median_dict[(3, NPY_int32, 1)] = median_3d_int32_axis1
median_dict[(3, NPY_int32, 2)] = median_3d_int32_axis2
median_dict[(3, NPY_int64, 0)] = median_3d_int64_axis0
median_dict[(3, NPY_int64, 1)] = median_3d_int64_axis1
median_dict[(3, NPY_int64, 2)] = median_3d_int64_axis2
median_dict[(1, NPY_float32, 0)] = median_1d_float32_axis0
median_dict[(1, NPY_float64, 0)] = median_1d_float64_axis0
median_dict[(2, NPY_float32, 0)] = median_2d_float32_axis0
median_dict[(2, NPY_float32, 1)] = median_2d_float32_axis1
median_dict[(2, NPY_float64, 0)] = median_2d_float64_axis0
median_dict[(2, NPY_float64, 1)] = median_2d_float64_axis1
median_dict[(3, NPY_float32, 0)] = median_3d_float32_axis0
median_dict[(3, NPY_float32, 1)] = median_3d_float32_axis1
median_dict[(3, NPY_float32, 2)] = median_3d_float32_axis2
median_dict[(3, NPY_float64, 0)] = median_3d_float64_axis0
median_dict[(3, NPY_float64, 1)] = median_3d_float64_axis1
median_dict[(3, NPY_float64, 2)] = median_3d_float64_axis2

cdef dict median_slow_dict = {}
median_slow_dict[0] = median_slow_axis0
Expand Down
56 changes: 28 additions & 28 deletions bottleneck/src/func/nanargmax.pyx
Expand Up @@ -95,9 +95,9 @@ def nanargmax_selector(arr, axis):
a = arr
else:
a = np.array(arr, copy=False)
cdef int ndim = a.ndim
cdef np.dtype dtype = a.dtype
cdef int size = a.size
cdef int ndim = PyArray_NDIM(a)
cdef int dtype = PyArray_TYPE(a)
cdef int size = PyArray_SIZE(a)
if size == 0:
msg = "numpy.nanargmax() raises on size=0; so Bottleneck does too."
raise ValueError, msg
Expand All @@ -117,7 +117,7 @@ def nanargmax_selector(arr, axis):
try:
func = nanargmax_slow_dict[axis]
except KeyError:
tup = (str(ndim), str(dtype), str(axis))
tup = (str(ndim), str(a.dtype), str(axis))
raise TypeError, "Unsupported ndim/dtype/axis (%s/%s/%s)." % tup
return func, a

Expand Down Expand Up @@ -732,30 +732,30 @@ def nanargmax_3d_float64_axis2(np.ndarray[np.float64_t, ndim=3] a):
return y

cdef dict nanargmax_dict = {}
nanargmax_dict[(1, int32, 0)] = nanargmax_1d_int32_axis0
nanargmax_dict[(1, int64, 0)] = nanargmax_1d_int64_axis0
nanargmax_dict[(2, int32, 0)] = nanargmax_2d_int32_axis0
nanargmax_dict[(2, int32, 1)] = nanargmax_2d_int32_axis1
nanargmax_dict[(2, int64, 0)] = nanargmax_2d_int64_axis0
nanargmax_dict[(2, int64, 1)] = nanargmax_2d_int64_axis1
nanargmax_dict[(3, int32, 0)] = nanargmax_3d_int32_axis0
nanargmax_dict[(3, int32, 1)] = nanargmax_3d_int32_axis1
nanargmax_dict[(3, int32, 2)] = nanargmax_3d_int32_axis2
nanargmax_dict[(3, int64, 0)] = nanargmax_3d_int64_axis0
nanargmax_dict[(3, int64, 1)] = nanargmax_3d_int64_axis1
nanargmax_dict[(3, int64, 2)] = nanargmax_3d_int64_axis2
nanargmax_dict[(1, float32, 0)] = nanargmax_1d_float32_axis0
nanargmax_dict[(1, float64, 0)] = nanargmax_1d_float64_axis0
nanargmax_dict[(2, float32, 0)] = nanargmax_2d_float32_axis0
nanargmax_dict[(2, float32, 1)] = nanargmax_2d_float32_axis1
nanargmax_dict[(2, float64, 0)] = nanargmax_2d_float64_axis0
nanargmax_dict[(2, float64, 1)] = nanargmax_2d_float64_axis1
nanargmax_dict[(3, float32, 0)] = nanargmax_3d_float32_axis0
nanargmax_dict[(3, float32, 1)] = nanargmax_3d_float32_axis1
nanargmax_dict[(3, float32, 2)] = nanargmax_3d_float32_axis2
nanargmax_dict[(3, float64, 0)] = nanargmax_3d_float64_axis0
nanargmax_dict[(3, float64, 1)] = nanargmax_3d_float64_axis1
nanargmax_dict[(3, float64, 2)] = nanargmax_3d_float64_axis2
nanargmax_dict[(1, NPY_int32, 0)] = nanargmax_1d_int32_axis0
nanargmax_dict[(1, NPY_int64, 0)] = nanargmax_1d_int64_axis0
nanargmax_dict[(2, NPY_int32, 0)] = nanargmax_2d_int32_axis0
nanargmax_dict[(2, NPY_int32, 1)] = nanargmax_2d_int32_axis1
nanargmax_dict[(2, NPY_int64, 0)] = nanargmax_2d_int64_axis0
nanargmax_dict[(2, NPY_int64, 1)] = nanargmax_2d_int64_axis1
nanargmax_dict[(3, NPY_int32, 0)] = nanargmax_3d_int32_axis0
nanargmax_dict[(3, NPY_int32, 1)] = nanargmax_3d_int32_axis1
nanargmax_dict[(3, NPY_int32, 2)] = nanargmax_3d_int32_axis2
nanargmax_dict[(3, NPY_int64, 0)] = nanargmax_3d_int64_axis0
nanargmax_dict[(3, NPY_int64, 1)] = nanargmax_3d_int64_axis1
nanargmax_dict[(3, NPY_int64, 2)] = nanargmax_3d_int64_axis2
nanargmax_dict[(1, NPY_float32, 0)] = nanargmax_1d_float32_axis0
nanargmax_dict[(1, NPY_float64, 0)] = nanargmax_1d_float64_axis0
nanargmax_dict[(2, NPY_float32, 0)] = nanargmax_2d_float32_axis0
nanargmax_dict[(2, NPY_float32, 1)] = nanargmax_2d_float32_axis1
nanargmax_dict[(2, NPY_float64, 0)] = nanargmax_2d_float64_axis0
nanargmax_dict[(2, NPY_float64, 1)] = nanargmax_2d_float64_axis1
nanargmax_dict[(3, NPY_float32, 0)] = nanargmax_3d_float32_axis0
nanargmax_dict[(3, NPY_float32, 1)] = nanargmax_3d_float32_axis1
nanargmax_dict[(3, NPY_float32, 2)] = nanargmax_3d_float32_axis2
nanargmax_dict[(3, NPY_float64, 0)] = nanargmax_3d_float64_axis0
nanargmax_dict[(3, NPY_float64, 1)] = nanargmax_3d_float64_axis1
nanargmax_dict[(3, NPY_float64, 2)] = nanargmax_3d_float64_axis2

cdef dict nanargmax_slow_dict = {}
nanargmax_slow_dict[0] = nanargmax_slow_axis0
Expand Down
56 changes: 28 additions & 28 deletions bottleneck/src/func/nanargmin.pyx
Expand Up @@ -95,9 +95,9 @@ def nanargmin_selector(arr, axis):
a = arr
else:
a = np.array(arr, copy=False)
cdef int ndim = a.ndim
cdef np.dtype dtype = a.dtype
cdef int size = a.size
cdef int ndim = PyArray_NDIM(a)
cdef int dtype = PyArray_TYPE(a)
cdef int size = PyArray_SIZE(a)
if size == 0:
msg = "numpy.nanargmin() raises on size=0; so Bottleneck does too."
raise ValueError, msg
Expand All @@ -117,7 +117,7 @@ def nanargmin_selector(arr, axis):
try:
func = nanargmin_slow_dict[axis]
except KeyError:
tup = (str(ndim), str(dtype), str(axis))
tup = (str(ndim), str(a.dtype), str(axis))
raise TypeError, "Unsupported ndim/dtype/axis (%s/%s/%s)." % tup
return func, a

Expand Down Expand Up @@ -732,30 +732,30 @@ def nanargmin_3d_float64_axis2(np.ndarray[np.float64_t, ndim=3] a):
return y

cdef dict nanargmin_dict = {}
nanargmin_dict[(1, int32, 0)] = nanargmin_1d_int32_axis0
nanargmin_dict[(1, int64, 0)] = nanargmin_1d_int64_axis0
nanargmin_dict[(2, int32, 0)] = nanargmin_2d_int32_axis0
nanargmin_dict[(2, int32, 1)] = nanargmin_2d_int32_axis1
nanargmin_dict[(2, int64, 0)] = nanargmin_2d_int64_axis0
nanargmin_dict[(2, int64, 1)] = nanargmin_2d_int64_axis1
nanargmin_dict[(3, int32, 0)] = nanargmin_3d_int32_axis0
nanargmin_dict[(3, int32, 1)] = nanargmin_3d_int32_axis1
nanargmin_dict[(3, int32, 2)] = nanargmin_3d_int32_axis2
nanargmin_dict[(3, int64, 0)] = nanargmin_3d_int64_axis0
nanargmin_dict[(3, int64, 1)] = nanargmin_3d_int64_axis1
nanargmin_dict[(3, int64, 2)] = nanargmin_3d_int64_axis2
nanargmin_dict[(1, float32, 0)] = nanargmin_1d_float32_axis0
nanargmin_dict[(1, float64, 0)] = nanargmin_1d_float64_axis0
nanargmin_dict[(2, float32, 0)] = nanargmin_2d_float32_axis0
nanargmin_dict[(2, float32, 1)] = nanargmin_2d_float32_axis1
nanargmin_dict[(2, float64, 0)] = nanargmin_2d_float64_axis0
nanargmin_dict[(2, float64, 1)] = nanargmin_2d_float64_axis1
nanargmin_dict[(3, float32, 0)] = nanargmin_3d_float32_axis0
nanargmin_dict[(3, float32, 1)] = nanargmin_3d_float32_axis1
nanargmin_dict[(3, float32, 2)] = nanargmin_3d_float32_axis2
nanargmin_dict[(3, float64, 0)] = nanargmin_3d_float64_axis0
nanargmin_dict[(3, float64, 1)] = nanargmin_3d_float64_axis1
nanargmin_dict[(3, float64, 2)] = nanargmin_3d_float64_axis2
nanargmin_dict[(1, NPY_int32, 0)] = nanargmin_1d_int32_axis0
nanargmin_dict[(1, NPY_int64, 0)] = nanargmin_1d_int64_axis0
nanargmin_dict[(2, NPY_int32, 0)] = nanargmin_2d_int32_axis0
nanargmin_dict[(2, NPY_int32, 1)] = nanargmin_2d_int32_axis1
nanargmin_dict[(2, NPY_int64, 0)] = nanargmin_2d_int64_axis0
nanargmin_dict[(2, NPY_int64, 1)] = nanargmin_2d_int64_axis1
nanargmin_dict[(3, NPY_int32, 0)] = nanargmin_3d_int32_axis0
nanargmin_dict[(3, NPY_int32, 1)] = nanargmin_3d_int32_axis1
nanargmin_dict[(3, NPY_int32, 2)] = nanargmin_3d_int32_axis2
nanargmin_dict[(3, NPY_int64, 0)] = nanargmin_3d_int64_axis0
nanargmin_dict[(3, NPY_int64, 1)] = nanargmin_3d_int64_axis1
nanargmin_dict[(3, NPY_int64, 2)] = nanargmin_3d_int64_axis2
nanargmin_dict[(1, NPY_float32, 0)] = nanargmin_1d_float32_axis0
nanargmin_dict[(1, NPY_float64, 0)] = nanargmin_1d_float64_axis0
nanargmin_dict[(2, NPY_float32, 0)] = nanargmin_2d_float32_axis0
nanargmin_dict[(2, NPY_float32, 1)] = nanargmin_2d_float32_axis1
nanargmin_dict[(2, NPY_float64, 0)] = nanargmin_2d_float64_axis0
nanargmin_dict[(2, NPY_float64, 1)] = nanargmin_2d_float64_axis1
nanargmin_dict[(3, NPY_float32, 0)] = nanargmin_3d_float32_axis0
nanargmin_dict[(3, NPY_float32, 1)] = nanargmin_3d_float32_axis1
nanargmin_dict[(3, NPY_float32, 2)] = nanargmin_3d_float32_axis2
nanargmin_dict[(3, NPY_float64, 0)] = nanargmin_3d_float64_axis0
nanargmin_dict[(3, NPY_float64, 1)] = nanargmin_3d_float64_axis1
nanargmin_dict[(3, NPY_float64, 2)] = nanargmin_3d_float64_axis2

cdef dict nanargmin_slow_dict = {}
nanargmin_slow_dict[0] = nanargmin_slow_axis0
Expand Down

0 comments on commit c2163dd

Please sign in to comment.