Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

ENH: sparse: speedup LIL matrix slicing #3338

Open
wants to merge 5 commits into from

4 participants

@pv
Owner
pv commented

The current implementation for slicing LIL matrices is very inefficient. Replace it with a more efficient implementation.

Gives a factor of ~1/density speedup:

In []: A = rand(3000, 3000, density=1e-3, format="lil")

# Before (9ccc68475)
In [4]: %timeit A[::2,::-2]
10 loops, best of 3: 87.5 ms per loop

# After (d47e27b0)
In [4]: %timeit A[::2,::-2]
1000 loops, best of 3: 603 µs per loop

Gives speedups also for smaller slices:

# Before
In [5]: %timeit A[3,:]
1000 loops, best of 3: 213 µs per loop
In [6]: %timeit A[:,3]
1000 loops, best of 3: 901 µs per loop
In [7]: %timeit A[1:5,1:5]
10000 loops, best of 3: 83.3 µs per loop

# After
In [5]: %timeit A[3,:]
10000 loops, best of 3: 38.8 µs per loop
In [6]: %timeit A[:,3]
1000 loops, best of 3: 702 µs per loop
In [7]: %timeit A[1:5,1:5]
10000 loops, best of 3: 40.3 µs per loop

(Column slices in LIL are still problematic, but that's due to the matrix format.)

EDIT updated benchmarks

@pv
Owner
pv commented

TBH, the LIL/DOK matrices should be reimplemented e.g. in Cython, with appropriate low-level data storage.

@coveralls

Coverage Status

Coverage remained the same when pulling 63988b9 on pv:lil-speed into 2df405a on scipy:master.

@pv pv added the PR label
@coveralls

Coverage Status

Coverage remained the same when pulling 77929e5 on pv:lil-speed into 2df405a on scipy:master.

@rgommers
Owner

Guess this can be closed?

@pv
Owner
pv commented

No, this is a different optimization.
Probably needs re-benchmarking and reimplementation in Cython.

@pv
Owner
pv commented

Rebased and cythonized.

@coveralls

Coverage Status

Changes Unknown when pulling d47e27b on pv:lil-speed into an unknown commit on scipy:master.

@jnothman jnothman commented on the diff
scipy/sparse/lil.py
@@ -220,11 +220,30 @@ def getrowview(self, i):
def getrow(self, i):
"""Returns a copy of the 'i'th row.
"""
+ if i < 0:
@jnothman
jnothman added a note

Is there a reason this is inlined rather than using _check_row_bounds?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@jnothman jnothman commented on the diff
scipy/sparse/lil.py
@@ -220,11 +220,30 @@ def getrowview(self, i):
def getrow(self, i):
"""Returns a copy of the 'i'th row.
"""
+ if i < 0:
+ i += self.shape[0]
+ if i < 0 or i >= self.shape[0]:
+ raise IndexError('row index out of bounds')
@jnothman
jnothman added a note

There's no test coverage for this line.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@jnothman

Otherwise, this LGTM

@pv pv removed the PR label
@rgommers
Owner

This needs only a rebase and a very minor tweak it looks like. Time to get it in?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
View
118 scipy/sparse/_csparsetools.pyx
@@ -306,6 +306,85 @@ cpdef _lil_fancy_set(cnp.npy_intp M, cnp.npy_intp N,
_lil_insert[value_t](M, N, rows, data, i, j, values[x, y])
+def lil_get_row_ranges(cnp.npy_intp M, cnp.npy_intp N,
+ object[:] rows, object[:] datas,
+ object[:] new_rows, object[:] new_datas,
+ object irows,
+ cnp.npy_intp j_start,
+ cnp.npy_intp j_stop,
+ cnp.npy_intp j_stride,
+ cnp.npy_intp nj):
+ """
+ Column-slicing fast path for LIL matrices.
+
+ Extracts values from rows/datas and inserts in to
+ new_rows/new_datas.
+
+ Parameters
+ ----------
+ M, N
+ Shape of input array
+ rows, datas
+ LIL data for input array, shape (M, N)
+ new_rows, new_datas
+ LIL data for output array, shape (len(irows), nj)
+ irows : iterator
+ Iterator yielding row indices
+ j_start, j_stop, j_stride
+ Column range(j_start, j_stop, j_stride) to get
+ nj : int
+ Number of columns corresponding to j_* variables.
+
+ """
+ cdef cnp.npy_intp nk, k, j, a, b, m, r, p
+ cdef list cur_row, cur_data, new_row, new_data
+
+ if j_stride == 0:
+ raise ValueError("cannot index with zero stride")
+
+ for nk, k in enumerate(irows):
+ if k >= M or k < -M:
+ raise ValueError("row index %d out of bounds" % (k,))
+ if k < 0:
+ k += M
+
+ if j_stride == 1 and nj == N:
+ # full row slice
+ new_rows[nk] = list(rows[k])
+ new_datas[nk] = list(datas[k])
+ else:
+ # partial row slice
+ cur_row = rows[k]
+ cur_data = datas[k]
+ new_row = new_rows[nk]
+ new_data = new_datas[nk]
+
+ if j_stride > 0:
+ a = bisect_left(cur_row, j_start)
+ for m in range(a, len(cur_row)):
+ j = cur_row[m]
+ if j >= j_stop:
+ break
+ r = (j - j_start) % j_stride
+ if r != 0:
+ continue
+ p = (j - j_start) // j_stride
+ new_row.append(p)
+ new_data.append(cur_data[m])
+ else:
+ a = bisect_right(cur_row, j_stop)
+ for m in range(a, len(cur_row)):
+ j = cur_row[m]
+ if j > j_start:
+ break
+ r = (j - j_start) % j_stride
+ if r != 0:
+ continue
+ p = (j - j_start) // j_stride
+ new_row.insert(0, p)
+ new_data.insert(0, cur_data[m])
+
+
cdef lil_insertat_nocheck(list row, list data, cnp.npy_intp j, object x):
"""
Insert a single item to LIL matrix.
@@ -359,7 +438,7 @@ cdef lil_deleteat_nocheck(list row, list data, cnp.npy_intp j):
@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
-cdef bisect_left(list a, cnp.npy_intp x):
+cdef cnp.npy_intp bisect_left(list a, cnp.npy_intp x) except -1:
"""
Bisection search in a sorted list.
@@ -391,3 +470,40 @@ cdef bisect_left(list a, cnp.npy_intp x):
else:
hi = mid
return lo
+
+
+@cython.cdivision(True)
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef cnp.npy_intp bisect_right(list a, cnp.npy_intp x) except -1:
+ """
+ Bisection search in a sorted list.
+
+ List is assumed to contain objects castable to integers.
+
+ Parameters
+ ----------
+ a
+ List to search in
+ x
+ Value to search for
+
+ Returns
+ -------
+ j : int
+ Index immediately at the right of the value (if present), or at
+ the point to which it can be inserted maintaining order.
+
+ """
+ cdef cnp.npy_intp hi = len(a)
+ cdef cnp.npy_intp lo = 0
+ cdef cnp.npy_intp mid, v
+
+ while lo < hi:
+ mid = (lo + hi)//2
+ v = a[mid]
+ if x < v:
+ hi = mid
+ else:
+ lo = mid + 1
+ return lo
View
175 scipy/sparse/benchmarks/bench_sparse.py
@@ -3,6 +3,7 @@
import time
import warnings
+import itertools
import numpy
import numpy as np
@@ -291,59 +292,50 @@ def bench_conversion(self):
print(output)
- def _getset_bench(self, kernel, formats):
- print('==========================================================')
- print(' N | s.patt. |' + ''.join(' %7s |' % fmt for fmt in formats))
- print('----------------------------------------------------------')
-
- A = rand(1000, 1000, density=1e-5)
-
- for N in [1, 10, 100, 1000, 10000]:
- for spat in [False, True]:
+ def _getset_bench(self, kernel, getargs, formats, Ns, spats=(False, True),
+ Ms=(1000,), densities=(1e-5,), msg=''):
+ print()
+ print(' MxM sparse matrix %s' % (msg,))
+ print('='*80)
+ print('|'.join(' %7s ' % x for x in (['N', 's.patt.', 'dens.', 'M'] + formats)))
+ print('-'*80)
+
+ for M, density in itertools.product(Ms, densities):
+ np.random.seed(1234)
+ A = rand(M, M, density=density)
+ for N, spat in itertools.product(Ns, spats):
# indices to assign to
- i, j = [], []
- while len(i) < N:
- n = N - len(i)
- ip = np.random.randint(0, A.shape[0], size=n)
- jp = np.random.randint(0, A.shape[1], size=n)
- i = np.r_[i, ip]
- j = np.r_[j, jp]
- v = np.random.rand(n)
-
- if N == 1:
- i = int(i)
- j = int(j)
- v = float(v)
-
+ kernel_args = getargs(A, N)
times = []
for fmt in formats:
- if fmt == 'dok' and N > 500:
- times.append(None)
- continue
-
base = A.asformat(fmt)
m = base.copy()
if spat:
- kernel(m, i, j, v)
+ kernel(m, *kernel_args)
with warnings.catch_warnings():
warnings.simplefilter('ignore', SparseEfficiencyWarning)
iter = 0
total_time = 0
+ clock_start = time.clock()
while total_time < 0.2 and iter < 5000:
if not spat:
m = base.copy()
a = time.clock()
- kernel(m, i, j, v)
- total_time += time.clock() - a
+ kernel(m, *kernel_args)
+ t = time.clock()
+ total_time += t - a
iter += 1
+ if t - clock_start > 1:
+ break
times.append(total_time/float(iter))
- output = " %6d | %7s " % (N, "same" if spat else "change")
+ output = " %7d | %7s | %7g | %7d " % (N, "same" if spat else "change",
+ density, M)
for t in times:
if t is None:
output += '| n/a '
@@ -351,19 +343,116 @@ def _getset_bench(self, kernel, formats):
output += '| %5.2fms ' % (1e3*t)
print(output)
- def bench_setitem(self):
- def kernel(A, i, j, v):
- A[i, j] = v
- print()
- print(' Sparse Matrix fancy __setitem__')
- self._getset_bench(kernel, ['csr', 'csc', 'lil', 'dok'])
+ def _getset_set_kernel(self, A, i, j, v):
+ A[i, j] = v
+
+ def _getset_get_kernel(self, A, i, j, v=None):
+ A[i, j]
+
+ def _getset_args_random(self, A, N):
+ np.random.seed(1234)
+ i, j = [], []
+ while len(i) < N:
+ n = N - len(i)
+ ip = np.random.randint(0, A.shape[0], size=n)
+ jp = np.random.randint(0, A.shape[1], size=n)
+ i = np.r_[i, ip]
+ j = np.r_[j, jp]
+ v = np.random.rand(n)
+ if N == 1:
+ i = int(i)
+ j = int(j)
+ v = float(v)
+ return i, j, v
+
+ def bench_setitem_random(self):
+ self._getset_bench(self._getset_set_kernel, self._getset_args_random,
+ ['csr', 'csc', 'lil', 'dok'],
+ [1, 10, 100, 1000, 10000],
+ msg='__setitem__[i, j]; len(i) = len(j) = N; random indices')
+
+ def bench_getitem_random(self):
+ self._getset_bench(self._getset_get_kernel, self._getset_args_random,
+ ['csr', 'csc', 'lil', 'dok'],
+ [1, 10, 100, 1000, 10000],
+ spats=[False],
+ msg='__getitem__[i, j]; len(i) = len(j) = N; random indices')
+
+ def _getset_args_fullrows(self, A, N):
+ np.random.seed(1234)
+ N //= A.shape[0]
+ i, j = [], slice(None)
+ while len(i) < N:
+ n = N - len(i)
+ ip = np.random.randint(0, A.shape[0], size=n)
+ i = np.r_[i, ip]
+ v = np.random.rand(n, A.shape[1])
+ if N == 1:
+ i = int(i)
+ return i, j, v
+
+ def bench_setitem_fullrow(self):
+ self._getset_bench(self._getset_set_kernel, self._getset_args_fullrows,
+ ['csr', 'csc', 'lil', 'dok'],
+ [1000, 10000, 100000],
+ densities=(1e-5, 1e-2, 0.5),
+ msg="__setitem__[idx,:]; len(idx)*M == N",
+ )
+
+ def bench_getitem_fullrow(self):
+ self._getset_bench(self._getset_get_kernel, self._getset_args_fullrows,
+ ['csr', 'csc', 'lil', 'dok'],
+ [1000, 10000, 100000],
+ densities=(1e-5, 1e-2, 0.5),
+ spats=[False],
+ msg="__getitem__[idx,:]; len(idx)*M == N")
+
+ def _getset_args_fullcols(self, A, N):
+ i, j, v = self._getset_args_fullrows(A, N)
+ return j, i, v.T
+
+ def bench_setitem_fullcol(self):
+ self._getset_bench(self._getset_set_kernel, self._getset_args_fullcols,
+ ['csr', 'csc', 'lil', 'dok'],
+ [1000, 10000, 100000],
+ densities=(1e-5, 1e-2, 0.5),
+ msg="__setitem__[:,idx]; len(idx)*M == N",
+ )
+
+ def bench_getitem_fullcol(self):
+ self._getset_bench(self._getset_get_kernel, self._getset_args_fullcols,
+ ['csr', 'csc', 'lil', 'dok'],
+ [1000, 10000, 100000],
+ densities=(1e-5, 1e-2, 0.5),
+ spats=[False],
+ msg="__getitem__[:,idx]; len(idx)*M == N")
+
+ def _getset_args_slices(self, A, N):
+ np.random.seed(1234)
+ k = max(1, int(np.sqrt(N)))
+ i = slice((A.shape[0]-k)//2, (A.shape[0]+k)//2)
+ j = slice((A.shape[0]-k)//2, (A.shape[0]+k)//2)
+ a = xrange(*i.indices(A.shape[0]))
+ b = xrange(*j.indices(A.shape[0]))
+ v = np.random.rand(len(a), len(b))
+ return i, j, v
+
+ def bench_setitem_slices(self):
+ self._getset_bench(self._getset_set_kernel, self._getset_args_slices,
+ ['csr', 'csc', 'lil', 'dok'],
+ [1, 10, 100, 1000, 10000, 100000],
+ densities=(1e-5, 1e-2, 0.5),
+ msg="__setitem__[a:b,c:d]; (b-a)*(d-c) = N",
+ )
+
+ def bench_getitem_slices(self):
+ self._getset_bench(self._getset_get_kernel, self._getset_args_slices,
+ ['csr', 'csc', 'lil', 'dok'],
+ [1, 10, 100, 1000, 10000, 100000],
+ densities=(1e-5, 1e-2, 0.5),
+ spats=[False],
+ msg="__getitem__[a:b,c:d]; (b-a)*(d-c) = N")
- def bench_getitem(self):
- def kernel(A, i, j, v=None):
- A[i, j]
- print()
- print(' Sparse Matrix fancy __getitem__')
- self._getset_bench(kernel, ['csr', 'csc', 'lil'])
# class TestLarge(TestCase):
# def bench_large(self):
View
78 scipy/sparse/lil.py
@@ -7,14 +7,14 @@
__all__ = ['lil_matrix','isspmatrix_lil']
-from bisect import bisect_left
+from bisect import bisect_left, bisect_right
import numpy as np
-from scipy.lib.six import xrange
+from scipy.lib.six import xrange, zip as izip
from .base import spmatrix, isspmatrix
from .sputils import getdtype, isshape, issequence, isscalarlike, ismatrix, \
- IndexMixin, upcast_scalar, get_index_dtype
+ IndexMixin, upcast_scalar, get_index_dtype, isintlike
from warnings import warn
from .base import SparseEfficiencyWarning
@@ -220,11 +220,30 @@ def getrowview(self, i):
def getrow(self, i):
"""Returns a copy of the 'i'th row.
"""
+ if i < 0:
@jnothman
jnothman added a note

Is there a reason this is inlined rather than using _check_row_bounds?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+ i += self.shape[0]
+ if i < 0 or i >= self.shape[0]:
+ raise IndexError('row index out of bounds')
@jnothman
jnothman added a note

There's no test coverage for this line.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+
new = lil_matrix((1, self.shape[1]), dtype=self.dtype)
new.rows[0] = self.rows[i][:]
new.data[0] = self.data[i][:]
return new
+ def _check_row_bounds(self, i):
+ if i < 0:
+ i += self.shape[0]
+ if i < 0 or i >= self.shape[0]:
+ raise IndexError('row index out of bounds')
+ return i
+
+ def _check_col_bounds(self, j):
+ if j < 0:
+ j += self.shape[1]
+ if j < 0 or j >= self.shape[1]:
+ raise IndexError('column index out of bounds')
+ return j
+
def __getitem__(self, index):
"""Return the element(s) index=(i, j), where j may be a slice.
This always returns a copy for consistency, since slices into
@@ -248,11 +267,33 @@ def __getitem__(self, index):
i, j = self._unpack_index(index)
# Proper check for other scalar index types
- if isscalarlike(i) and isscalarlike(j):
+ i_intlike = isintlike(i)
+ j_intlike = isintlike(j)
+
+ if i_intlike and j_intlike:
v = _csparsetools.lil_get1(self.shape[0], self.shape[1],
self.rows, self.data,
i, j)
return self.dtype.type(v)
+ elif j_intlike or isinstance(j, slice):
+ # column slicing fast path
+ if j_intlike:
+ j = self._check_col_bounds(j)
+ j = slice(j, j+1)
+
+ if i_intlike:
+ i = self._check_row_bounds(i)
+ i = xrange(i, i+1)
+ i_shape = None
+ elif isinstance(i, slice):
+ i = xrange(*i.indices(self.shape[0]))
+ i_shape = None
+ else:
+ i = np.atleast_1d(i)
+ i_shape = i.shape
+
+ if i_shape is None or len(i_shape) == 1:
+ return self._get_row_ranges(i, j)
i, j = self._index_to_arrays(i, j)
if i.size == 0:
@@ -267,6 +308,35 @@ def __getitem__(self, index):
i, j)
return new
+ def _get_row_ranges(self, rows, col_slice):
+ """
+ Fast path for indexing in the case where column index is slice.
+
+ This gains performance improvement over brute force by more
+ efficient skipping of zeros, by accessing the elements
+ column-wise in order.
+
+ Parameters
+ ----------
+ rows : sequence or xrange
+ Rows indexed. If xrange, must be within valid bounds.
+ col_slice : slice
+ Columns indexed
+
+ """
+ j_start, j_stop, j_stride = col_slice.indices(self.shape[1])
+ col_range = xrange(j_start, j_stop, j_stride)
+ nj = len(col_range)
+ new = lil_matrix((len(rows), nj), dtype=self.dtype)
+
+ _csparsetools.lil_get_row_ranges(self.shape[0], self.shape[1],
+ self.rows, self.data,
+ new.rows, new.data,
+ rows,
+ j_start, j_stop, j_stride, nj)
+
+ return new
+
def __setitem__(self, index, x):
# Scalar fast path first
if isinstance(index, tuple) and len(index) == 2:
Something went wrong with that request. Please try again.