# scipy/scipy

### Subversion checkout URL

You can clone with
or
.
Fetching contributors…

Cannot retrieve contributors at this time

376 lines (298 sloc) 12.4 KB
 """ A sparse matrix in COOrdinate or 'triplet' format""" __docformat__ = "restructuredtext en" __all__ = ['coo_matrix', 'isspmatrix_coo'] from warnings import warn import numpy as np from sparsetools import coo_tocsr, coo_todense, coo_matvec from base import isspmatrix from data import _data_matrix from sputils import upcast, to_native, isshape, getdtype class coo_matrix(_data_matrix): """ A sparse matrix in COOrdinate format. Also known as the 'ijv' or 'triplet' format. This can be instantiated in several ways: coo_matrix(D) with a dense matrix D coo_matrix(S) with another sparse matrix S (equivalent to S.tocoo()) coo_matrix((M, N), [dtype]) to construct an empty matrix with shape (M, N) dtype is optional, defaulting to dtype='d'. coo_matrix((data, ij), [shape=(M, N)]) The arguments 'data' and 'ij' represent three arrays: 1. data[:] the entries of the matrix, in any order 2. ij[0][:] the row indices of the matrix entries 3. ij[1][:] the column indices of the matrix entries Where ``A[ij[0][k], ij[1][k] = data[k]``. When shape is not specified, it is inferred from the index arrays Notes ----- Advantages of the COO format - facilitates fast conversion among sparse formats - permits duplicate entries (see example) - very fast conversion to and from CSR/CSC formats Disadvantages of the COO format - does not directly support: + arithmetic operations + slicing Intended Usage - COO is a fast format for constructing sparse matrices - Once a matrix has been constructed, convert to CSR or CSC format for fast arithmetic and matrix vector operations - By default when converting to CSR or CSC format, duplicate (i,j) entries will be summed together. This facilitates efficient construction of finite element matrices and the like. (see example) Examples -------- >>> from scipy.sparse import * >>> from scipy import * >>> coo_matrix( (3,4), dtype=int8 ).todense() matrix([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=int8) >>> row = array([0,3,1,0]) >>> col = array([0,3,1,2]) >>> data = array([4,5,7,9]) >>> coo_matrix( (data,(row,col)), shape=(4,4) ).todense() matrix([[4, 0, 9, 0], [0, 7, 0, 0], [0, 0, 0, 0], [0, 0, 0, 5]]) >>> # example with duplicates >>> row = array([0,0,1,3,1,0,0]) >>> col = array([0,2,1,3,1,0,0]) >>> data = array([1,1,1,1,1,1,1]) >>> coo_matrix( (data,(row,col)), shape=(4,4)).todense() matrix([[3, 0, 1, 0], [0, 2, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1]]) """ def __init__(self, arg1, shape=None, dtype=None, copy=False, dims=None): _data_matrix.__init__(self) if dims is not None: warn("dims is deprecated, use shape instead", DeprecationWarning) shape=dims if isinstance(arg1, tuple): if isshape(arg1): M, N = arg1 self.shape = (M,N) self.row = np.array([], dtype=np.intc) self.col = np.array([], dtype=np.intc) self.data = np.array([], getdtype(dtype, default=float)) else: try: obj, ij = arg1 except: raise TypeError('invalid input format') try: if len(ij) != 2: raise TypeError except TypeError: raise TypeError('invalid input format') self.row = np.array(ij[0], copy=copy, dtype=np.intc) self.col = np.array(ij[1], copy=copy, dtype=np.intc) self.data = np.array( obj, copy=copy) if shape is None: if len(self.row) == 0 or len(self.col) == 0: raise ValueError('cannot infer dimensions from zero sized index arrays') M = self.row.max() + 1 N = self.col.max() + 1 self.shape = (M, N) else: # Use 2 steps to ensure shape has length 2. M, N = shape self.shape = (M, N) elif arg1 is None: # Initialize an empty matrix. if not isinstance(shape, tuple) or not isintlike(shape[0]): raise TypeError('dimensions not understood') warn('coo_matrix(None, shape=(M,N)) is deprecated, ' \ 'use coo_matrix( (M,N) ) instead', DeprecationWarning) self.shape = shape self.data = np.array([], getdtype(dtype, default=float)) self.row = np.array([], dtype=np.intc) self.col = np.array([], dtype=np.intc) else: if isspmatrix(arg1): if isspmatrix_coo(arg1) and copy: self.row = arg1.row.copy() self.col = arg1.col.copy() self.data = arg1.data.copy() self.shape = arg1.shape else: coo = arg1.tocoo() self.row = coo.row self.col = coo.col self.data = coo.data self.shape = coo.shape else: #dense argument try: M = np.atleast_2d(np.asarray(arg1)) except: raise TypeError('invalid input format') if np.rank(M) != 2: raise TypeError('expected rank <= 2 array or matrix') self.shape = M.shape self.row,self.col = (M != 0).nonzero() self.data = M[self.row,self.col] if dtype is not None: self.data = self.data.astype(dtype) self._check() def getnnz(self): nnz = len(self.data) if nnz != len(self.row) or nnz != len(self.col): raise ValueError('row, column, and data array must all be the same length') if np.rank(self.data) != 1 or np.rank(self.row) != 1 or np.rank(self.col) != 1: raise ValueError('row, column, and data arrays must have rank 1') return nnz nnz = property(fget=getnnz) def _check(self): """ Checks data structure for consistency """ nnz = self.nnz # index arrays should have integer data types if self.row.dtype.kind != 'i': warn("row index array has non-integer dtype (%s) " \ % self.row.dtype.name ) if self.col.dtype.kind != 'i': warn("col index array has non-integer dtype (%s) " \ % self.col.dtype.name ) # only support 32-bit ints for now self.row = np.asarray(self.row, dtype=np.intc) self.col = np.asarray(self.col, dtype=np.intc) self.data = to_native(self.data) if nnz > 0: if self.row.max() >= self.shape[0]: raise ValueError('row index exceedes matrix dimensions') if self.col.max() >= self.shape[1]: raise ValueError('column index exceedes matrix dimensions') if self.row.min() < 0: raise ValueError('negative row index found') if self.col.min() < 0: raise ValueError('negative column index found') @np.deprecate def rowcol(self, num): return (self.row[num], self.col[num]) @np.deprecate def getdata(self, num): return self.data[num] def transpose(self, copy=False): M,N = self.shape return coo_matrix((self.data, (self.col, self.row)), shape=(N,M), copy=copy) def toarray(self): B = np.zeros(self.shape, dtype=self.dtype) M,N = self.shape coo_todense(M, N, self.nnz, self.row, self.col, self.data, B.ravel()) return B def tocsc(self): """Return a copy of this matrix in Compressed Sparse Column format Duplicate entries will be summed together. Example ------- >>> from numpy import array >>> from scipy.sparse import coo_matrix >>> row = array([0,0,1,3,1,0,0]) >>> col = array([0,2,1,3,1,0,0]) >>> data = array([1,1,1,1,1,1,1]) >>> A = coo_matrix( (data,(row,col)), shape=(4,4)).tocsc() >>> A.todense() matrix([[3, 0, 1, 0], [0, 2, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1]]) """ from csc import csc_matrix if self.nnz == 0: return csc_matrix(self.shape, dtype=self.dtype) else: M,N = self.shape indptr = np.empty(N + 1, dtype=np.intc) indices = np.empty(self.nnz, dtype=np.intc) data = np.empty(self.nnz, dtype=upcast(self.dtype)) coo_tocsr(N, M, self.nnz, \ self.col, self.row, self.data, \ indptr, indices, data) A = csc_matrix((data, indices, indptr), shape=self.shape) A.sum_duplicates() return A def tocsr(self): """Return a copy of this matrix in Compressed Sparse Row format Duplicate entries will be summed together. Example ------- >>> from numpy import array >>> from scipy.sparse import coo_matrix >>> row = array([0,0,1,3,1,0,0]) >>> col = array([0,2,1,3,1,0,0]) >>> data = array([1,1,1,1,1,1,1]) >>> A = coo_matrix( (data,(row,col)), shape=(4,4)).tocsr() >>> A.todense() matrix([[3, 0, 1, 0], [0, 2, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1]]) """ from csr import csr_matrix if self.nnz == 0: return csr_matrix(self.shape, dtype=self.dtype) else: M,N = self.shape indptr = np.empty(M + 1, dtype=np.intc) indices = np.empty(self.nnz, dtype=np.intc) data = np.empty(self.nnz, dtype=upcast(self.dtype)) coo_tocsr(M, N, self.nnz, \ self.row, self.col, self.data, \ indptr, indices, data) A = csr_matrix((data, indices, indptr), shape=self.shape) A.sum_duplicates() return A def tocoo(self, copy=False): if copy: return self.copy() else: return self def todia(self): from dia import dia_matrix ks = self.col - self.row #the diagonal for each nonzero diags = np.unique(ks) if len(diags) > 100: #probably undesired, should we do something? #should todia() have a maxdiags parameter? pass #initialize and fill in data array data = np.zeros( (len(diags), self.col.max()+1), dtype=self.dtype) data[ np.searchsorted(diags,ks), self.col ] = self.data return dia_matrix((data,diags), shape=self.shape) def todok(self): from itertools import izip from dok import dok_matrix dok = dok_matrix((self.shape), dtype=self.dtype) dok.update( izip(izip(self.row,self.col),self.data) ) return dok # needed by _data_matrix def _with_data(self,data,copy=True): """Returns a matrix with the same sparsity structure as self, but with different data. By default the index arrays (i.e. .row and .col) are copied. """ if copy: return coo_matrix( (data, (self.row.copy(), self.col.copy()) ), \ shape=self.shape, dtype=data.dtype) else: return coo_matrix( (data, (self.row, self.col) ), \ shape=self.shape, dtype=data.dtype) ########################### # Multiplication handlers # ########################### def _mul_vector(self, other): #output array result = np.zeros( self.shape[0], dtype=upcast(self.dtype,other.dtype) ) coo_matvec(self.nnz, self.row, self.col, self.data, other, result) return result def _mul_multivector(self, other): return np.hstack( [ self._mul_vector(col).reshape(-1,1) for col in other.T ] ) from sputils import _isinstance def isspmatrix_coo( x ): return _isinstance(x, coo_matrix)
Something went wrong with that request. Please try again.