Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sparse: Add "order" and "out" arguments to todense() and toarray() #229

Merged
merged 8 commits into from
Jun 5, 2012
77 changes: 73 additions & 4 deletions scipy/sparse/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,11 +468,67 @@ def getrow(self, i):
#def __array__(self):
# return self.toarray()

def todense(self, order=None, out=None):
    """
    Return a dense matrix representation of this matrix.

    Parameters
    ----------
    order : {'C', 'F'}, optional
        Whether to store multi-dimensional data in C (row-major)
        or Fortran (column-major) order in memory.  The default
        is None, indicating the NumPy default of C-ordered.
        Cannot be specified in conjunction with the `out`
        argument.

    out : ndarray, 2-dimensional, optional
        If specified, uses this array (or `numpy.matrix`) as the
        output buffer instead of allocating a new array to
        return.  The provided array must have the same shape and
        dtype as the sparse matrix on which you are calling the
        method.

    Returns
    -------
    arr : numpy.matrix, 2-dimensional
        A NumPy matrix object with the same shape and containing
        the same data represented by the sparse matrix, with the
        requested memory order.  If `out` was passed and was an
        array (rather than a `numpy.matrix`), it will be filled
        with the appropriate values and returned wrapped in a
        `numpy.matrix` object that shares the same memory.
    """
    # All validation of `order`/`out` is delegated to toarray();
    # asmatrix() only wraps the result, it does not copy an ndarray.
    return np.asmatrix(self.toarray(order=order, out=out))

def toarray(self, order=None, out=None):
    """
    Return a dense ndarray representation of this matrix.

    Parameters
    ----------
    order : {'C', 'F'}, optional
        Whether to store multi-dimensional data in C (row-major)
        or Fortran (column-major) order in memory.  The default
        is None, indicating the NumPy default of C-ordered.
        Cannot be specified in conjunction with the `out`
        argument.

    out : ndarray, 2-dimensional, optional
        If specified, uses this array as the output buffer
        instead of allocating a new array to return.  The provided
        array must have the same shape and dtype as the sparse
        matrix on which you are calling the method.

    Returns
    -------
    arr : ndarray, 2-dimensional
        An array with the same shape and containing the same
        data represented by the sparse matrix, with the requested
        memory order.  If `out` was passed, the same object is
        returned after being modified in-place to contain the
        appropriate values.
    """
    # Generic fallback: convert to COO first and let the COO
    # implementation fill the dense buffer.
    return self.tocoo().toarray(order=order, out=out)

def todok(self):
    """Return this matrix in DOK format, converting via `tocoo()`."""
    return self.tocoo().todok()
Expand Down Expand Up @@ -556,6 +612,19 @@ def setdiag(self, values, k=0):
for i,v in enumerate(values[:max_index]):
self[i, i + k] = v

def _process_toarray_args(self, order, out):
    """
    Validate the `order`/`out` arguments of `toarray` and return a
    zero-filled dense buffer to write into.

    Parameters
    ----------
    order : {'C', 'F'} or None
        Memory order for a newly allocated buffer.  Must be None
        when `out` is given.
    out : ndarray or None
        Caller-supplied output buffer.  Must match `self.shape` and
        `self.dtype` exactly.

    Returns
    -------
    ndarray
        `out` itself (zeroed in-place) when it was supplied,
        otherwise a fresh `np.zeros` array of `self.shape` and
        `self.dtype` in the requested order.

    Raises
    ------
    ValueError
        If both `order` and `out` are given, or if `out` differs
        from the sparse matrix in shape or dtype.
    """
    if out is None:
        return np.zeros(self.shape, dtype=self.dtype, order=order)

    if order is not None:
        raise ValueError('order cannot be specified if out '
                         'is not None')
    if out.shape != self.shape or out.dtype != self.dtype:
        raise ValueError('out array must be same dtype and shape as '
                         'sparse matrix')
    # Callers accumulate into the buffer, so any previous contents
    # must be cleared first.
    out[...] = 0.
    return out


def isspmatrix(x):
    """Return True if `x` is an instance of `spmatrix`, else False."""
    return isinstance(x, spmatrix)
Expand Down
5 changes: 3 additions & 2 deletions scipy/sparse/compressed.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,8 +554,9 @@ def tocoo(self,copy=True):
from coo import coo_matrix
return coo_matrix((data,(row,col)), self.shape)

def toarray(self, order=None, out=None):
    """See the docstring for `spmatrix.toarray`."""
    # copy=False: the COO matrix is only a temporary used to fill the
    # dense result, so tocoo() is asked not to copy.
    return self.tocoo(copy=False).toarray(order=order, out=out)

##############################################################
# methods that examine or modify the internal data structure #
Expand Down
9 changes: 6 additions & 3 deletions scipy/sparse/coo.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,10 +230,13 @@ def transpose(self, copy=False):
M,N = self.shape
return coo_matrix((self.data, (self.col, self.row)), shape=(N,M), copy=copy)

def toarray(self):
B = np.zeros(self.shape, dtype=self.dtype)
def toarray(self, order=None, out=None):
"""See the docstring for `spmatrix.toarray`."""
B = self._process_toarray_args(order, out)
fortran = int(B.flags.f_contiguous)
M,N = self.shape
coo_todense(M, N, self.nnz, self.row, self.col, self.data, B.ravel())
coo_todense(M, N, self.nnz, self.row, self.col, self.data,
B.ravel(order='A'), fortran)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there's a problem here: you never check whether B is contiguous. If it's not, then B.ravel() will return a copy and the output won't be written to the original array.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On Tue, Jun 05, 2012 at 11:07:34AM -0700, Jacob Vanderplas wrote:

>      M,N = self.shape
> -    coo_todense(M, N, self.nnz, self.row, self.col, self.data, B.ravel())
> +    coo_todense(M, N, self.nnz, self.row, self.col, self.data,
> +                B.ravel(order='A'), fortran)
    

I think there's a problem here: you never check whether B is contiguous. If it's not, then B.ravel() will return a copy and the output won't be written to the original array.

Good catch. f2py has this annoying problem as well for non-Fortran-contiguous
inputs; I wouldn't want to accidentally introduce more of that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most sparse types go through coo_matrix as an intermediate step, but some
(mostly just lil) do not. I'll update the docstring to say "most sparse
types" require a C- or F- contiguous out array.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer a `ValueError` if the "out" array is not contiguous. This feature is for users who know exactly what they want in terms of memory management: if they're doing something wrong, they need to know.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I added that too. I just thought the docs should reflect this reality as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the changes in 06cc844.

return B

def tocsc(self):
Expand Down
5 changes: 3 additions & 2 deletions scipy/sparse/dok.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,9 @@ def tocsc(self):
""" Return a copy of this matrix in Compressed Sparse Column format"""
return self.tocoo().tocsc()

def toarray(self, order=None, out=None):
    """See the docstring for `spmatrix.toarray`."""
    # Dense conversion is delegated to the COO implementation.
    return self.tocoo().toarray(order=order, out=out)

def resize(self, shape):
""" Resize the matrix in-place to dimensions given by 'shape'.
Expand Down
5 changes: 3 additions & 2 deletions scipy/sparse/lil.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,8 +431,9 @@ def reshape(self,shape):
new[new_r,new_c] = self[i,j]
return new

def toarray(self):
d = np.zeros(self.shape, dtype=self.dtype)
def toarray(self, order=None, out=None):
"""See the docstring for `spmatrix.toarray`."""
d = self._process_toarray_args(order, out)
for i, row in enumerate(self.rows):
for pos, j in enumerate(row):
d[i, j] = self.data[i][pos]
Expand Down
14 changes: 11 additions & 3 deletions scipy/sparse/sparsetools/coo.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,18 @@ void coo_todense(const I n_row,
const I Ai[],
const I Aj[],
const T Ax[],
T Bx[])
T Bx[],
int fortran)
{
for(I n = 0; n < nnz; n++){
Bx[ n_col * Ai[n] + Aj[n] ] += Ax[n];
if (!fortran) {
for(I n = 0; n < nnz; n++){
Bx[ n_col * Ai[n] + Aj[n] ] += Ax[n];
}
}
else {
for(I n = 0; n < nnz; n++){
Bx[ n_row * Aj[n] + Ai[n] ] += Ax[n];
}
}
}

Expand Down