diff --git a/scipy/sparse/_base.py b/scipy/sparse/_base.py index 0e144fb20142..42f6bc789bac 100644 --- a/scipy/sparse/_base.py +++ b/scipy/sparse/_base.py @@ -67,7 +67,15 @@ class _spbase: __array_priority__ = 10.1 _format = 'und' # undefined - ndim = 2 + + @property + def ndim(self) -> int: + return len(self._shape) + + @property + def _shape_as_2d(self): + s = self._shape + return (1, s[-1]) if len(s) == 1 else s @property def _bsr_container(self): @@ -568,7 +576,10 @@ def _mul_dispatch(self, other): # This method has to be different from `__matmul__` because it is also # called by sparse matrix classes. - M, N = self.shape + # Currently matrix multiplication is only supported + # for 2D arrays. Hence we unpacked and use only the + # two last axes' lengths. + M, N = self._shape_as_2d if other.__class__ is np.ndarray: # Fast path for the most common case @@ -586,6 +597,8 @@ def _mul_dispatch(self, other): if issparse(other): if self.shape[1] != other.shape[0]: raise ValueError('dimension mismatch') + if other.ndim == 1: + raise ValueError('Cannot yet multiply a 1d sparse array') return self._mul_sparse_matrix(other) # If it's a list or whatever, treat it like an array diff --git a/scipy/sparse/_compressed.py b/scipy/sparse/_compressed.py index a679f333ed95..85196dfe6d6b 100644 --- a/scipy/sparse/_compressed.py +++ b/scipy/sparse/_compressed.py @@ -376,6 +376,8 @@ def multiply(self, other): if self.shape == other.shape: other = self.__class__(other) return self._binopt(other, '_elmul_') + if other.ndim == 1: + raise TypeError("broadcast from a 1d array not yet supported") # Single element. elif other.shape == (1, 1): return self._mul_scalar(other.toarray()[0, 0]) diff --git a/scipy/sparse/_construct.py b/scipy/sparse/_construct.py index abf529ccbc6b..1d8684947bee 100644 --- a/scipy/sparse/_construct.py +++ b/scipy/sparse/_construct.py @@ -1043,14 +1043,15 @@ def block_diag(mats, format=None, dtype=None): c_idx = 0 for a in mats: if isinstance(a, (list, numbers.Number)): - a = coo_array(a) - nrows, ncols = a.shape + a = coo_array(np.atleast_2d(a)) if issparse(a): a = a.tocoo() + nrows, ncols = a._shape_as_2d row.append(a.row + r_idx) col.append(a.col + c_idx) data.append(a.data) else: + nrows, ncols = a.shape a_row, a_col = np.divmod(np.arange(nrows*ncols), ncols) row.append(a_row + r_idx) col.append(a_col + c_idx) diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py index 5fe0c2ab038e..b30fb8edf8d0 100644 --- a/scipy/sparse/_coo.py +++ b/scipy/sparse/_coo.py @@ -4,6 +4,7 @@ __all__ = ['coo_array', 'coo_matrix', 'isspmatrix_coo'] +import math from warnings import warn import numpy as np @@ -13,7 +14,7 @@ from ._base import issparse, SparseEfficiencyWarning, _spbase, sparray from ._data import _data_matrix, _minmax_mixin from ._sputils import (upcast, upcast_char, to_native, isshape, getdtype, - getdata, downcast_intp_index, + getdata, downcast_intp_index, get_index_dtype, check_shape, check_reshape_kwargs) import operator @@ -24,73 +25,69 @@ class _coo_base(_data_matrix, _minmax_mixin): def __init__(self, arg1, shape=None, dtype=None, copy=False): _data_matrix.__init__(self) + is_array = isinstance(self, sparray) if isinstance(arg1, tuple): - if isshape(arg1): - M, N = arg1 - self._shape = check_shape((M, N)) - idx_dtype = self._get_index_dtype(maxval=max(M, N)) + if isshape(arg1, allow_1d=is_array): + self._shape = check_shape(arg1, allow_1d=is_array) + idx_dtype = self._get_index_dtype(maxval=max(self._shape)) data_dtype = getdtype(dtype, default=float) - self.row = np.array([], dtype=idx_dtype) - self.col = np.array([], dtype=idx_dtype) + self.indices = tuple(np.array([], dtype=idx_dtype) + for _ in range(len(self._shape))) self.data = np.array([], dtype=data_dtype) self.has_canonical_format = True else: try: - obj, (row, col) = arg1 + obj, indices = arg1 except (TypeError, ValueError) as e: raise TypeError('invalid input format') from e if shape is None: - if len(row) == 0 or len(col) == 0: + if any(len(idx) == 0 for idx in indices): raise ValueError('cannot infer dimensions from zero ' 'sized index arrays') - M = operator.index(np.max(row)) + 1 - N = operator.index(np.max(col)) + 1 - self._shape = check_shape((M, N)) - else: - # Use 2 steps to ensure shape has length 2. - M, N = shape - self._shape = check_shape((M, N)) + shape = tuple(operator.index(np.max(idx)) + 1 + for idx in indices) + self._shape = check_shape(shape, allow_1d=is_array) - idx_dtype = self._get_index_dtype((row, col), + idx_dtype = self._get_index_dtype(indices, maxval=max(self.shape), check_contents=True) - self.row = np.array(row, copy=copy, dtype=idx_dtype) - self.col = np.array(col, copy=copy, dtype=idx_dtype) + self.indices = tuple(np.array(idx, copy=copy, dtype=idx_dtype) + for idx in indices) self.data = getdata(obj, copy=copy, dtype=dtype) self.has_canonical_format = False else: if issparse(arg1): if arg1.format == self.format and copy: - self.row = arg1.row.copy() - self.col = arg1.col.copy() + self.indices = tuple(idx.copy() for idx in arg1.indices) self.data = arg1.data.copy() - self._shape = check_shape(arg1.shape) + self._shape = check_shape(arg1.shape, allow_1d=is_array) + self.has_canonical_format = arg1.has_canonical_format else: coo = arg1.tocoo() - self.row = coo.row - self.col = coo.col + self.indices = tuple(coo.indices) self.data = coo.data - self._shape = check_shape(coo.shape) - self.has_canonical_format = False + self._shape = check_shape(coo.shape, allow_1d=is_array) + self.has_canonical_format = False else: - #dense argument - M = np.atleast_2d(np.asarray(arg1)) - - if M.ndim != 2: - raise TypeError('expected dimension <= 2 array or matrix') - - self._shape = check_shape(M.shape) + # dense argument + M = np.asarray(arg1) + if not is_array: + M = np.atleast_2d(M) + if M.ndim != 2: + raise TypeError('expected dimension <= 2 array or matrix') + + self._shape = check_shape(M.shape, allow_1d=is_array) if shape is not None: - if check_shape(shape) != self._shape: + if check_shape(shape, allow_1d=is_array) != self._shape: message = f'inconsistent shapes: {shape} != {self._shape}' raise ValueError(message) index_dtype = self._get_index_dtype(maxval=max(self._shape)) - row, col = M.nonzero() - self.row = row.astype(index_dtype, copy=False) - self.col = col.astype(index_dtype, copy=False) - self.data = M[self.row, self.col] + indices = M.nonzero() + self.indices = tuple(idx.astype(index_dtype, copy=False) + for idx in indices) + self.data = M[indices] self.has_canonical_format = True if dtype is not None: @@ -98,8 +95,30 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False): self._check() + @property + def row(self): + return self.indices[-2] if self.ndim > 1 else np.zeros_like(self.col) + + + @row.setter + def row(self, new_row): + if self.ndim < 2: + raise ValueError('cannot set row attribute of a 1-dimensional sparse array') + new_row = np.asarray(new_row, dtype=self.indices[-2].dtype) + self.indices = self.indices[:-2] + (new_row,) + self.indices[-1:] + + @property + def col(self): + return self.indices[-1] + + @col.setter + def col(self, new_col): + new_col = np.asarray(new_col, dtype=self.indices[-1].dtype) + self.indices = self.indices[:-1] + (new_col,) + def reshape(self, *args, **kwargs): - shape = check_shape(args, self.shape) + is_array = isinstance(self, sparray) + shape = check_shape(args, self.shape, allow_1d=is_array) order, copy = check_reshape_kwargs(kwargs) # Return early if reshape is not required @@ -109,112 +128,130 @@ def reshape(self, *args, **kwargs): else: return self - nrows, ncols = self.shape - - if order == 'C': - # Upcast to avoid overflows: the coo_array constructor - # below will downcast the results to a smaller dtype, if - # possible. - maxval = (ncols * max(0, nrows - 1) + max(0, ncols - 1)) - dtype = self._get_index_dtype(maxval=maxval) - - flat_indices = np.multiply(ncols, self.row, dtype=dtype) + self.col - new_row, new_col = divmod(flat_indices, shape[1]) - elif order == 'F': - maxval = (nrows * max(0, ncols - 1) + max(0, nrows - 1)) - dtype = self._get_index_dtype(maxval=maxval) - - flat_indices = np.multiply(nrows, self.col, dtype=dtype) + self.row - new_col, new_row = divmod(flat_indices, shape[0]) + # When reducing the number of dimensions, we need to be careful about + # index overflow. This is why we can't simply call + # `np.ravel_multi_index()` followed by `np.unravel_index()` here. + flat_indices = _ravel_indices(self.indices, self.shape, order=order) + if len(shape) == 2: + if order == 'C': + new_indices = divmod(flat_indices, shape[1]) + else: + new_indices = divmod(flat_indices, shape[0])[::-1] else: - raise ValueError("'order' must be 'C' or 'F'") + new_indices = np.unravel_index(flat_indices, shape, order=order) # Handle copy here rather than passing on to the constructor so that no - # copy will be made of new_row and new_col regardless + # copy will be made of `new_indices` regardless. if copy: new_data = self.data.copy() else: new_data = self.data - return self.__class__((new_data, (new_row, new_col)), - shape=shape, copy=False) + return self.__class__((new_data, new_indices), shape=shape, copy=False) reshape.__doc__ = _spbase.reshape.__doc__ def _getnnz(self, axis=None): - if axis is None: + if axis is None or (axis == 0 and self.ndim == 1): nnz = len(self.data) - if nnz != len(self.row) or nnz != len(self.col): - raise ValueError('row, column, and data array must all be the ' + if any(len(idx) != nnz for idx in self.indices): + raise ValueError('all index and data arrays must have the ' 'same length') - if self.data.ndim != 1 or self.row.ndim != 1 or \ - self.col.ndim != 1: + if self.data.ndim != 1 or any(idx.ndim != 1 for idx in self.indices): raise ValueError('row, column, and data arrays must be 1-D') return int(nnz) if axis < 0: - axis += 2 - if axis == 0: - return np.bincount(downcast_intp_index(self.col), - minlength=self.shape[1]) - elif axis == 1: - return np.bincount(downcast_intp_index(self.row), - minlength=self.shape[0]) - else: + axis += self.ndim + if axis >= self.ndim: raise ValueError('axis out of bounds') + if self.ndim > 2: + raise NotImplementedError('per-axis nnz for COO arrays with >2 ' + 'dimensions is not supported') + return np.bincount(downcast_intp_index(self.indices[1 - axis]), + minlength=self.shape[1 - axis]) _getnnz.__doc__ = _spbase._getnnz.__doc__ def _check(self): """ Checks data structure for consistency """ + if self.ndim != len(self.indices): + raise ValueError('mismatching number of index arrays for shape; ' + f'got {len(self.indices)}, expected {self.ndim}') # index arrays should have integer data types - if self.row.dtype.kind != 'i': - warn(f"row index array has non-integer dtype ({self.row.dtype.name})", - stacklevel=3) - if self.col.dtype.kind != 'i': - warn(f"col index array has non-integer dtype ({self.col.dtype.name})", - stacklevel=3) - - idx_dtype = self._get_index_dtype((self.row, self.col), maxval=max(self.shape)) - self.row = np.asarray(self.row, dtype=idx_dtype) - self.col = np.asarray(self.col, dtype=idx_dtype) + for i, idx in enumerate(self.indices): + if idx.dtype.kind != 'i': + warn(f'index array {i} has non-integer dtype ({idx.dtype.name})', + stacklevel=3) + + idx_dtype = self._get_index_dtype(self.indices, maxval=max(self.shape)) + self.indices = tuple(np.asarray(idx, dtype=idx_dtype) + for idx in self.indices) self.data = to_native(self.data) if self.nnz > 0: - if self.row.max() >= self.shape[0]: - raise ValueError('row index exceeds matrix dimensions') - if self.col.max() >= self.shape[1]: - raise ValueError('column index exceeds matrix dimensions') - if self.row.min() < 0: - raise ValueError('negative row index found') - if self.col.min() < 0: - raise ValueError('negative column index found') + for i, idx in enumerate(self.indices): + if idx.max() >= self.shape[i]: + raise ValueError(f'axis {i} index {idx.max()} exceeds ' + f'matrix dimension {self.shape[i]}') + if idx.min() < 0: + raise ValueError(f'negative axis {i} index: {idx.min()}') def transpose(self, axes=None, copy=False): - if axes is not None and axes != (1, 0): - raise ValueError("Sparse array/matrices do not support " - "an 'axes' parameter because swapping " - "dimensions is the only logical permutation.") - - M, N = self.shape - return self.__class__((self.data, (self.col, self.row)), - shape=(N, M), copy=copy) + if axes is None: + axes = range(self.ndim)[::-1] + elif isinstance(self, sparray): + if len(axes) != self.ndim: + raise ValueError("axes don't match matrix dimensions") + if len(set(axes)) != self.ndim: + raise ValueError("repeated axis in transpose") + elif axes != (1, 0): + raise ValueError("Sparse matrices do not support an 'axes' " + "parameter because swapping dimensions is the " + "only logical permutation.") + + permuted_shape = tuple(self._shape[i] for i in axes) + permuted_indices = tuple(self.indices[i] for i in axes) + return self.__class__((self.data, permuted_indices), + shape=permuted_shape, copy=copy) transpose.__doc__ = _spbase.transpose.__doc__ - def resize(self, *shape): - shape = check_shape(shape) - new_M, new_N = shape - M, N = self.shape + def resize(self, *shape) -> None: + is_array = isinstance(self, sparray) + shape = check_shape(shape, allow_1d=is_array) + + # Check for added dimensions. + if len(shape) > self.ndim: + flat_indices = _ravel_indices(self.indices, self.shape) + max_size = math.prod(shape) + self.indices = np.unravel_index(flat_indices[:max_size], shape) + self.data = self.data[:max_size] + self._shape = shape + return - if new_M < M or new_N < N: - mask = np.logical_and(self.row < new_M, self.col < new_N) + # Check for removed dimensions. + if len(shape) < self.ndim: + tmp_shape = ( + self._shape[:len(shape) - 1] # Original shape without last axis + + (-1,) # Last axis is used to flatten the array + + (1,) * (self.ndim - len(shape)) # Pad with ones + ) + tmp = self.reshape(tmp_shape) + self.indices = tmp.indices[:len(shape)] + self._shape = tmp.shape[:len(shape)] + + # Handle truncation of existing dimensions. + is_truncating = any(old > new for old, new in zip(self.shape, shape)) + if is_truncating: + mask = np.logical_and.reduce([ + idx < size for idx, size in zip(self.indices, shape) + ]) if not mask.all(): - self.row = self.row[mask] - self.col = self.col[mask] + self.indices = tuple(idx[mask] for idx in self.indices) self.data = self.data[mask] self._shape = shape @@ -226,10 +263,15 @@ def toarray(self, order=None, out=None): fortran = int(B.flags.f_contiguous) if not fortran and not B.flags.c_contiguous: raise ValueError("Output array must be C or F contiguous") - M,N = self.shape + if self.ndim > 2: + raise ValueError("Cannot densify higher-rank sparse array") + # This handles both 0D and 1D cases correctly regardless of the + # original shape. + M, N = self._shape_as_2d coo_todense(M, N, self.nnz, self.row, self.col, self.data, B.ravel('A'), fortran) - return B + # Note: reshape() doesn't copy here, but does return a new array (view). + return B.reshape(self.shape) toarray.__doc__ = _spbase.toarray.__doc__ @@ -253,6 +295,8 @@ def tocsc(self, copy=False): [0, 0, 0, 1]]) """ + if self.ndim != 2: + raise ValueError("Cannot convert a 1d sparse array to csc format") if self.nnz == 0: return self._csc_container(self.shape, dtype=self.dtype) else: @@ -295,6 +339,8 @@ def tocsr(self, copy=False): [0, 0, 0, 1]]) """ + if self.ndim != 2: + raise ValueError("Cannot convert a 1d sparse array to csr format") if self.nnz == 0: return self._csr_container(self.shape, dtype=self.dtype) else: @@ -326,6 +372,8 @@ def tocoo(self, copy=False): tocoo.__doc__ = _spbase.tocoo.__doc__ def todia(self, copy=False): + if self.ndim != 2: + raise ValueError("Cannot convert a 1d sparse array to dia format") self.sum_duplicates() ks = self.col - self.row # the diagonal for each nonzero diags, diag_idx = np.unique(ks, return_inverse=True) @@ -348,6 +396,8 @@ def todia(self, copy=False): todia.__doc__ = _spbase.todia.__doc__ def todok(self, copy=False): + if self.ndim != 2: + raise ValueError("Cannot convert a 1d sparse array to dok format") self.sum_duplicates() dok = self._dok_container((self.shape), dtype=self.dtype) dok._update(zip(zip(self.row,self.col),self.data)) @@ -357,6 +407,8 @@ def todok(self, copy=False): todok.__doc__ = _spbase.todok.__doc__ def diagonal(self, k=0): + if self.ndim != 2: + raise ValueError("diagonal requires two dimensions") rows, cols = self.shape if k <= -rows or k >= cols: return np.empty(0, dtype=self.data.dtype) @@ -368,9 +420,8 @@ def diagonal(self, k=0): row = self.row[diag_mask] data = self.data[diag_mask] else: - row, _, data = self._sum_duplicates(self.row[diag_mask], - self.col[diag_mask], - self.data[diag_mask]) + inds = tuple(idx[diag_mask] for idx in self.indices) + (row, _), data = self._sum_duplicates(inds, self.data[diag_mask]) diag[row + min(k, 0)] = data return diag @@ -378,6 +429,8 @@ def diagonal(self, k=0): diagonal.__doc__ = _data_matrix.diagonal.__doc__ def _setdiag(self, values, k): + if self.ndim != 2: + raise ValueError("setting a diagonal requires two dimensions") M, N = self.shape if values.ndim and not len(values): return @@ -408,54 +461,51 @@ def _setdiag(self, values, k): new_data[:] = values # Update the internal structure. - self.row = np.concatenate((self.row[keep], new_row)) - self.col = np.concatenate((self.col[keep], new_col)) + self.indices = (np.concatenate((self.row[keep], new_row)), + np.concatenate((self.col[keep], new_col))) self.data = np.concatenate((self.data[keep], new_data)) self.has_canonical_format = False # needed by _data_matrix - def _with_data(self,data,copy=True): + def _with_data(self, data, copy=True): """Returns a matrix with the same sparsity structure as self, - but with different data. By default the index arrays - (i.e. .row and .col) are copied. + but with different data. By default the index arrays are copied. """ if copy: - return self.__class__((data, (self.row.copy(), self.col.copy())), - shape=self.shape, dtype=data.dtype) + indices = tuple(idx.copy() for idx in self.indices) else: - return self.__class__((data, (self.row, self.col)), - shape=self.shape, dtype=data.dtype) + indices = self.indices + return self.__class__((data, indices), shape=self.shape, dtype=data.dtype) - def sum_duplicates(self): + def sum_duplicates(self) -> None: """Eliminate duplicate entries by adding them together This is an *in place* operation """ if self.has_canonical_format: return - summed = self._sum_duplicates(self.row, self.col, self.data) - self.row, self.col, self.data = summed + summed = self._sum_duplicates(self.indices, self.data) + self.indices, self.data = summed self.has_canonical_format = True - def _sum_duplicates(self, row, col, data): - # Assumes (data, row, col) not in canonical format. + def _sum_duplicates(self, indices, data): + # Assumes indices not in canonical format. if len(data) == 0: - return row, col, data + return indices, data # Sort indices w.r.t. rows, then cols. This corresponds to C-order, # which we rely on for argmin/argmax to return the first index in the # same way that numpy does (in the case of ties). - order = np.lexsort((col, row)) - row = row[order] - col = col[order] + order = np.lexsort(indices[::-1]) + indices = tuple(idx[order] for idx in indices) data = data[order] - unique_mask = ((row[1:] != row[:-1]) | - (col[1:] != col[:-1])) + unique_mask = np.logical_or.reduce([ + idx[1:] != idx[:-1] for idx in indices + ]) unique_mask = np.append(True, unique_mask) - row = row[unique_mask] - col = col[unique_mask] + indices = tuple(idx[unique_mask] for idx in indices) unique_inds, = np.nonzero(unique_mask) data = np.add.reduceat(data, unique_inds, dtype=self.dtype) - return row, col, data + return indices, data def eliminate_zeros(self): """Remove zero entries from the array/matrix @@ -464,8 +514,7 @@ def eliminate_zeros(self): """ mask = self.data != 0 self.data = self.data[mask] - self.row = self.row[mask] - self.col = self.col[mask] + self.indices = tuple(idx[mask] for idx in self.indices) ####################### # Arithmetic handlers # @@ -477,26 +526,73 @@ def _add_dense(self, other): dtype = upcast_char(self.dtype.char, other.dtype.char) result = np.array(other, dtype=dtype, copy=True) fortran = int(result.flags.f_contiguous) - M, N = self.shape + M, N = self._shape_as_2d coo_todense(M, N, self.nnz, self.row, self.col, self.data, result.ravel('A'), fortran) return self._container(result, copy=False) def _mul_vector(self, other): - #output array - result = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char, - other.dtype.char)) - coo_matvec(self.nnz, self.row, self.col, self.data, other, result) + result_shape = self.shape[0] if self.ndim > 1 else 1 + result = np.zeros(result_shape, + dtype=upcast_char(self.dtype.char, other.dtype.char)) + + if self.ndim == 2: + col = self.col + row = self.row + elif self.ndim == 1: + col = self.indices[0] + row = np.zeros_like(col) + else: + raise NotImplementedError( + f"coo_matvec not implemented for ndim={self.ndim}") + + coo_matvec(self.nnz, row, col, self.data, other, result) + # Array semantics return a scalar here, not a single-element array. + if isinstance(self, sparray) and result_shape == 1: + return result[0] return result def _mul_multivector(self, other): - result = np.zeros((other.shape[1], self.shape[0]), - dtype=upcast_char(self.dtype.char, other.dtype.char)) - for i, col in enumerate(other.T): - coo_matvec(self.nnz, self.row, self.col, self.data, col, result[i]) + result_dtype = upcast_char(self.dtype.char, other.dtype.char) + if self.ndim == 2: + result_shape = (other.shape[1], self.shape[0]) + col = self.col + row = self.row + elif self.ndim == 1: + result_shape = (other.shape[1],) + col = self.indices[0] + row = np.zeros_like(col) + else: + raise NotImplementedError( + f"coo_matvec not implemented for ndim={self.ndim}") + + result = np.zeros(result_shape, dtype=result_dtype) + for i, other_col in enumerate(other.T): + coo_matvec(self.nnz, row, col, self.data, other_col, result[i:i + 1]) return result.T.view(type=type(other)) +def _ravel_indices(indices, shape, order='C'): + """Like np.ravel_multi_index, but avoids some overflow issues.""" + if len(indices) == 1: + return indices[0] + # Handle overflow as in https://github.com/scipy/scipy/pull/9132 + if len(indices) == 2: + nrows, ncols = shape + row, col = indices + if order == 'C': + maxval = (ncols * max(0, nrows - 1) + max(0, ncols - 1)) + idx_dtype = get_index_dtype(maxval=maxval) + return np.multiply(ncols, row, dtype=idx_dtype) + col + elif order == 'F': + maxval = (nrows * max(0, ncols - 1) + max(0, nrows - 1)) + idx_dtype = get_index_dtype(maxval=maxval) + return np.multiply(nrows, col, dtype=idx_dtype) + row + else: + raise ValueError("'order' must be 'C' or 'F'") + return np.ravel_multi_index(indices, shape, order=order) + + def isspmatrix_coo(x): """Is `x` of coo_matrix type? @@ -532,40 +628,37 @@ class coo_array(_coo_base, sparray): This can be instantiated in several ways: coo_array(D) - where D is a 2-D ndarray + where D is an ndarray coo_array(S) with another sparse array or matrix S (equivalent to S.tocoo()) - coo_array((M, N), [dtype]) - to construct an empty array with shape (M, N) + coo_array(shape, [dtype]) + to construct an empty sparse array with shape `shape` dtype is optional, defaulting to dtype='d'. - coo_array((data, (i, j)), [shape=(M, N)]) - to construct from three arrays: - 1. data[:] the entries of the array, in any order - 2. i[:] the row indices of the array entries - 3. j[:] the column indices of the array entries + coo_array((data, indices), [shape]) + to construct from existing data and index arrays: + 1. data[:] the entries of the sparse array, in any order + 2. indices[i][:] the axis-i indices of the data entries - Where ``A[i[k], j[k]] = data[k]``. When shape is not - specified, it is inferred from the index arrays + Where ``A[indices] = data``, and indices is a tuple of index arrays. + When shape is not specified, it is inferred from the index arrays. Attributes ---------- dtype : dtype - Data type of the array - shape : 2-tuple - Shape of the array + Data type of the sparse array + shape : tuple of integers + Shape of the sparse array ndim : int - Number of dimensions (this is always 2) + Number of dimensions of the sparse array nnz size data - COO format data array of the array - row - COO format row index array of the array - col - COO format column index array of the array + COO format data array of the sparse array + indices + COO format tuple of index arrays has_canonical_format : bool Whether the matrix has sorted indices and no duplicates format @@ -603,7 +696,7 @@ class coo_array(_coo_base, sparray): Examples -------- - >>> # Constructing an empty array + >>> # Constructing an empty sparse array >>> import numpy as np >>> from scipy.sparse import coo_array >>> coo_array((3, 4), dtype=np.int8).toarray() @@ -611,7 +704,7 @@ class coo_array(_coo_base, sparray): [0, 0, 0, 0], [0, 0, 0, 0]], dtype=int8) - >>> # Constructing an array using ijv format + >>> # Constructing a sparse array using ijv format >>> row = np.array([0, 3, 1, 0]) >>> col = np.array([0, 3, 1, 2]) >>> data = np.array([4, 5, 7, 9]) @@ -621,7 +714,7 @@ class coo_array(_coo_base, sparray): [0, 0, 0, 0], [0, 0, 0, 5]]) - >>> # Constructing an array with duplicate indices + >>> # Constructing a sparse array with duplicate indices >>> row = np.array([0, 0, 1, 3, 1, 0, 0]) >>> col = np.array([0, 2, 1, 3, 1, 0, 0]) >>> data = np.array([1, 1, 1, 1, 1, 1, 1]) @@ -751,3 +844,9 @@ class coo_matrix(spmatrix, _coo_base): """ + def __setstate__(self, state): + if 'indices' not in state: + # For retro-compatibility with the previous attributes + # storing nnz coordinates for 2D COO matrix. + state['indices'] = (state.pop('row'), state.pop('col')) + self.__dict__.update(state) diff --git a/scipy/sparse/_sputils.py b/scipy/sparse/_sputils.py index 96237e438f03..3bdb8f43b633 100644 --- a/scipy/sparse/_sputils.py +++ b/scipy/sparse/_sputils.py @@ -236,14 +236,14 @@ def isintlike(x) -> bool: return True -def isshape(x, nonneg=False, allow_ndim=False) -> bool: +def isshape(x, nonneg=False, *, allow_1d=False) -> bool: """Is x a valid tuple of dimensions? If nonneg, also checks that the dimensions are non-negative. - If allow_ndim, shapes of any dimensionality are allowed. + If allow_1d, shapes of length 1 or 2 are allowed. """ ndim = len(x) - if not allow_ndim and ndim != 2: + if ndim != 2 and not (allow_1d and ndim == 1): return False for d in x: if not isintlike(d): @@ -292,8 +292,26 @@ def validateaxis(axis) -> None: raise ValueError("axis out of range") -def check_shape(args, current_shape=None): - """Imitate numpy.matrix handling of shape arguments""" +def check_shape(args, current_shape=None, *, allow_1d=False) -> tuple[int, ...]: + """Imitate numpy.matrix handling of shape arguments + + Parameters + ---------- + args : array_like + Data structures providing information about the shape of the sparse array. + current_shape : tuple, optional + The current shape of the sparse array or matrix. + If None (default), the current shape will be inferred from args. + allow_1d : bool, optional + If True, then 1-D or 2-D arrays are accepted. + If False (default), then only 2-D arrays are accepted and an error is + raised otherwise. + + Returns + ------- + new_shape: tuple + The new shape after validation. + """ if len(args) == 0: raise TypeError("function missing 1 required positional argument: " "'shape'") @@ -308,9 +326,13 @@ def check_shape(args, current_shape=None): new_shape = tuple(operator.index(arg) for arg in args) if current_shape is None: - if len(new_shape) != 2: + if allow_1d: + if len(new_shape) not in (1, 2): + raise ValueError('shape must be a 1- or 2-tuple of positive ' + 'integers') + elif len(new_shape) != 2: raise ValueError('shape must be a 2-tuple of positive integers') - elif any(d < 0 for d in new_shape): + if any(d < 0 for d in new_shape): raise ValueError("'shape' elements cannot be negative") else: # Check the current size only if needed @@ -335,7 +357,7 @@ def check_shape(args, current_shape=None): else: raise ValueError('can only specify one unknown dimension') - if len(new_shape) != 2: + if len(new_shape) != 2 and not (allow_1d and len(new_shape) == 1): raise ValueError('matrix shape must be two-dimensional') return new_shape diff --git a/scipy/sparse/tests/meson.build b/scipy/sparse/tests/meson.build index 07b14b446757..b0959d8cea05 100644 --- a/scipy/sparse/tests/meson.build +++ b/scipy/sparse/tests/meson.build @@ -2,6 +2,7 @@ python_sources = [ '__init__.py', 'test_array_api.py', 'test_base.py', + 'test_coo.py', 'test_construct.py', 'test_deprecations.py', 'test_csc.py', diff --git a/scipy/sparse/tests/test_base.py b/scipy/sparse/tests/test_base.py index f7852d280e46..b5422c0c5761 100644 --- a/scipy/sparse/tests/test_base.py +++ b/scipy/sparse/tests/test_base.py @@ -1230,13 +1230,13 @@ def test_todense(self): chk = self.datsp.todense(out=out) assert_array_equal(self.dat, out) assert_array_equal(self.dat, chk) - assert_(chk.base is out) + assert np.may_share_memory(chk, out) # Check with out array (matrix). out = asmatrix(np.zeros(self.datsp.shape, dtype=self.datsp.dtype)) chk = self.datsp.todense(out=out) assert_array_equal(self.dat, out) assert_array_equal(self.dat, chk) - assert_(chk is out) + assert np.may_share_memory(chk, out) a = array([[1.,2.,3.]]) dense_dot_dense = a @ self.dat check = a @ self.datsp.todense() @@ -1336,9 +1336,8 @@ def test_astype_immutable(self): S = self.spcreator(D) if hasattr(S, 'data'): S.data.flags.writeable = False - if hasattr(S, 'indptr'): + if S.format in ('csr', 'csc', 'bsr'): S.indptr.flags.writeable = False - if hasattr(S, 'indices'): S.indices.flags.writeable = False for x in supported_dtypes: D_casted = D.astype(x) @@ -2098,9 +2097,17 @@ def check(): assert_equal(datsp.shape, sploaded.shape) assert_array_equal(datsp.toarray(), sploaded.toarray()) assert_equal(datsp.format, sploaded.format) + # Hacky check for class member equality. This assumes that + # all instance variables are one of: + # 1. Plain numpy ndarrays + # 2. Tuples of ndarrays + # 3. Types that support equality comparison with == for key, val in datsp.__dict__.items(): if isinstance(val, np.ndarray): assert_array_equal(val, sploaded.__dict__[key]) + elif (isinstance(val, tuple) and val + and isinstance(val[0], np.ndarray)): + assert_array_equal(val, sploaded.__dict__[key]) else: assert_(val == sploaded.__dict__[key]) check() diff --git a/scipy/sparse/tests/test_construct.py b/scipy/sparse/tests/test_construct.py index 1dbfeacc20cd..8dac24f33d8c 100644 --- a/scipy/sparse/tests/test_construct.py +++ b/scipy/sparse/tests/test_construct.py @@ -614,6 +614,12 @@ def test_block_diag_scalar_1d_args(self): # one 1d matrix and a scalar assert_array_equal(construct.block_diag([[2,3], 4]).toarray(), [[2, 3, 0], [0, 0, 4]]) + # 1d sparse arrays + A = coo_array([1,0,3]) + B = coo_array([0,4]) + assert_array_equal(construct.block_diag([A, B]).toarray(), + [[1, 0, 3, 0, 0], [0, 0, 0, 0, 4]]) + def test_block_diag_1(self): """ block_diag with one matrix """ diff --git a/scipy/sparse/tests/test_coo.py b/scipy/sparse/tests/test_coo.py new file mode 100644 index 000000000000..b7a6cee97b72 --- /dev/null +++ b/scipy/sparse/tests/test_coo.py @@ -0,0 +1,271 @@ +import numpy as np +import pytest +from scipy.sparse import coo_array + + +def test_shape_constructor(): + empty1d = coo_array((3,)) + assert empty1d.shape == (3,) + assert np.array_equal(empty1d.toarray(), np.zeros((3,))) + + empty2d = coo_array((3, 2)) + assert empty2d.shape == (3, 2) + assert np.array_equal(empty2d.toarray(), np.zeros((3, 2))) + + with pytest.raises(TypeError, match='invalid input format'): + coo_array((3, 2, 2)) + + +def test_dense_constructor(): + res1d = coo_array([1, 2, 3]) + assert res1d.shape == (3,) + assert np.array_equal(res1d.toarray(), np.array([1, 2, 3])) + + res2d = coo_array([[1, 2, 3], [4, 5, 6]]) + assert res2d.shape == (2, 3) + assert np.array_equal(res2d.toarray(), np.array([[1, 2, 3], [4, 5, 6]])) + + with pytest.raises(ValueError, match='shape must be a 1- or 2-tuple'): + coo_array([[[3]], [[4]]]) + + +def test_dense_constructor_with_shape(): + res1d = coo_array([1, 2, 3], shape=(3,)) + assert res1d.shape == (3,) + assert np.array_equal(res1d.toarray(), np.array([1, 2, 3])) + + res2d = coo_array([[1, 2, 3], [4, 5, 6]], shape=(2, 3)) + assert res2d.shape == (2, 3) + assert np.array_equal(res2d.toarray(), np.array([[1, 2, 3], [4, 5, 6]])) + + with pytest.raises(ValueError, match='shape must be a 1- or 2-tuple'): + coo_array([[[3]], [[4]]], shape=(2, 1, 1)) + + +def test_dense_constructor_with_inconsistent_shape(): + with pytest.raises(ValueError, match='inconsistent shapes'): + coo_array([1, 2, 3], shape=(4,)) + + with pytest.raises(ValueError, match='inconsistent shapes'): + coo_array([1, 2, 3], shape=(3, 1)) + + with pytest.raises(ValueError, match='inconsistent shapes'): + coo_array([[1, 2, 3]], shape=(3,)) + + with pytest.raises(ValueError, + match='axis 0 index 2 exceeds matrix dimension 2'): + coo_array(([1], ([2],)), shape=(2,)) + + with pytest.raises(ValueError, match='negative axis 0 index: -1'): + coo_array(([1], ([-1],))) + + +def test_1d_sparse_constructor(): + empty1d = coo_array((3,)) + res = coo_array(empty1d) + assert res.shape == (3,) + assert np.array_equal(res.toarray(), np.zeros((3,))) + + +def test_1d_tuple_constructor(): + res = coo_array(([9,8], ([1,2],))) + assert res.shape == (3,) + assert np.array_equal(res.toarray(), np.array([0, 9, 8])) + + +def test_1d_tuple_constructor_with_shape(): + res = coo_array(([9,8], ([1,2],)), shape=(4,)) + assert res.shape == (4,) + assert np.array_equal(res.toarray(), np.array([0, 9, 8, 0])) + +def test_non_subscriptability(): + coo_2d = coo_array((2, 2)) + + with pytest.raises(TypeError, + match="'coo_array' object does not support item assignment"): + coo_2d[0, 0] = 1 + + with pytest.raises(TypeError, + match="'coo_array' object is not subscriptable"): + coo_2d[0, :] + +def test_reshape(): + arr1d = coo_array([1, 0, 3]) + assert arr1d.shape == (3,) + + col_vec = arr1d.reshape((3, 1)) + assert col_vec.shape == (3, 1) + assert np.array_equal(col_vec.toarray(), np.array([[1], [0], [3]])) + + row_vec = arr1d.reshape((1, 3)) + assert row_vec.shape == (1, 3) + assert np.array_equal(row_vec.toarray(), np.array([[1, 0, 3]])) + + arr2d = coo_array([[1, 2, 0], [0, 0, 3]]) + assert arr2d.shape == (2, 3) + + flat = arr2d.reshape((6,)) + assert flat.shape == (6,) + assert np.array_equal(flat.toarray(), np.array([1, 2, 0, 0, 0, 3])) + + +def test_nnz(): + arr1d = coo_array([1, 0, 3]) + assert arr1d.shape == (3,) + assert arr1d.nnz == 2 + + arr2d = coo_array([[1, 2, 0], [0, 0, 3]]) + assert arr2d.shape == (2, 3) + assert arr2d.nnz == 3 + + +def test_transpose(): + arr1d = coo_array([1, 0, 3]).T + assert arr1d.shape == (3,) + assert np.array_equal(arr1d.toarray(), np.array([1, 0, 3])) + + arr2d = coo_array([[1, 2, 0], [0, 0, 3]]).T + assert arr2d.shape == (3, 2) + assert np.array_equal(arr2d.toarray(), np.array([[1, 0], [2, 0], [0, 3]])) + + +def test_transpose_with_axis(): + arr1d = coo_array([1, 0, 3]).transpose(axes=(0,)) + assert arr1d.shape == (3,) + assert np.array_equal(arr1d.toarray(), np.array([1, 0, 3])) + + arr2d = coo_array([[1, 2, 0], [0, 0, 3]]).transpose(axes=(0, 1)) + assert arr2d.shape == (2, 3) + assert np.array_equal(arr2d.toarray(), np.array([[1, 2, 0], [0, 0, 3]])) + + with pytest.raises(ValueError, match="axes don't match matrix dimensions"): + coo_array([1, 0, 3]).transpose(axes=(0, 1)) + + with pytest.raises(ValueError, match="repeated axis in transpose"): + coo_array([[1, 2, 0], [0, 0, 3]]).transpose(axes=(1, 1)) + + +def test_1d_row_and_col(): + res = coo_array([1, -2, -3]) + assert np.array_equal(res.col, np.array([0, 1, 2])) + assert np.array_equal(res.row, np.zeros_like(res.col)) + assert res.row.dtype == res.col.dtype + + res.col = [1, 2, 3] + assert len(res.indices) == 1 + assert np.array_equal(res.col, np.array([1, 2, 3])) + assert res.row.dtype == res.col.dtype + + with pytest.raises(ValueError, match="cannot set row attribute"): + res.row = [1, 2, 3] + + +def test_1d_tocsc_tocsr_todia_todok(): + res = coo_array([1, -2, -3]) + for f in [res.tocsc, res.tocsr, res.todok, res.todia]: + with pytest.raises(ValueError, match='Cannot convert'): + f() + + +@pytest.mark.parametrize('arg', [1, 2, 4, 5, 8]) +def test_1d_resize(arg: int): + den = np.array([1, -2, -3]) + res = coo_array(den) + den.resize(arg, refcheck=False) + res.resize(arg) + assert res.shape == den.shape + assert np.array_equal(res.toarray(), den) + + +@pytest.mark.parametrize('arg', zip([1, 2, 3, 4], [1, 2, 3, 4])) +def test_1d_to_2d_resize(arg: tuple[int, int]): + den = np.array([1, 0, 3]) + res = coo_array(den) + + den.resize(arg, refcheck=False) + res.resize(arg) + assert res.shape == den.shape + assert np.array_equal(res.toarray(), den) + + +@pytest.mark.parametrize('arg', [1, 4, 6, 8]) +def test_2d_to_1d_resize(arg: int): + den = np.array([[1, 0, 3], [4, 0, 0]]) + res = coo_array(den) + den.resize(arg, refcheck=False) + res.resize(arg) + assert res.shape == den.shape + assert np.array_equal(res.toarray(), den) + + +def test_sum_duplicates(): + arr1d = coo_array(([2, 2, 2], ([1, 0, 1],))) + assert arr1d.nnz == 3 + assert np.array_equal(arr1d.toarray(), np.array([2, 4])) + arr1d.sum_duplicates() + assert arr1d.nnz == 2 + assert np.array_equal(arr1d.toarray(), np.array([2, 4])) + + +def test_eliminate_zeros(): + arr1d = coo_array(([0, 0, 1], ([1, 0, 1],))) + assert arr1d.nnz == 3 + assert arr1d.count_nonzero() == 1 + assert np.array_equal(arr1d.toarray(), np.array([0, 1])) + arr1d.eliminate_zeros() + assert arr1d.nnz == 1 + assert arr1d.count_nonzero() == 1 + assert np.array_equal(arr1d.toarray(), np.array([0, 1])) + assert np.array_equal(arr1d.col, np.array([1])) + assert np.array_equal(arr1d.row, np.array([0])) + + +def test_1d_add_dense(): + den_a = np.array([0, -2, -3, 0]) + den_b = np.array([0, 1, 2, 3]) + exp = den_a + den_b + res = coo_array(den_a) + den_b + assert type(res) == type(exp) + assert np.array_equal(res, exp) + + +def test_1d_add_sparse(): + den_a = np.array([0, -2, -3, 0]) + den_b = np.array([0, 1, 2, 3]) + # Currently this routes through CSR format, so 1d sparse addition + # isn't supported. + with pytest.raises(ValueError, + match='Cannot convert a 1d sparse array'): + coo_array(den_a) + coo_array(den_b) + + +def test_1d_mul_vector(): + den_a = np.array([0, -2, -3, 0]) + den_b = np.array([0, 1, 2, 3]) + exp = den_a @ den_b + res = coo_array(den_a) @ den_b + assert np.ndim(res) == 0 + assert np.array_equal(res, exp) + + +def test_1d_mul_multivector(): + den = np.array([0, -2, -3, 0]) + other = np.array([[0, 1, 2, 3], [3, 2, 1, 0]]).T + exp = den @ other + res = coo_array(den) @ other + assert type(res) == type(exp) + assert np.array_equal(res, exp) + + +def test_2d_mul_multivector(): + den = np.array([[0, 1, 2, 3], [3, 2, 1, 0]]) + arr2d = coo_array(den) + exp = den @ den.T + res = arr2d @ arr2d.T + assert np.array_equal(res.toarray(), exp) + + +def test_1d_diagonal(): + den = np.array([0, -2, -3, 0]) + with pytest.raises(ValueError, match='diagonal requires two dimensions'): + coo_array(den).diagonal() diff --git a/scipy/sparse/tests/test_sputils.py b/scipy/sparse/tests/test_sputils.py index 94f5de26e1f5..4545b49bea2c 100644 --- a/scipy/sparse/tests/test_sputils.py +++ b/scipy/sparse/tests/test_sputils.py @@ -67,13 +67,13 @@ def test_isshape(self): assert_equal(sputils.isshape((-1, 2), nonneg=True),False) assert_equal(sputils.isshape((2, -1), nonneg=True),False) - assert_equal(sputils.isshape((1.5, 2), allow_ndim=True), False) - assert_equal(sputils.isshape(([2], 2), allow_ndim=True), False) - assert_equal(sputils.isshape((2, 2, -2), nonneg=True, allow_ndim=True), + assert_equal(sputils.isshape((1.5, 2), allow_1d=True), False) + assert_equal(sputils.isshape(([2], 2), allow_1d=True), False) + assert_equal(sputils.isshape((2, 2, -2), nonneg=True, allow_1d=True), False) - assert_equal(sputils.isshape((2,), allow_ndim=True), True) - assert_equal(sputils.isshape((2, 2,), allow_ndim=True), True) - assert_equal(sputils.isshape((2, 2, 2), allow_ndim=True), True) + assert_equal(sputils.isshape((2,), allow_1d=True), True) + assert_equal(sputils.isshape((2, 2,), allow_1d=True), True) + assert_equal(sputils.isshape((2, 2, 2), allow_1d=True), False) def test_issequence(self): assert_equal(sputils.issequence((1,)), True)