From 45aa3fa3c956e7463bfc4b949a46c9bd3c95aae6 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Sun, 15 Oct 2023 21:27:24 -0400 Subject: [PATCH 01/14] Squashed changes --- scipy/sparse/_base.py | 21 +- scipy/sparse/_construct.py | 2 +- scipy/sparse/_coo.py | 408 +++++++++++++++++++------------- scipy/sparse/_sputils.py | 26 +- scipy/sparse/tests/meson.build | 1 + scipy/sparse/tests/test_base.py | 15 +- scipy/sparse/tests/test_coo.py | 283 ++++++++++++++++++++++ 7 files changed, 575 insertions(+), 181 deletions(-) create mode 100644 scipy/sparse/tests/test_coo.py diff --git a/scipy/sparse/_base.py b/scipy/sparse/_base.py index aa7ce11c9a85..8dbb23c3a163 100644 --- a/scipy/sparse/_base.py +++ b/scipy/sparse/_base.py @@ -67,7 +67,10 @@ class _spbase: __array_priority__ = 10.1 _format = 'und' # undefined - ndim = 2 + + @property + def ndim(self) -> int: + return len(self._shape) @property def _bsr_container(self): @@ -352,12 +355,14 @@ def real(self): def imag(self): return self._imag() - def __repr__(self): + def __repr__(self) -> str: _, format_name = _formats[self.format] sparse_cls = 'array' if isinstance(self, sparray) else 'matrix' - return f"<%dx%d sparse {sparse_cls} of type '%s'\n" \ - "\twith %d stored elements in %s format>" % \ - (self.shape + (self.dtype.type, self.nnz, format_name)) + shape_str = 'x'.join(str(x) for x in self.shape) + return ( + f"<{shape_str} sparse {sparse_cls} of type '{self.dtype.type}'\n" + f"\twith {self.nnz} stored elements in {format_name} format>" + ) def __str__(self): maxprint = self._getmaxprint() @@ -568,7 +573,11 @@ def _mul_dispatch(self, other): # This method has to be different from `__matmul__` because it is also # called by sparse matrix classes. - M, N = self.shape + # Currently matrix multiplication is only supported + # for 2D arrays. Hence we unpacked and use only the + # two last axes' lengths. + N = self.shape[-1] + M = self.shape[-2] if self.ndim > 1 else 1 if other.__class__ is np.ndarray: # Fast path for the most common case diff --git a/scipy/sparse/_construct.py b/scipy/sparse/_construct.py index 229f2fedd67f..542d9f34d70a 100644 --- a/scipy/sparse/_construct.py +++ b/scipy/sparse/_construct.py @@ -931,7 +931,7 @@ def block_diag(mats, format=None, dtype=None): c_idx = 0 for a in mats: if isinstance(a, (list, numbers.Number)): - a = coo_array(a) + a = coo_array(np.atleast_2d(a)) nrows, ncols = a.shape if issparse(a): a = a.tocoo() diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py index 3de9da678153..ceae5e7524ef 100644 --- a/scipy/sparse/_coo.py +++ b/scipy/sparse/_coo.py @@ -24,71 +24,71 @@ class _coo_base(_data_matrix, _minmax_mixin): def __init__(self, arg1, shape=None, dtype=None, copy=False): _data_matrix.__init__(self) + is_array = isinstance(self, sparray) if isinstance(arg1, tuple): - if isshape(arg1): - M, N = arg1 - self._shape = check_shape((M, N)) - idx_dtype = self._get_index_dtype(maxval=max(M, N)) + if isshape(arg1, allow_ndim=is_array): + self._shape = check_shape(arg1, allow_ndim=is_array) + idx_dtype = self._get_index_dtype(maxval=max(self._shape)) data_dtype = getdtype(dtype, default=float) - self.row = np.array([], dtype=idx_dtype) - self.col = np.array([], dtype=idx_dtype) + self.indices = tuple(np.array([], dtype=idx_dtype) + for _ in range(len(self._shape))) self.data = np.array([], dtype=data_dtype) self.has_canonical_format = True else: try: - obj, (row, col) = arg1 + obj, indices = arg1 except (TypeError, ValueError) as e: raise TypeError('invalid input format') from e if shape is None: - if len(row) == 0 or len(col) == 0: + if any(len(idx) == 0 for idx in indices): raise ValueError('cannot infer dimensions from zero ' 'sized index arrays') - M = operator.index(np.max(row)) + 1 - N = operator.index(np.max(col)) + 1 - self._shape = check_shape((M, N)) - else: - # Use 2 steps to ensure shape has length 2. - M, N = shape - self._shape = check_shape((M, N)) - - idx_dtype = self._get_index_dtype((row, col), maxval=max(self.shape), check_contents=True) - self.row = np.array(row, copy=copy, dtype=idx_dtype) - self.col = np.array(col, copy=copy, dtype=idx_dtype) + shape = tuple(operator.index(np.max(idx)) + 1 + for idx in indices) + self._shape = check_shape(shape, allow_ndim=is_array) + + idx_dtype = self._get_index_dtype(indices, + maxval=max(self.shape), + check_contents=True) + self.indices = tuple(np.array(idx, copy=copy, dtype=idx_dtype) + for idx in indices) self.data = getdata(obj, copy=copy, dtype=dtype) self.has_canonical_format = False else: if issparse(arg1): if arg1.format == self.format and copy: - self.row = arg1.row.copy() - self.col = arg1.col.copy() + self.indices = tuple(idx.copy() for idx in arg1.indices) self.data = arg1.data.copy() - self._shape = check_shape(arg1.shape) + self._shape = check_shape(arg1.shape, + allow_ndim=is_array) + self.has_canonical_format = arg1.has_canonical_format else: coo = arg1.tocoo() - self.row = coo.row - self.col = coo.col + self.indices = tuple(coo.indices) self.data = coo.data - self._shape = check_shape(coo.shape) - self.has_canonical_format = False + self._shape = check_shape(coo.shape, + allow_ndim=is_array) + self.has_canonical_format = False else: - #dense argument - M = np.atleast_2d(np.asarray(arg1)) - - if M.ndim != 2: - raise TypeError('expected dimension <= 2 array or matrix') - - self._shape = check_shape(M.shape) + # dense argument + M = np.asarray(arg1) + if not is_array: + M = np.atleast_2d(M) + if M.ndim != 2: + raise TypeError('expected dimension <= 2 array or matrix') + + self._shape = check_shape(M.shape, allow_ndim=is_array) if shape is not None: - if check_shape(shape) != self._shape: + if check_shape(shape, allow_ndim=is_array) != self._shape: raise ValueError('inconsistent shapes: %s != %s' % (shape, self._shape)) index_dtype = self._get_index_dtype(maxval=max(self._shape)) - row, col = M.nonzero() - self.row = row.astype(index_dtype, copy=False) - self.col = col.astype(index_dtype, copy=False) - self.data = M[self.row, self.col] + indices = M.nonzero() + self.indices = tuple(idx.astype(index_dtype, copy=False) + for idx in indices) + self.data = M[indices] self.has_canonical_format = True if dtype is not None: @@ -96,8 +96,29 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False): self._check() + @property + def row(self): + return self.indices[0] + + @row.setter + def row(self, new_row): + new_row = np.asarray(new_row, dtype=self.indices[0].dtype) + self.indices = (new_row,) + self.indices[1:] + + @property + def col(self): + return self.indices[1] if self.ndim > 1 else np.zeros_like(self.row) + + @col.setter + def col(self, new_col): + if self.ndim < 2: + raise ValueError('cannot set col attribute of a 1-dimensional sparse array') + new_col = np.asarray(new_col, dtype=self.indices[1].dtype) + self.indices = self.indices[:1] + (new_col,) + self.indices[2:] + def reshape(self, *args, **kwargs): - shape = check_shape(args, self.shape) + is_array = isinstance(self, sparray) + shape = check_shape(args, self.shape, allow_ndim=is_array) order, copy = check_reshape_kwargs(kwargs) # Return early if reshape is not required @@ -107,23 +128,9 @@ def reshape(self, *args, **kwargs): else: return self - nrows, ncols = self.shape - - if order == 'C': - # Upcast to avoid overflows: the coo_array constructor - # below will downcast the results to a smaller dtype, if - # possible. - dtype = self._get_index_dtype(maxval=(ncols * max(0, nrows - 1) + max(0, ncols - 1))) - - flat_indices = np.multiply(ncols, self.row, dtype=dtype) + self.col - new_row, new_col = divmod(flat_indices, shape[1]) - elif order == 'F': - dtype = self._get_index_dtype(maxval=(nrows * max(0, ncols - 1) + max(0, nrows - 1))) - - flat_indices = np.multiply(nrows, self.col, dtype=dtype) + self.row - new_col, new_row = divmod(flat_indices, shape[0]) - else: - raise ValueError("'order' must be 'C' or 'F'") + # TODO: Handle overflow as in https://github.com/scipy/scipy/pull/9132 + flat_indices = np.ravel_multi_index(self.indices, self.shape, order=order) + new_indices = np.unravel_index(flat_indices, shape, order=order) # Handle copy here rather than passing on to the constructor so that no # copy will be made of new_row and new_col regardless @@ -132,85 +139,110 @@ def reshape(self, *args, **kwargs): else: new_data = self.data - return self.__class__((new_data, (new_row, new_col)), - shape=shape, copy=False) + return self.__class__((new_data, new_indices), shape=shape, copy=False) reshape.__doc__ = _spbase.reshape.__doc__ def _getnnz(self, axis=None): - if axis is None: + if axis is None or (axis == 0 and self.ndim == 1): nnz = len(self.data) - if nnz != len(self.row) or nnz != len(self.col): - raise ValueError('row, column, and data array must all be the ' + if any(len(idx) != nnz for idx in self.indices): + raise ValueError('all index and data arrays must have the ' 'same length') - if self.data.ndim != 1 or self.row.ndim != 1 or \ - self.col.ndim != 1: + if self.data.ndim != 1 or any(idx.ndim != 1 for idx in self.indices): raise ValueError('row, column, and data arrays must be 1-D') return int(nnz) if axis < 0: - axis += 2 - if axis == 0: - return np.bincount(downcast_intp_index(self.col), - minlength=self.shape[1]) - elif axis == 1: - return np.bincount(downcast_intp_index(self.row), - minlength=self.shape[0]) - else: + axis += self.ndim + if axis >= self.ndim: raise ValueError('axis out of bounds') + if self.ndim > 2: + raise NotImplementedError('per-axis nnz for COO arrays with >2 ' + 'dimensions is not supported') + return np.bincount(downcast_intp_index(self.indices[1 - axis]), + minlength=self.shape[1 - axis]) _getnnz.__doc__ = _spbase._getnnz.__doc__ def _check(self): """ Checks data structure for consistency """ + if self.ndim != len(self.indices): + raise ValueError('mismatching number of index arrays for shape; ' + f'got {len(self.indices)}, expected {self.ndim}') # index arrays should have integer data types - if self.row.dtype.kind != 'i': - warn("row index array has non-integer dtype (%s) " - % self.row.dtype.name) - if self.col.dtype.kind != 'i': - warn("col index array has non-integer dtype (%s) " - % self.col.dtype.name) - - idx_dtype = self._get_index_dtype((self.row, self.col), maxval=max(self.shape)) - self.row = np.asarray(self.row, dtype=idx_dtype) - self.col = np.asarray(self.col, dtype=idx_dtype) + for i, idx in enumerate(self.indices): + if idx.dtype.kind != 'i': + warn(f'index array {i} has non-integer dtype ({idx.dtype.name}) ') + + idx_dtype = self._get_index_dtype(self.indices, maxval=max(self.shape)) + self.indices = tuple(np.asarray(idx, dtype=idx_dtype) + for idx in self.indices) self.data = to_native(self.data) if self.nnz > 0: - if self.row.max() >= self.shape[0]: - raise ValueError('row index exceeds matrix dimensions') - if self.col.max() >= self.shape[1]: - raise ValueError('column index exceeds matrix dimensions') - if self.row.min() < 0: - raise ValueError('negative row index found') - if self.col.min() < 0: - raise ValueError('negative column index found') + for i, idx in enumerate(self.indices): + if idx.max() >= self.shape[i]: + raise ValueError(f'axis {i} index {idx.max()} exceeds ' + f'matrix dimension {self.shape[i]}') + if idx.min() < 0: + raise ValueError(f'negative axis {i} index: {idx.min()}') def transpose(self, axes=None, copy=False): - if axes is not None and axes != (1, 0): - raise ValueError("Sparse array/matrices do not support " - "an 'axes' parameter because swapping " - "dimensions is the only logical permutation.") - - M, N = self.shape - return self.__class__((self.data, (self.col, self.row)), - shape=(N, M), copy=copy) + if axes is None: + axes = range(self.ndim)[::-1] + elif isinstance(self, sparray): + if len(axes) != self.ndim: + raise ValueError("axes don't match matrix dimensions") + if len(set(axes)) != self.ndim: + raise ValueError("repeated axis in transpose") + elif axes != (1, 0): + raise ValueError("Sparse matrices do not support an 'axes' " + "parameter because swapping dimensions is the " + "only logical permutation.") + + permuted_shape = tuple(self._shape[i] for i in axes) + permuted_indices = tuple(self.indices[i] for i in axes) + return self.__class__((self.data, permuted_indices), + shape=permuted_shape, copy=copy) transpose.__doc__ = _spbase.transpose.__doc__ - def resize(self, *shape): - shape = check_shape(shape) - new_M, new_N = shape - M, N = self.shape + def resize(self, *shape) -> None: + is_array = isinstance(self, sparray) + shape = check_shape(shape, allow_ndim=is_array) + + # Check for added dimensions. + if len(shape) > self.ndim: + flat_indices = np.ravel_multi_index(self.indices, self.shape) + max_size = np.prod(shape) + self.indices = np.unravel_index(flat_indices[:max_size], shape) + self.data = self.data[:max_size] + self._shape = shape + return - if new_M < M or new_N < N: - mask = np.logical_and(self.row < new_M, self.col < new_N) + # Check for removed dimensions. + if len(shape) < self.ndim: + tmp_shape = ( + self._shape[:len(shape) - 1] # Original shape without last axis + + (-1,) # Last axis is used to flatten the array + + (1,) * (self.ndim - len(shape)) # Pad with ones + ) + tmp = self.reshape(tmp_shape) + self.indices = tmp.indices[:len(shape)] + self._shape = tmp.shape[:len(shape)] + + # Handle truncation of existing dimensions. + is_truncating = any(old > new for old, new in zip(self.shape, shape)) + if is_truncating: + mask = np.logical_and.reduce([ + idx < size for idx, size in zip(self.indices, shape) + ]) if not mask.all(): - self.row = self.row[mask] - self.col = self.col[mask] + self.indices = tuple(idx[mask] for idx in self.indices) self.data = self.data[mask] self._shape = shape @@ -222,10 +254,17 @@ def toarray(self, order=None, out=None): fortran = int(B.flags.f_contiguous) if not fortran and not B.flags.c_contiguous: raise ValueError("Output array must be C or F contiguous") - M,N = self.shape + if self.ndim > 2: + raise ValueError("Cannot densify higher-rank sparse array") + # This handles both 0D and 1D cases correctly regardless of the + # original shape. + M, N, *_ = self.shape + (1, 1) coo_todense(M, N, self.nnz, self.row, self.col, self.data, B.ravel('A'), fortran) - return B + # Note: reshape() doesn't copy here, but does return a new array (view). + return B.reshape(self.shape) + + toarray.__doc__ = _spbase.toarray.__doc__ toarray.__doc__ = _spbase.toarray.__doc__ @@ -249,6 +288,8 @@ def tocsc(self, copy=False): [0, 0, 0, 1]]) """ + if self.ndim != 2: + raise ValueError("Cannot convert a 1d sparse array to csc format") if self.nnz == 0: return self._csc_container(self.shape, dtype=self.dtype) else: @@ -291,6 +332,8 @@ def tocsr(self, copy=False): [0, 0, 0, 1]]) """ + if self.ndim != 2: + raise ValueError("Cannot convert a 1d sparse array to csr format") if self.nnz == 0: return self._csr_container(self.shape, dtype=self.dtype) else: @@ -322,6 +365,8 @@ def tocoo(self, copy=False): tocoo.__doc__ = _spbase.tocoo.__doc__ def todia(self, copy=False): + if self.ndim != 2: + raise ValueError("Cannot convert a 1d sparse array to dia format") self.sum_duplicates() ks = self.col - self.row # the diagonal for each nonzero diags, diag_idx = np.unique(ks, return_inverse=True) @@ -343,6 +388,8 @@ def todia(self, copy=False): todia.__doc__ = _spbase.todia.__doc__ def todok(self, copy=False): + if self.ndim != 2: + raise ValueError("Cannot convert a 1d sparse array to dok format") self.sum_duplicates() dok = self._dok_container((self.shape), dtype=self.dtype) dok._update(zip(zip(self.row,self.col),self.data)) @@ -352,6 +399,8 @@ def todok(self, copy=False): todok.__doc__ = _spbase.todok.__doc__ def diagonal(self, k=0): + if self.ndim != 2: + raise ValueError("diagonal requires two dimensions") rows, cols = self.shape if k <= -rows or k >= cols: return np.empty(0, dtype=self.data.dtype) @@ -363,9 +412,8 @@ def diagonal(self, k=0): row = self.row[diag_mask] data = self.data[diag_mask] else: - row, _, data = self._sum_duplicates(self.row[diag_mask], - self.col[diag_mask], - self.data[diag_mask]) + inds = tuple(idx[diag_mask] for idx in self.indices) + (row, _), data = self._sum_duplicates(inds, self.data[diag_mask]) diag[row + min(k, 0)] = data return diag @@ -373,6 +421,8 @@ def diagonal(self, k=0): diagonal.__doc__ = _data_matrix.diagonal.__doc__ def _setdiag(self, values, k): + if self.ndim != 2: + raise ValueError("setting a diagonal requires two dimensions") M, N = self.shape if values.ndim and not len(values): return @@ -403,54 +453,51 @@ def _setdiag(self, values, k): new_data[:] = values # Update the internal structure. - self.row = np.concatenate((self.row[keep], new_row)) - self.col = np.concatenate((self.col[keep], new_col)) + self.indices = (np.concatenate((self.row[keep], new_row)), + np.concatenate((self.col[keep], new_col))) self.data = np.concatenate((self.data[keep], new_data)) self.has_canonical_format = False # needed by _data_matrix - def _with_data(self,data,copy=True): + def _with_data(self, data, copy=True): """Returns a matrix with the same sparsity structure as self, - but with different data. By default the index arrays - (i.e. .row and .col) are copied. + but with different data. By default the index arrays are copied. """ if copy: - return self.__class__((data, (self.row.copy(), self.col.copy())), - shape=self.shape, dtype=data.dtype) + indices = tuple(idx.copy() for idx in self.indices) else: - return self.__class__((data, (self.row, self.col)), - shape=self.shape, dtype=data.dtype) + indices = self.indices + return self.__class__((data, indices), shape=self.shape, dtype=data.dtype) - def sum_duplicates(self): + def sum_duplicates(self) -> None: """Eliminate duplicate entries by adding them together This is an *in place* operation """ if self.has_canonical_format: return - summed = self._sum_duplicates(self.row, self.col, self.data) - self.row, self.col, self.data = summed + summed = self._sum_duplicates(self.indices, self.data) + self.indices, self.data = summed self.has_canonical_format = True - def _sum_duplicates(self, row, col, data): - # Assumes (data, row, col) not in canonical format. + def _sum_duplicates(self, indices, data): + # Assumes indices not in canonical format. if len(data) == 0: - return row, col, data + return indices, data # Sort indices w.r.t. rows, then cols. This corresponds to C-order, # which we rely on for argmin/argmax to return the first index in the # same way that numpy does (in the case of ties). - order = np.lexsort((col, row)) - row = row[order] - col = col[order] + order = np.lexsort(indices[::-1]) + indices = tuple(idx[order] for idx in indices) data = data[order] - unique_mask = ((row[1:] != row[:-1]) | - (col[1:] != col[:-1])) + unique_mask = np.logical_or.reduce([ + idx[1:] != idx[:-1] for idx in indices + ]) unique_mask = np.append(True, unique_mask) - row = row[unique_mask] - col = col[unique_mask] + indices = tuple(idx[unique_mask] for idx in indices) unique_inds, = np.nonzero(unique_mask) data = np.add.reduceat(data, unique_inds, dtype=self.dtype) - return row, col, data + return indices, data def eliminate_zeros(self): """Remove zero entries from the array/matrix @@ -459,8 +506,7 @@ def eliminate_zeros(self): """ mask = self.data != 0 self.data = self.data[mask] - self.row = self.row[mask] - self.col = self.col[mask] + self.indices = tuple(idx[mask] for idx in self.indices) ####################### # Arithmetic handlers # @@ -473,23 +519,50 @@ def _add_dense(self, other): dtype = upcast_char(self.dtype.char, other.dtype.char) result = np.array(other, dtype=dtype, copy=True) fortran = int(result.flags.f_contiguous) - M, N = self.shape + M = self.shape[0] + N = self.shape[1] if self.ndim > 1 else 1 coo_todense(M, N, self.nnz, self.row, self.col, self.data, result.ravel('A'), fortran) return self._container(result, copy=False) def _mul_vector(self, other): - #output array - result = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char, - other.dtype.char)) - coo_matvec(self.nnz, self.row, self.col, self.data, other, result) + result_shape = self.shape[0] if self.ndim > 1 else 1 + result = np.zeros(result_shape, + dtype=upcast_char(self.dtype.char, other.dtype.char)) + + if self.ndim == 2: + col = self.col + row = self.row + elif self.ndim == 1: + col = self.indices[0] + row = np.zeros_like(col) + else: + raise NotImplementedError( + f"coo_matvec not implemented for ndim={self.ndim}") + + coo_matvec(self.nnz, row, col, self.data, other, result) + # Array semantics return a scalar here, not a single-element array. + if isinstance(self, sparray) and result_shape == 1: + return result[0] return result def _mul_multivector(self, other): - result = np.zeros((other.shape[1], self.shape[0]), - dtype=upcast_char(self.dtype.char, other.dtype.char)) - for i, col in enumerate(other.T): - coo_matvec(self.nnz, self.row, self.col, self.data, col, result[i]) + result_dtype = upcast_char(self.dtype.char, other.dtype.char) + if self.ndim == 2: + result_shape = (other.shape[1], self.shape[0]) + col = self.col + row = self.row + elif self.ndim == 1: + result_shape = (other.shape[1],) + col = self.indices[0] + row = np.zeros_like(col) + else: + raise NotImplementedError( + f"coo_matvec not implemented for ndim={self.ndim}") + + result = np.zeros(result_shape, dtype=result_dtype) + for i, other_col in enumerate(other.T): + coo_matvec(self.nnz, row, col, self.data, other_col, result[i:i + 1]) return result.T.view(type=type(other)) @@ -528,40 +601,37 @@ class coo_array(_coo_base, sparray): This can be instantiated in several ways: coo_array(D) - where D is a 2-D ndarray + where D is an ndarray coo_array(S) with another sparse array or matrix S (equivalent to S.tocoo()) - coo_array((M, N), [dtype]) - to construct an empty array with shape (M, N) + coo_array(shape, [dtype]) + to construct an empty sparse array with shape `shape` dtype is optional, defaulting to dtype='d'. - coo_array((data, (i, j)), [shape=(M, N)]) - to construct from three arrays: - 1. data[:] the entries of the array, in any order - 2. i[:] the row indices of the array entries - 3. j[:] the column indices of the array entries + coo_array((data, indices), [shape]) + to construct from existing data and index arrays: + 1. data[:] the entries of the sparse array, in any order + 2. indices[i][:] the axis-i indices of the data entries - Where ``A[i[k], j[k]] = data[k]``. When shape is not - specified, it is inferred from the index arrays + Where ``A[indices] = data``, and indices is a tuple of index arrays. + When shape is not specified, it is inferred from the index arrays. Attributes ---------- dtype : dtype - Data type of the array - shape : 2-tuple - Shape of the array + Data type of the sparse array + shape : tuple of integers + Shape of the sparse array ndim : int - Number of dimensions (this is always 2) + Number of dimensions of the sparse array nnz size data - COO format data array of the array - row - COO format row index array of the array - col - COO format column index array of the array + COO format data array of the sparse array + indices + COO format tuple of index arrays has_canonical_format : bool Whether the matrix has sorted indices and no duplicates format @@ -599,7 +669,7 @@ class coo_array(_coo_base, sparray): Examples -------- - >>> # Constructing an empty array + >>> # Constructing an empty sparse array >>> import numpy as np >>> from scipy.sparse import coo_array >>> coo_array((3, 4), dtype=np.int8).toarray() @@ -607,7 +677,7 @@ class coo_array(_coo_base, sparray): [0, 0, 0, 0], [0, 0, 0, 0]], dtype=int8) - >>> # Constructing an array using ijv format + >>> # Constructing a sparse array using ijv format >>> row = np.array([0, 3, 1, 0]) >>> col = np.array([0, 3, 1, 2]) >>> data = np.array([4, 5, 7, 9]) @@ -617,7 +687,7 @@ class coo_array(_coo_base, sparray): [0, 0, 0, 0], [0, 0, 0, 5]]) - >>> # Constructing an array with duplicate indices + >>> # Constructing a sparse array with duplicate indices >>> row = np.array([0, 0, 1, 3, 1, 0, 0]) >>> col = np.array([0, 2, 1, 3, 1, 0, 0]) >>> data = np.array([1, 1, 1, 1, 1, 1, 1]) @@ -747,3 +817,9 @@ class coo_matrix(spmatrix, _coo_base): """ + def __setstate__(self, state): + if 'indices' not in state: + # For retro-compatibility with the previous attributes + # storing nnz coordinates for 2D COO matrix. + state['indices'] = (state.pop('row'), state.pop('col')) + self.__dict__.update(state) diff --git a/scipy/sparse/_sputils.py b/scipy/sparse/_sputils.py index a646ec416cea..155646be6a90 100644 --- a/scipy/sparse/_sputils.py +++ b/scipy/sparse/_sputils.py @@ -292,8 +292,26 @@ def validateaxis(axis) -> None: raise ValueError("axis out of range") -def check_shape(args, current_shape=None): - """Imitate numpy.matrix handling of shape arguments""" +def check_shape(args, current_shape=None, allow_ndim=False) -> tuple[int, ...]: + """Imitate numpy.matrix handling of shape arguments + + Parameters + ---------- + args : array_like + Data structures providing information about the shape of the sparse array. + current_shape : tuple, optional + The current shape of the sparse array or matrix. + If None (default), the current shape will be inferred from args. + allow_ndim : bool, optional + If True, then n-D arrays are accepted. + If False (default), then only 2-D arrays are accepted and an error is + raised otherwise. + + Returns + ------- + new_shape: tuple + The new shape after validation. + """ if len(args) == 0: raise TypeError("function missing 1 required positional argument: " "'shape'") @@ -308,7 +326,7 @@ def check_shape(args, current_shape=None): new_shape = tuple(operator.index(arg) for arg in args) if current_shape is None: - if len(new_shape) != 2: + if not allow_ndim and len(new_shape) != 2: raise ValueError('shape must be a 2-tuple of positive integers') elif any(d < 0 for d in new_shape): raise ValueError("'shape' elements cannot be negative") @@ -335,7 +353,7 @@ def check_shape(args, current_shape=None): else: raise ValueError('can only specify one unknown dimension') - if len(new_shape) != 2: + if not allow_ndim and len(new_shape) != 2: raise ValueError('matrix shape must be two-dimensional') return new_shape diff --git a/scipy/sparse/tests/meson.build b/scipy/sparse/tests/meson.build index 07b14b446757..b0959d8cea05 100644 --- a/scipy/sparse/tests/meson.build +++ b/scipy/sparse/tests/meson.build @@ -2,6 +2,7 @@ python_sources = [ '__init__.py', 'test_array_api.py', 'test_base.py', + 'test_coo.py', 'test_construct.py', 'test_deprecations.py', 'test_csc.py', diff --git a/scipy/sparse/tests/test_base.py b/scipy/sparse/tests/test_base.py index 2d52fd8eb339..2e662a0b38e7 100644 --- a/scipy/sparse/tests/test_base.py +++ b/scipy/sparse/tests/test_base.py @@ -1196,13 +1196,13 @@ def test_todense(self): chk = self.datsp.todense(out=out) assert_array_equal(self.dat, out) assert_array_equal(self.dat, chk) - assert_(chk.base is out) + assert np.may_share_memory(chk, out) # Check with out array (matrix). out = asmatrix(np.zeros(self.datsp.shape, dtype=self.datsp.dtype)) chk = self.datsp.todense(out=out) assert_array_equal(self.dat, out) assert_array_equal(self.dat, chk) - assert_(chk is out) + assert np.may_share_memory(chk, out) a = array([[1.,2.,3.]]) dense_dot_dense = a @ self.dat check = a @ self.datsp.todense() @@ -1302,9 +1302,8 @@ def test_astype_immutable(self): S = self.spcreator(D) if hasattr(S, 'data'): S.data.flags.writeable = False - if hasattr(S, 'indptr'): + if S.format in ('csr', 'csc', 'bsr'): S.indptr.flags.writeable = False - if hasattr(S, 'indices'): S.indices.flags.writeable = False for x in supported_dtypes: D_casted = D.astype(x) @@ -2037,9 +2036,17 @@ def check(): assert_equal(datsp.shape, sploaded.shape) assert_array_equal(datsp.toarray(), sploaded.toarray()) assert_equal(datsp.format, sploaded.format) + # Hacky check for class member equality. This assumes that + # all instance variables are one of: + # 1. Plain numpy ndarrays + # 2. Tuples of ndarrays + # 3. Types that support equality comparison with == for key, val in datsp.__dict__.items(): if isinstance(val, np.ndarray): assert_array_equal(val, sploaded.__dict__[key]) + elif (isinstance(val, tuple) and val + and isinstance(val[0], np.ndarray)): + assert_array_equal(val, sploaded.__dict__[key]) else: assert_(val == sploaded.__dict__[key]) check() diff --git a/scipy/sparse/tests/test_coo.py b/scipy/sparse/tests/test_coo.py new file mode 100644 index 000000000000..5b3ef68ec02a --- /dev/null +++ b/scipy/sparse/tests/test_coo.py @@ -0,0 +1,283 @@ +import numpy as np +import pytest +from scipy.sparse import coo_array + + +def test_shape_constructor(): + empty1d = coo_array((3,)) + assert empty1d.shape == (3,) + assert np.array_equal(empty1d.toarray(), np.zeros((3,))) + + empty2d = coo_array((3, 2)) + assert empty2d.shape == (3, 2) + assert np.array_equal(empty2d.toarray(), np.zeros((3, 2))) + + empty3d = coo_array((3, 2, 2)) + assert empty3d.shape == (3, 2, 2) + with pytest.raises(ValueError, + match='Cannot densify higher-rank sparse array'): + empty3d.toarray() + + +def test_dense_constructor(): + res1d = coo_array([1, 2, 3]) + assert res1d.shape == (3,) + assert np.array_equal(res1d.toarray(), np.array([1, 2, 3])) + + res2d = coo_array([[1, 2, 3], [4, 5, 6]]) + assert res2d.shape == (2, 3) + assert np.array_equal(res2d.toarray(), np.array([[1, 2, 3], [4, 5, 6]])) + + res3d = coo_array([[[3]], [[4]]]) + assert res3d.shape == (2, 1, 1) + + +def test_dense_constructor_with_shape(): + res1d = coo_array([1, 2, 3], shape=(3,)) + assert res1d.shape == (3,) + assert np.array_equal(res1d.toarray(), np.array([1, 2, 3])) + + res2d = coo_array([[1, 2, 3], [4, 5, 6]], shape=(2, 3)) + assert res2d.shape == (2, 3) + assert np.array_equal(res2d.toarray(), np.array([[1, 2, 3], [4, 5, 6]])) + + res3d = coo_array([[[3]], [[4]]], shape=(2, 1, 1)) + assert res3d.shape == (2, 1, 1) + + +def test_dense_constructor_with_inconsistent_shape(): + with pytest.raises(ValueError, match='inconsistent shapes'): + coo_array([1, 2, 3], shape=(4,)) + + with pytest.raises(ValueError, match='inconsistent shapes'): + coo_array([1, 2, 3], shape=(3, 1)) + + with pytest.raises(ValueError, match='inconsistent shapes'): + coo_array([[1, 2, 3]], shape=(3,)) + + with pytest.raises(ValueError, + match='axis 0 index 2 exceeds matrix dimension 2'): + coo_array(([1], ([2],)), shape=(2,)) + + with pytest.raises(ValueError, match='negative axis 0 index: -1'): + coo_array(([1], ([-1],))) + + +def test_1d_sparse_constructor(): + empty1d = coo_array((3,)) + res = coo_array(empty1d) + assert res.shape == (3,) + assert np.array_equal(res.toarray(), np.zeros((3,))) + + +def test_1d_tuple_constructor(): + res = coo_array(([9,8], ([1,2],))) + assert res.shape == (3,) + assert np.array_equal(res.toarray(), np.array([0, 9, 8])) + + +def test_1d_tuple_constructor_with_shape(): + res = coo_array(([9,8], ([1,2],)), shape=(4,)) + assert res.shape == (4,) + assert np.array_equal(res.toarray(), np.array([0, 9, 8, 0])) + +def test_non_subscriptability(): + coo_2d = coo_array((2, 2)) + + with pytest.raises(TypeError, + match="'coo_array' object does not support item assignment"): + coo_2d[0, 0] = 1 + + with pytest.raises(TypeError, + match="'coo_array' object is not subscriptable"): + coo_2d[0, :] + +def test_reshape(): + arr1d = coo_array([1, 0, 3]) + assert arr1d.shape == (3,) + + col_vec = arr1d.reshape((3, 1)) + assert col_vec.shape == (3, 1) + assert np.array_equal(col_vec.toarray(), np.array([[1], [0], [3]])) + + row_vec = arr1d.reshape((1, 3)) + assert row_vec.shape == (1, 3) + assert np.array_equal(row_vec.toarray(), np.array([[1, 0, 3]])) + + arr2d = coo_array([[1, 2, 0], [0, 0, 3]]) + assert arr2d.shape == (2, 3) + + flat = arr2d.reshape((6,)) + assert flat.shape == (6,) + assert np.array_equal(flat.toarray(), np.array([1, 2, 0, 0, 0, 3])) + + +def test_nnz(): + arr1d = coo_array([1, 0, 3]) + assert arr1d.shape == (3,) + assert arr1d.nnz == 2 + + arr2d = coo_array([[1, 2, 0], [0, 0, 3]]) + assert arr2d.shape == (2, 3) + assert arr2d.nnz == 3 + + arr3d = coo_array([[[1, 2, 0], [0, 0, 0]]]) + assert arr3d.shape == (1, 2, 3) + assert arr3d.nnz == 2 + + +def test_transpose(): + arr1d = coo_array([1, 0, 3]).T + assert arr1d.shape == (3,) + assert np.array_equal(arr1d.toarray(), np.array([1, 0, 3])) + + arr2d = coo_array([[1, 2, 0], [0, 0, 3]]).T + assert arr2d.shape == (3, 2) + assert np.array_equal(arr2d.toarray(), np.array([[1, 0], [2, 0], [0, 3]])) + + arr3d = coo_array([[[1, 2, 0], [0, 0, 0]]]).T + assert arr3d.shape == (3, 2, 1) + + +def test_transpose_with_axis(): + arr1d = coo_array([1, 0, 3]).transpose(axes=(0,)) + assert arr1d.shape == (3,) + assert np.array_equal(arr1d.toarray(), np.array([1, 0, 3])) + + arr2d = coo_array([[1, 2, 0], [0, 0, 3]]).transpose(axes=(0, 1)) + assert arr2d.shape == (2, 3) + assert np.array_equal(arr2d.toarray(), np.array([[1, 2, 0], [0, 0, 3]])) + + arr3d = coo_array([[[1, 2, 0], [0, 0, 0]]]).transpose(axes=(1, 0, 2)) + assert arr3d.shape == (2, 1, 3) + + with pytest.raises(ValueError, match="axes don't match matrix dimensions"): + coo_array([1, 0, 3]).transpose(axes=(0, 1)) + + with pytest.raises(ValueError, match="repeated axis in transpose"): + coo_array([[1, 2, 0], [0, 0, 3]]).transpose(axes=(1, 1)) + + +def test_1d_row_and_col(): + res = coo_array([1, -2, -3]) + assert np.array_equal(res.row, np.array([0, 1, 2])) + assert np.array_equal(res.col, np.zeros_like(res.row)) + assert res.row.dtype == res.col.dtype + + res.row = [1, 2, 3] + assert len(res.indices) == 1 + assert np.array_equal(res.row, np.array([1, 2, 3])) + assert res.row.dtype == res.col.dtype + + with pytest.raises(ValueError, match="cannot set col attribute"): + res.col = [1, 2, 3] + + +def test_1d_tocsc_tocsr_todia_todok(): + res = coo_array([1, -2, -3]) + for f in [res.tocsc, res.tocsr, res.todok, res.todia]: + with pytest.raises(ValueError, match='Cannot convert'): + f() + + +@pytest.mark.parametrize('arg', [1, 2, 4, 5, 8]) +def test_1d_resize(arg: int): + den = np.array([1, -2, -3]) + res = coo_array(den) + den.resize(arg) + res.resize(arg) + assert res.shape == den.shape + assert np.array_equal(res.toarray(), den) + + +@pytest.mark.parametrize('arg', zip([1, 2, 3, 4], [1, 2, 3, 4])) +def test_1d_to_2d_resize(arg: tuple[int, int]): + den = np.array([1, 0, 3]) + res = coo_array(den) + + den.resize(arg) + res.resize(arg) + assert res.shape == den.shape + assert np.array_equal(res.toarray(), den) + + +@pytest.mark.parametrize('arg', [1, 4, 6, 8]) +def test_2d_to_1d_resize(arg: int): + den = np.array([[1, 0, 3], [4, 0, 0]]) + res = coo_array(den) + den.resize(arg) + res.resize(arg) + assert res.shape == den.shape + assert np.array_equal(res.toarray(), den) + + +def test_sum_duplicates(): + arr1d = coo_array(([2, 2, 2], ([1, 0, 1],))) + assert arr1d.nnz == 3 + assert np.array_equal(arr1d.toarray(), np.array([2, 4])) + arr1d.sum_duplicates() + assert arr1d.nnz == 2 + assert np.array_equal(arr1d.toarray(), np.array([2, 4])) + + +def test_eliminate_zeros(): + arr1d = coo_array(([0, 0, 1], ([1, 0, 1],))) + assert arr1d.nnz == 3 + assert arr1d.count_nonzero() == 1 + assert np.array_equal(arr1d.toarray(), np.array([0, 1])) + arr1d.eliminate_zeros() + assert arr1d.nnz == 1 + assert arr1d.count_nonzero() == 1 + assert np.array_equal(arr1d.toarray(), np.array([0, 1])) + assert np.array_equal(arr1d.row, np.array([1])) + + +def test_1d_add_dense(): + den_a = np.array([0, -2, -3, 0]) + den_b = np.array([0, 1, 2, 3]) + exp = den_a + den_b + res = coo_array(den_a) + den_b + assert type(res) == type(exp) + assert np.array_equal(res, exp) + + +def test_1d_add_sparse(): + den_a = np.array([0, -2, -3, 0]) + den_b = np.array([0, 1, 2, 3]) + # Currently this routes through CSR format, so 1d sparse addition + # isn't supported. + with pytest.raises(ValueError, + match='Cannot convert a 1d sparse array'): + coo_array(den_a) + coo_array(den_b) + + +def test_1d_mul_vector(): + den_a = np.array([0, -2, -3, 0]) + den_b = np.array([0, 1, 2, 3]) + exp = den_a @ den_b + res = coo_array(den_a) @ den_b + assert type(res) == type(exp) + assert np.array_equal(res, exp) + + +def test_1d_mul_multivector(): + den = np.array([0, -2, -3, 0]) + other = np.array([[0, 1, 2, 3], [3, 2, 1, 0]]).T + exp = den @ other + res = coo_array(den) @ other + assert type(res) == type(exp) + assert np.array_equal(res, exp) + + +def test_2d_mul_multivector(): + den = np.array([[0, 1, 2, 3], [3, 2, 1, 0]]) + arr2d = coo_array(den) + exp = den @ den.T + res = arr2d @ arr2d.T + assert np.array_equal(res.toarray(), exp) + + +def test_1d_diagonal(): + den = np.array([0, -2, -3, 0]) + with pytest.raises(ValueError, match='diagonal requires two dimensions'): + coo_array(den).diagonal() From 05a75ce81c804318b0ea5f4b30f9d828e5f76012 Mon Sep 17 00:00:00 2001 From: Dan Schult Date: Tue, 24 Oct 2023 20:11:10 -0400 Subject: [PATCH 02/14] fix misc test and mypy errors --- scipy/sparse/_base.py | 2 +- scipy/sparse/_coo.py | 20 +++++++++++++++++--- scipy/sparse/tests/test_coo.py | 12 ++++++------ 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/scipy/sparse/_base.py b/scipy/sparse/_base.py index 8dbb23c3a163..0db7725c8c67 100644 --- a/scipy/sparse/_base.py +++ b/scipy/sparse/_base.py @@ -355,7 +355,7 @@ def real(self): def imag(self): return self._imag() - def __repr__(self) -> str: + def __repr__(self): _, format_name = _formats[self.format] sparse_cls = 'array' if isinstance(self, sparray) else 'matrix' shape_str = 'x'.join(str(x) for x in self.shape) diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py index ceae5e7524ef..64a20f2bd087 100644 --- a/scipy/sparse/_coo.py +++ b/scipy/sparse/_coo.py @@ -128,8 +128,22 @@ def reshape(self, *args, **kwargs): else: return self - # TODO: Handle overflow as in https://github.com/scipy/scipy/pull/9132 - flat_indices = np.ravel_multi_index(self.indices, self.shape, order=order) + # Handle overflow as in https://github.com/scipy/scipy/pull/9132 + if self.ndim == 1: + flat_indices = self.indices[0] + else: + nrows, ncols = self.shape + row, col = self.indices + if order == 'C': + maxval = (ncols * max(0, nrows - 1) + max(0, ncols - 1)) + idx_dtype = self._get_index_dtype(maxval=maxval) + flat_indices = np.multiply(ncols, row, dtype=idx_dtype) + col + elif order == 'F': + maxval = (nrows * max(0, ncols - 1) + max(0, nrows - 1)) + idx_dtype = self._get_index_dtype(maxval=maxval) + flat_indices = np.multiply(nrows, col, dtype=idx_dtype) + row + else: + raise ValueError("'order' must be 'C' or 'F'") new_indices = np.unravel_index(flat_indices, shape, order=order) # Handle copy here rather than passing on to the constructor so that no @@ -218,7 +232,7 @@ def resize(self, *shape) -> None: # Check for added dimensions. if len(shape) > self.ndim: flat_indices = np.ravel_multi_index(self.indices, self.shape) - max_size = np.prod(shape) + max_size = shape[0] if len(shape) == 1 else shape[0] * shape[1] self.indices = np.unravel_index(flat_indices[:max_size], shape) self.data = self.data[:max_size] self._shape = shape diff --git a/scipy/sparse/tests/test_coo.py b/scipy/sparse/tests/test_coo.py index 5b3ef68ec02a..090c44220612 100644 --- a/scipy/sparse/tests/test_coo.py +++ b/scipy/sparse/tests/test_coo.py @@ -83,7 +83,7 @@ def test_1d_tuple_constructor_with_shape(): def test_non_subscriptability(): coo_2d = coo_array((2, 2)) - + with pytest.raises(TypeError, match="'coo_array' object does not support item assignment"): coo_2d[0, 0] = 1 @@ -95,7 +95,7 @@ def test_non_subscriptability(): def test_reshape(): arr1d = coo_array([1, 0, 3]) assert arr1d.shape == (3,) - + col_vec = arr1d.reshape((3, 1)) assert col_vec.shape == (3, 1) assert np.array_equal(col_vec.toarray(), np.array([[1], [0], [3]])) @@ -184,7 +184,7 @@ def test_1d_tocsc_tocsr_todia_todok(): def test_1d_resize(arg: int): den = np.array([1, -2, -3]) res = coo_array(den) - den.resize(arg) + den.resize(arg, refcheck=False) res.resize(arg) assert res.shape == den.shape assert np.array_equal(res.toarray(), den) @@ -195,7 +195,7 @@ def test_1d_to_2d_resize(arg: tuple[int, int]): den = np.array([1, 0, 3]) res = coo_array(den) - den.resize(arg) + den.resize(arg, refcheck=False) res.resize(arg) assert res.shape == den.shape assert np.array_equal(res.toarray(), den) @@ -205,7 +205,7 @@ def test_1d_to_2d_resize(arg: tuple[int, int]): def test_2d_to_1d_resize(arg: int): den = np.array([[1, 0, 3], [4, 0, 0]]) res = coo_array(den) - den.resize(arg) + den.resize(arg, refcheck=False) res.resize(arg) assert res.shape == den.shape assert np.array_equal(res.toarray(), den) @@ -256,7 +256,7 @@ def test_1d_mul_vector(): den_b = np.array([0, 1, 2, 3]) exp = den_a @ den_b res = coo_array(den_a) @ den_b - assert type(res) == type(exp) + assert np.ndim(res) == 0 assert np.array_equal(res, exp) From b7fe14f1f7a106e2b8c3e7667d62b3fbd141eb0f Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Wed, 1 Nov 2023 00:43:42 -0400 Subject: [PATCH 03/14] Minor tweaks on top of Dan's PR --- scipy/sparse/_coo.py | 45 +++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py index 64a20f2bd087..5e3fea7f4d85 100644 --- a/scipy/sparse/_coo.py +++ b/scipy/sparse/_coo.py @@ -4,6 +4,7 @@ __all__ = ['coo_array', 'coo_matrix', 'isspmatrix_coo'] +import math from warnings import warn import numpy as np @@ -13,7 +14,7 @@ from ._base import issparse, SparseEfficiencyWarning, _spbase, sparray from ._data import _data_matrix, _minmax_mixin from ._sputils import (upcast, upcast_char, to_native, isshape, getdtype, - getdata, downcast_intp_index, + getdata, downcast_intp_index, get_index_dtype, check_shape, check_reshape_kwargs) import operator @@ -128,22 +129,7 @@ def reshape(self, *args, **kwargs): else: return self - # Handle overflow as in https://github.com/scipy/scipy/pull/9132 - if self.ndim == 1: - flat_indices = self.indices[0] - else: - nrows, ncols = self.shape - row, col = self.indices - if order == 'C': - maxval = (ncols * max(0, nrows - 1) + max(0, ncols - 1)) - idx_dtype = self._get_index_dtype(maxval=maxval) - flat_indices = np.multiply(ncols, row, dtype=idx_dtype) + col - elif order == 'F': - maxval = (nrows * max(0, ncols - 1) + max(0, nrows - 1)) - idx_dtype = self._get_index_dtype(maxval=maxval) - flat_indices = np.multiply(nrows, col, dtype=idx_dtype) + row - else: - raise ValueError("'order' must be 'C' or 'F'") + flat_indices = _ravel_indices(self.indices, self.shape, order=order) new_indices = np.unravel_index(flat_indices, shape, order=order) # Handle copy here rather than passing on to the constructor so that no @@ -231,8 +217,8 @@ def resize(self, *shape) -> None: # Check for added dimensions. if len(shape) > self.ndim: - flat_indices = np.ravel_multi_index(self.indices, self.shape) - max_size = shape[0] if len(shape) == 1 else shape[0] * shape[1] + flat_indices = _ravel_indices(self.indices, self.shape) + max_size = math.prod(shape) self.indices = np.unravel_index(flat_indices[:max_size], shape) self.data = self.data[:max_size] self._shape = shape @@ -580,6 +566,27 @@ def _mul_multivector(self, other): return result.T.view(type=type(other)) +def _ravel_indices(indices, shape, order='C'): + """Like np.ravel_multi_index, but avoids some overflow issues.""" + # Handle overflow as in https://github.com/scipy/scipy/pull/9132 + if len(indices) == 1: + return indices[0] + if len(indices) == 2: + nrows, ncols = shape + row, col = indices + if order == 'C': + maxval = (ncols * max(0, nrows - 1) + max(0, ncols - 1)) + idx_dtype = get_index_dtype(maxval=maxval) + return np.multiply(ncols, row, dtype=idx_dtype) + col + elif order == 'F': + maxval = (nrows * max(0, ncols - 1) + max(0, nrows - 1)) + idx_dtype = get_index_dtype(maxval=maxval) + return np.multiply(nrows, col, dtype=idx_dtype) + row + else: + raise ValueError("'order' must be 'C' or 'F'") + return np.ravel_multi_index(indices, shape, order=order) + + def isspmatrix_coo(x): """Is `x` of coo_matrix type? From 97cad3403d5c21b7480f542f33b6ec3acc02ca44 Mon Sep 17 00:00:00 2001 From: Dan Schult Date: Wed, 1 Nov 2023 08:12:12 -0400 Subject: [PATCH 04/14] manually unravel to avoid 32bit overflows --- scipy/sparse/_coo.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py index 5e3fea7f4d85..83da9ae061c5 100644 --- a/scipy/sparse/_coo.py +++ b/scipy/sparse/_coo.py @@ -129,8 +129,18 @@ def reshape(self, *args, **kwargs): else: return self - flat_indices = _ravel_indices(self.indices, self.shape, order=order) - new_indices = np.unravel_index(flat_indices, shape, order=order) + if self.ndim == 1: + new_indices = np.unravel_index(self.indices[0], shape, order=order) + else: + flat_indices = _ravel_indices(self.indices, self.shape, order=order) + if len(shape) == 1: + new_indices = (flat_indices,) + else: + if order == 'C': + new_indices = divmod(flat_indices, shape[1]) + elif order == 'F': + new_indices = divmod(flat_indices, shape[0])[::-1] + # else: already checked in _ravel_indices # Handle copy here rather than passing on to the constructor so that no # copy will be made of new_row and new_col regardless @@ -569,8 +579,6 @@ def _mul_multivector(self, other): def _ravel_indices(indices, shape, order='C'): """Like np.ravel_multi_index, but avoids some overflow issues.""" # Handle overflow as in https://github.com/scipy/scipy/pull/9132 - if len(indices) == 1: - return indices[0] if len(indices) == 2: nrows, ncols = shape row, col = indices @@ -584,6 +592,8 @@ def _ravel_indices(indices, shape, order='C'): return np.multiply(nrows, col, dtype=idx_dtype) + row else: raise ValueError("'order' must be 'C' or 'F'") + if len(indices) == 1: + return indices[0] return np.ravel_multi_index(indices, shape, order=order) From c53aae8c35eb7a58b855ef9b332b8cfe255b45b0 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Wed, 1 Nov 2023 09:32:21 -0400 Subject: [PATCH 05/14] More tweaks on top of Dan's PR --- scipy/sparse/_coo.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py index 83da9ae061c5..2fb29fa18711 100644 --- a/scipy/sparse/_coo.py +++ b/scipy/sparse/_coo.py @@ -129,21 +129,20 @@ def reshape(self, *args, **kwargs): else: return self - if self.ndim == 1: - new_indices = np.unravel_index(self.indices[0], shape, order=order) - else: - flat_indices = _ravel_indices(self.indices, self.shape, order=order) - if len(shape) == 1: - new_indices = (flat_indices,) + # When reducing the number of dimensions, we need to be careful about + # index overflow. This is why we can't simply call + # `np.ravel_multi_index()` followed by `np.unravel_index()` here. + flat_indices = _ravel_indices(self.indices, self.shape, order=order) + if len(shape) == 2: + if order == 'C': + new_indices = divmod(flat_indices, shape[1]) else: - if order == 'C': - new_indices = divmod(flat_indices, shape[1]) - elif order == 'F': - new_indices = divmod(flat_indices, shape[0])[::-1] - # else: already checked in _ravel_indices + new_indices = divmod(flat_indices, shape[0])[::-1] + else: + new_indices = np.unravel_index(flat_indices, shape, order=order) # Handle copy here rather than passing on to the constructor so that no - # copy will be made of new_row and new_col regardless + # copy will be made of `new_indices` regardless. if copy: new_data = self.data.copy() else: @@ -578,6 +577,8 @@ def _mul_multivector(self, other): def _ravel_indices(indices, shape, order='C'): """Like np.ravel_multi_index, but avoids some overflow issues.""" + if len(indices) == 1: + return indices[0] # Handle overflow as in https://github.com/scipy/scipy/pull/9132 if len(indices) == 2: nrows, ncols = shape @@ -592,8 +593,6 @@ def _ravel_indices(indices, shape, order='C'): return np.multiply(nrows, col, dtype=idx_dtype) + row else: raise ValueError("'order' must be 'C' or 'F'") - if len(indices) == 1: - return indices[0] return np.ravel_multi_index(indices, shape, order=order) From 6caca812f331b8ecd89f07f02bfc5e17599434a7 Mon Sep 17 00:00:00 2001 From: Dan Schult Date: Fri, 10 Nov 2023 10:57:28 -0500 Subject: [PATCH 06/14] leftpad in toarray, row, col; coo 1darrays now row=0, col=indexes --- scipy/sparse/_coo.py | 27 ++++++++++++++------------- scipy/sparse/tests/test_coo.py | 20 +++++++++++--------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py index 2fb29fa18711..88249fdfd081 100644 --- a/scipy/sparse/_coo.py +++ b/scipy/sparse/_coo.py @@ -99,23 +99,26 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False): @property def row(self): - return self.indices[0] + if self.ndim > 1: + return self.indices[0] + return np.zeros(self.nnz, dtype=self.indices[0].dtype) + @row.setter def row(self, new_row): + if self.ndim < 2: + raise ValueError('cannot set row attribute of a 1-dimensional sparse array') new_row = np.asarray(new_row, dtype=self.indices[0].dtype) self.indices = (new_row,) + self.indices[1:] @property def col(self): - return self.indices[1] if self.ndim > 1 else np.zeros_like(self.row) + return self.indices[-1] @col.setter def col(self, new_col): - if self.ndim < 2: - raise ValueError('cannot set col attribute of a 1-dimensional sparse array') - new_col = np.asarray(new_col, dtype=self.indices[1].dtype) - self.indices = self.indices[:1] + (new_col,) + self.indices[2:] + new_col = np.asarray(new_col, dtype=self.indices[-1].dtype) + self.indices = self.indices[:-1] + (new_col,) def reshape(self, *args, **kwargs): is_array = isinstance(self, sparray) @@ -267,7 +270,7 @@ def toarray(self, order=None, out=None): raise ValueError("Cannot densify higher-rank sparse array") # This handles both 0D and 1D cases correctly regardless of the # original shape. - M, N, *_ = self.shape + (1, 1) + *_, M, N = (1, 1) + self.shape coo_todense(M, N, self.nnz, self.row, self.col, self.data, B.ravel('A'), fortran) # Note: reshape() doesn't copy here, but does return a new array (view). @@ -275,8 +278,6 @@ def toarray(self, order=None, out=None): toarray.__doc__ = _spbase.toarray.__doc__ - toarray.__doc__ = _spbase.toarray.__doc__ - def tocsc(self, copy=False): """Convert this array/matrix to Compressed Sparse Column format @@ -304,7 +305,7 @@ def tocsc(self, copy=False): else: M,N = self.shape idx_dtype = self._get_index_dtype( - (self.col, self.row), maxval=max(self.nnz, M) + (self.col, self.row), maxval=max(self.nnz, *self.shape) ) row = self.row.astype(idx_dtype, copy=False) col = self.col.astype(idx_dtype, copy=False) @@ -348,7 +349,7 @@ def tocsr(self, copy=False): else: M,N = self.shape idx_dtype = self._get_index_dtype( - (self.row, self.col), maxval=max(self.nnz, N) + (self.row, self.col), maxval=max(self.nnz, *self.shape) ) row = self.row.astype(idx_dtype, copy=False) col = self.col.astype(idx_dtype, copy=False) @@ -528,8 +529,8 @@ def _add_dense(self, other): dtype = upcast_char(self.dtype.char, other.dtype.char) result = np.array(other, dtype=dtype, copy=True) fortran = int(result.flags.f_contiguous) - M = self.shape[0] - N = self.shape[1] if self.ndim > 1 else 1 + M = self.shape[0] if self.ndim > 1 else 1 + N = self.shape[-1] coo_todense(M, N, self.nnz, self.row, self.col, self.data, result.ravel('A'), fortran) return self._container(result, copy=False) diff --git a/scipy/sparse/tests/test_coo.py b/scipy/sparse/tests/test_coo.py index 090c44220612..3fdc26c8c5c0 100644 --- a/scipy/sparse/tests/test_coo.py +++ b/scipy/sparse/tests/test_coo.py @@ -160,17 +160,17 @@ def test_transpose_with_axis(): def test_1d_row_and_col(): res = coo_array([1, -2, -3]) - assert np.array_equal(res.row, np.array([0, 1, 2])) - assert np.array_equal(res.col, np.zeros_like(res.row)) + assert np.array_equal(res.col, np.array([0, 1, 2])) + assert np.array_equal(res.row, np.zeros_like(res.row)) assert res.row.dtype == res.col.dtype - res.row = [1, 2, 3] + res.col = [1, 2, 3] assert len(res.indices) == 1 - assert np.array_equal(res.row, np.array([1, 2, 3])) + assert np.array_equal(res.col, np.array([1, 2, 3])) assert res.row.dtype == res.col.dtype - with pytest.raises(ValueError, match="cannot set col attribute"): - res.col = [1, 2, 3] + with pytest.raises(ValueError, match="cannot set row attribute"): + res.row = [1, 2, 3] def test_1d_tocsc_tocsr_todia_todok(): @@ -229,7 +229,8 @@ def test_eliminate_zeros(): assert arr1d.nnz == 1 assert arr1d.count_nonzero() == 1 assert np.array_equal(arr1d.toarray(), np.array([0, 1])) - assert np.array_equal(arr1d.row, np.array([1])) + assert np.array_equal(arr1d.col, np.array([1])) + assert np.array_equal(arr1d.row, np.array([0])) def test_1d_add_dense(): @@ -237,7 +238,7 @@ def test_1d_add_dense(): den_b = np.array([0, 1, 2, 3]) exp = den_a + den_b res = coo_array(den_a) + den_b - assert type(res) == type(exp) + assert isinstance(res, np.ndarray) assert np.array_equal(res, exp) @@ -256,6 +257,7 @@ def test_1d_mul_vector(): den_b = np.array([0, 1, 2, 3]) exp = den_a @ den_b res = coo_array(den_a) @ den_b + assert isinstance(res, np.int_) assert np.ndim(res) == 0 assert np.array_equal(res, exp) @@ -265,7 +267,7 @@ def test_1d_mul_multivector(): other = np.array([[0, 1, 2, 3], [3, 2, 1, 0]]).T exp = den @ other res = coo_array(den) @ other - assert type(res) == type(exp) + assert isinstance(res, np.ndarray) assert np.array_equal(res, exp) From 365ba55a5a8203bddc0da174c39c077c5e3afe17 Mon Sep 17 00:00:00 2001 From: Dan Schult Date: Sat, 25 Nov 2023 08:59:51 -0500 Subject: [PATCH 07/14] change M -> A for dense array --- scipy/sparse/_coo.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py index 88249fdfd081..9ac46b82e439 100644 --- a/scipy/sparse/_coo.py +++ b/scipy/sparse/_coo.py @@ -74,22 +74,22 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False): self.has_canonical_format = False else: # dense argument - M = np.asarray(arg1) + A = np.asarray(arg1) if not is_array: - M = np.atleast_2d(M) - if M.ndim != 2: + A = np.atleast_2d(A) + if A.ndim != 2: raise TypeError('expected dimension <= 2 array or matrix') - self._shape = check_shape(M.shape, allow_ndim=is_array) + self._shape = check_shape(A.shape, allow_ndim=is_array) if shape is not None: if check_shape(shape, allow_ndim=is_array) != self._shape: raise ValueError('inconsistent shapes: %s != %s' % (shape, self._shape)) index_dtype = self._get_index_dtype(maxval=max(self._shape)) - indices = M.nonzero() + indices = A.nonzero() self.indices = tuple(idx.astype(index_dtype, copy=False) for idx in indices) - self.data = M[indices] + self.data = A[indices] self.has_canonical_format = True if dtype is not None: From 85894f96bee67fbde193ea63e8a2d12ba5ce7d4f Mon Sep 17 00:00:00 2001 From: Dan Schult Date: Sat, 25 Nov 2023 09:17:20 -0500 Subject: [PATCH 08/14] change row property --- scipy/sparse/_coo.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py index 9ac46b82e439..7a0a103af4d4 100644 --- a/scipy/sparse/_coo.py +++ b/scipy/sparse/_coo.py @@ -99,17 +99,15 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False): @property def row(self): - if self.ndim > 1: - return self.indices[0] - return np.zeros(self.nnz, dtype=self.indices[0].dtype) + return self.indices[-2] if self.ndim > 1 else np.zeros_like(self.col) @row.setter def row(self, new_row): if self.ndim < 2: raise ValueError('cannot set row attribute of a 1-dimensional sparse array') - new_row = np.asarray(new_row, dtype=self.indices[0].dtype) - self.indices = (new_row,) + self.indices[1:] + new_row = np.asarray(new_row, dtype=self.indices[-2].dtype) + self.indices = self.indices[:-2] + (new_row,) + self.indices[-1:] @property def col(self): From 51ed57eeff43b49d042f83a36bf271dc0815afdd Mon Sep 17 00:00:00 2001 From: Dan Schult Date: Sat, 25 Nov 2023 08:47:17 -0500 Subject: [PATCH 09/14] add unified interface _shape_as_2d to get 2d version of shape --- scipy/sparse/_base.py | 5 +++++ scipy/sparse/_coo.py | 5 ++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/scipy/sparse/_base.py b/scipy/sparse/_base.py index 0db7725c8c67..69444fc33144 100644 --- a/scipy/sparse/_base.py +++ b/scipy/sparse/_base.py @@ -72,6 +72,11 @@ class _spbase: def ndim(self) -> int: return len(self._shape) + @property + def _shape_as_2d(self): + s = self._shape + return (1, s[-1]) if len(s) == 1 else s + @property def _bsr_container(self): from ._bsr import bsr_array diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py index 7a0a103af4d4..39b34a3e6a29 100644 --- a/scipy/sparse/_coo.py +++ b/scipy/sparse/_coo.py @@ -268,7 +268,7 @@ def toarray(self, order=None, out=None): raise ValueError("Cannot densify higher-rank sparse array") # This handles both 0D and 1D cases correctly regardless of the # original shape. - *_, M, N = (1, 1) + self.shape + M, N = self._shape_as_2d coo_todense(M, N, self.nnz, self.row, self.col, self.data, B.ravel('A'), fortran) # Note: reshape() doesn't copy here, but does return a new array (view). @@ -527,8 +527,7 @@ def _add_dense(self, other): dtype = upcast_char(self.dtype.char, other.dtype.char) result = np.array(other, dtype=dtype, copy=True) fortran = int(result.flags.f_contiguous) - M = self.shape[0] if self.ndim > 1 else 1 - N = self.shape[-1] + M, N = self._shape_as_2d coo_todense(M, N, self.nnz, self.row, self.col, self.data, result.ravel('A'), fortran) return self._container(result, copy=False) From 68f244b0e25cb16ab4de276f969c8827c4fcf7f0 Mon Sep 17 00:00:00 2001 From: Dan Schult Date: Tue, 28 Nov 2023 14:54:35 -0500 Subject: [PATCH 10/14] revert non-1d related changes --- scipy/sparse/_coo.py | 16 ++++++++-------- scipy/sparse/tests/test_coo.py | 7 +++---- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py index 39b34a3e6a29..07f5c7f1bc88 100644 --- a/scipy/sparse/_coo.py +++ b/scipy/sparse/_coo.py @@ -74,22 +74,22 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False): self.has_canonical_format = False else: # dense argument - A = np.asarray(arg1) + M = np.asarray(arg1) if not is_array: - A = np.atleast_2d(A) - if A.ndim != 2: + M = np.atleast_2d(M) + if M.ndim != 2: raise TypeError('expected dimension <= 2 array or matrix') - self._shape = check_shape(A.shape, allow_ndim=is_array) + self._shape = check_shape(M.shape, allow_ndim=is_array) if shape is not None: if check_shape(shape, allow_ndim=is_array) != self._shape: raise ValueError('inconsistent shapes: %s != %s' % (shape, self._shape)) index_dtype = self._get_index_dtype(maxval=max(self._shape)) - indices = A.nonzero() + indices = M.nonzero() self.indices = tuple(idx.astype(index_dtype, copy=False) for idx in indices) - self.data = A[indices] + self.data = M[indices] self.has_canonical_format = True if dtype is not None: @@ -303,7 +303,7 @@ def tocsc(self, copy=False): else: M,N = self.shape idx_dtype = self._get_index_dtype( - (self.col, self.row), maxval=max(self.nnz, *self.shape) + (self.col, self.row), maxval=max(self.nnz, M) ) row = self.row.astype(idx_dtype, copy=False) col = self.col.astype(idx_dtype, copy=False) @@ -347,7 +347,7 @@ def tocsr(self, copy=False): else: M,N = self.shape idx_dtype = self._get_index_dtype( - (self.row, self.col), maxval=max(self.nnz, *self.shape) + (self.row, self.col), maxval=max(self.nnz, N) ) row = self.row.astype(idx_dtype, copy=False) col = self.col.astype(idx_dtype, copy=False) diff --git a/scipy/sparse/tests/test_coo.py b/scipy/sparse/tests/test_coo.py index 3fdc26c8c5c0..1aa2f96c5fe4 100644 --- a/scipy/sparse/tests/test_coo.py +++ b/scipy/sparse/tests/test_coo.py @@ -161,7 +161,7 @@ def test_transpose_with_axis(): def test_1d_row_and_col(): res = coo_array([1, -2, -3]) assert np.array_equal(res.col, np.array([0, 1, 2])) - assert np.array_equal(res.row, np.zeros_like(res.row)) + assert np.array_equal(res.row, np.zeros_like(res.col)) assert res.row.dtype == res.col.dtype res.col = [1, 2, 3] @@ -238,7 +238,7 @@ def test_1d_add_dense(): den_b = np.array([0, 1, 2, 3]) exp = den_a + den_b res = coo_array(den_a) + den_b - assert isinstance(res, np.ndarray) + assert type(res) == type(exp) assert np.array_equal(res, exp) @@ -257,7 +257,6 @@ def test_1d_mul_vector(): den_b = np.array([0, 1, 2, 3]) exp = den_a @ den_b res = coo_array(den_a) @ den_b - assert isinstance(res, np.int_) assert np.ndim(res) == 0 assert np.array_equal(res, exp) @@ -267,7 +266,7 @@ def test_1d_mul_multivector(): other = np.array([[0, 1, 2, 3], [3, 2, 1, 0]]).T exp = den @ other res = coo_array(den) @ other - assert isinstance(res, np.ndarray) + assert type(res) == type(exp) assert np.array_equal(res, exp) From 7fb0853f3b999c564f8b7374e1540e19a402408f Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Tue, 5 Dec 2023 09:15:49 -0500 Subject: [PATCH 11/14] Rename allow_ndim to allow_1d We don't want to support dimensions > 2 just yet. --- scipy/sparse/_coo.py | 20 +++++++++----------- scipy/sparse/_sputils.py | 22 +++++++++++++--------- scipy/sparse/tests/test_coo.py | 25 ++++++------------------- scipy/sparse/tests/test_sputils.py | 12 ++++++------ 4 files changed, 34 insertions(+), 45 deletions(-) diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py index ed915787c5e2..58698e06d667 100644 --- a/scipy/sparse/_coo.py +++ b/scipy/sparse/_coo.py @@ -28,8 +28,8 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False): is_array = isinstance(self, sparray) if isinstance(arg1, tuple): - if isshape(arg1, allow_ndim=is_array): - self._shape = check_shape(arg1, allow_ndim=is_array) + if isshape(arg1, allow_1d=is_array): + self._shape = check_shape(arg1, allow_1d=is_array) idx_dtype = self._get_index_dtype(maxval=max(self._shape)) data_dtype = getdtype(dtype, default=float) self.indices = tuple(np.array([], dtype=idx_dtype) @@ -48,7 +48,7 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False): 'sized index arrays') shape = tuple(operator.index(np.max(idx)) + 1 for idx in indices) - self._shape = check_shape(shape, allow_ndim=is_array) + self._shape = check_shape(shape, allow_1d=is_array) idx_dtype = self._get_index_dtype(indices, maxval=max(self.shape), @@ -62,15 +62,13 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False): if arg1.format == self.format and copy: self.indices = tuple(idx.copy() for idx in arg1.indices) self.data = arg1.data.copy() - self._shape = check_shape(arg1.shape, - allow_ndim=is_array) + self._shape = check_shape(arg1.shape, allow_1d=is_array) self.has_canonical_format = arg1.has_canonical_format else: coo = arg1.tocoo() self.indices = tuple(coo.indices) self.data = coo.data - self._shape = check_shape(coo.shape, - allow_ndim=is_array) + self._shape = check_shape(coo.shape, allow_1d=is_array) self.has_canonical_format = False else: # dense argument @@ -80,9 +78,9 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False): if M.ndim != 2: raise TypeError('expected dimension <= 2 array or matrix') - self._shape = check_shape(M.shape, allow_ndim=is_array) + self._shape = check_shape(M.shape, allow_1d=is_array) if shape is not None: - if check_shape(shape, allow_ndim=is_array) != self._shape: + if check_shape(shape, allow_1d=is_array) != self._shape: raise ValueError(f'inconsistent shapes: {shape} != {self._shape}') index_dtype = self._get_index_dtype(maxval=max(self._shape)) indices = M.nonzero() @@ -119,7 +117,7 @@ def col(self, new_col): def reshape(self, *args, **kwargs): is_array = isinstance(self, sparray) - shape = check_shape(args, self.shape, allow_ndim=is_array) + shape = check_shape(args, self.shape, allow_1d=is_array) order, copy = check_reshape_kwargs(kwargs) # Return early if reshape is not required @@ -222,7 +220,7 @@ def transpose(self, axes=None, copy=False): def resize(self, *shape) -> None: is_array = isinstance(self, sparray) - shape = check_shape(shape, allow_ndim=is_array) + shape = check_shape(shape, allow_1d=is_array) # Check for added dimensions. if len(shape) > self.ndim: diff --git a/scipy/sparse/_sputils.py b/scipy/sparse/_sputils.py index e70578096dce..3bdb8f43b633 100644 --- a/scipy/sparse/_sputils.py +++ b/scipy/sparse/_sputils.py @@ -236,14 +236,14 @@ def isintlike(x) -> bool: return True -def isshape(x, nonneg=False, allow_ndim=False) -> bool: +def isshape(x, nonneg=False, *, allow_1d=False) -> bool: """Is x a valid tuple of dimensions? If nonneg, also checks that the dimensions are non-negative. - If allow_ndim, shapes of any dimensionality are allowed. + If allow_1d, shapes of length 1 or 2 are allowed. """ ndim = len(x) - if not allow_ndim and ndim != 2: + if ndim != 2 and not (allow_1d and ndim == 1): return False for d in x: if not isintlike(d): @@ -292,7 +292,7 @@ def validateaxis(axis) -> None: raise ValueError("axis out of range") -def check_shape(args, current_shape=None, allow_ndim=False) -> tuple[int, ...]: +def check_shape(args, current_shape=None, *, allow_1d=False) -> tuple[int, ...]: """Imitate numpy.matrix handling of shape arguments Parameters @@ -302,8 +302,8 @@ def check_shape(args, current_shape=None, allow_ndim=False) -> tuple[int, ...]: current_shape : tuple, optional The current shape of the sparse array or matrix. If None (default), the current shape will be inferred from args. - allow_ndim : bool, optional - If True, then n-D arrays are accepted. + allow_1d : bool, optional + If True, then 1-D or 2-D arrays are accepted. If False (default), then only 2-D arrays are accepted and an error is raised otherwise. @@ -326,9 +326,13 @@ def check_shape(args, current_shape=None, allow_ndim=False) -> tuple[int, ...]: new_shape = tuple(operator.index(arg) for arg in args) if current_shape is None: - if not allow_ndim and len(new_shape) != 2: + if allow_1d: + if len(new_shape) not in (1, 2): + raise ValueError('shape must be a 1- or 2-tuple of positive ' + 'integers') + elif len(new_shape) != 2: raise ValueError('shape must be a 2-tuple of positive integers') - elif any(d < 0 for d in new_shape): + if any(d < 0 for d in new_shape): raise ValueError("'shape' elements cannot be negative") else: # Check the current size only if needed @@ -353,7 +357,7 @@ def check_shape(args, current_shape=None, allow_ndim=False) -> tuple[int, ...]: else: raise ValueError('can only specify one unknown dimension') - if not allow_ndim and len(new_shape) != 2: + if len(new_shape) != 2 and not (allow_1d and len(new_shape) == 1): raise ValueError('matrix shape must be two-dimensional') return new_shape diff --git a/scipy/sparse/tests/test_coo.py b/scipy/sparse/tests/test_coo.py index 1aa2f96c5fe4..b7a6cee97b72 100644 --- a/scipy/sparse/tests/test_coo.py +++ b/scipy/sparse/tests/test_coo.py @@ -12,11 +12,8 @@ def test_shape_constructor(): assert empty2d.shape == (3, 2) assert np.array_equal(empty2d.toarray(), np.zeros((3, 2))) - empty3d = coo_array((3, 2, 2)) - assert empty3d.shape == (3, 2, 2) - with pytest.raises(ValueError, - match='Cannot densify higher-rank sparse array'): - empty3d.toarray() + with pytest.raises(TypeError, match='invalid input format'): + coo_array((3, 2, 2)) def test_dense_constructor(): @@ -28,8 +25,8 @@ def test_dense_constructor(): assert res2d.shape == (2, 3) assert np.array_equal(res2d.toarray(), np.array([[1, 2, 3], [4, 5, 6]])) - res3d = coo_array([[[3]], [[4]]]) - assert res3d.shape == (2, 1, 1) + with pytest.raises(ValueError, match='shape must be a 1- or 2-tuple'): + coo_array([[[3]], [[4]]]) def test_dense_constructor_with_shape(): @@ -41,8 +38,8 @@ def test_dense_constructor_with_shape(): assert res2d.shape == (2, 3) assert np.array_equal(res2d.toarray(), np.array([[1, 2, 3], [4, 5, 6]])) - res3d = coo_array([[[3]], [[4]]], shape=(2, 1, 1)) - assert res3d.shape == (2, 1, 1) + with pytest.raises(ValueError, match='shape must be a 1- or 2-tuple'): + coo_array([[[3]], [[4]]], shape=(2, 1, 1)) def test_dense_constructor_with_inconsistent_shape(): @@ -121,10 +118,6 @@ def test_nnz(): assert arr2d.shape == (2, 3) assert arr2d.nnz == 3 - arr3d = coo_array([[[1, 2, 0], [0, 0, 0]]]) - assert arr3d.shape == (1, 2, 3) - assert arr3d.nnz == 2 - def test_transpose(): arr1d = coo_array([1, 0, 3]).T @@ -135,9 +128,6 @@ def test_transpose(): assert arr2d.shape == (3, 2) assert np.array_equal(arr2d.toarray(), np.array([[1, 0], [2, 0], [0, 3]])) - arr3d = coo_array([[[1, 2, 0], [0, 0, 0]]]).T - assert arr3d.shape == (3, 2, 1) - def test_transpose_with_axis(): arr1d = coo_array([1, 0, 3]).transpose(axes=(0,)) @@ -148,9 +138,6 @@ def test_transpose_with_axis(): assert arr2d.shape == (2, 3) assert np.array_equal(arr2d.toarray(), np.array([[1, 2, 0], [0, 0, 3]])) - arr3d = coo_array([[[1, 2, 0], [0, 0, 0]]]).transpose(axes=(1, 0, 2)) - assert arr3d.shape == (2, 1, 3) - with pytest.raises(ValueError, match="axes don't match matrix dimensions"): coo_array([1, 0, 3]).transpose(axes=(0, 1)) diff --git a/scipy/sparse/tests/test_sputils.py b/scipy/sparse/tests/test_sputils.py index 94f5de26e1f5..4545b49bea2c 100644 --- a/scipy/sparse/tests/test_sputils.py +++ b/scipy/sparse/tests/test_sputils.py @@ -67,13 +67,13 @@ def test_isshape(self): assert_equal(sputils.isshape((-1, 2), nonneg=True),False) assert_equal(sputils.isshape((2, -1), nonneg=True),False) - assert_equal(sputils.isshape((1.5, 2), allow_ndim=True), False) - assert_equal(sputils.isshape(([2], 2), allow_ndim=True), False) - assert_equal(sputils.isshape((2, 2, -2), nonneg=True, allow_ndim=True), + assert_equal(sputils.isshape((1.5, 2), allow_1d=True), False) + assert_equal(sputils.isshape(([2], 2), allow_1d=True), False) + assert_equal(sputils.isshape((2, 2, -2), nonneg=True, allow_1d=True), False) - assert_equal(sputils.isshape((2,), allow_ndim=True), True) - assert_equal(sputils.isshape((2, 2,), allow_ndim=True), True) - assert_equal(sputils.isshape((2, 2, 2), allow_ndim=True), True) + assert_equal(sputils.isshape((2,), allow_1d=True), True) + assert_equal(sputils.isshape((2, 2,), allow_1d=True), True) + assert_equal(sputils.isshape((2, 2, 2), allow_1d=True), False) def test_issequence(self): assert_equal(sputils.issequence((1,)), True) From 966415df93bbebdee913d519c3b2f7a6f3855f55 Mon Sep 17 00:00:00 2001 From: Dan Schult Date: Tue, 5 Dec 2023 20:19:10 -0500 Subject: [PATCH 12/14] gentle handling of 1d in other code --- scipy/sparse/_base.py | 5 +++-- scipy/sparse/_compressed.py | 2 ++ scipy/sparse/_construct.py | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/scipy/sparse/_base.py b/scipy/sparse/_base.py index 6775d084ea54..82062b2a0a71 100644 --- a/scipy/sparse/_base.py +++ b/scipy/sparse/_base.py @@ -581,8 +581,7 @@ def _mul_dispatch(self, other): # Currently matrix multiplication is only supported # for 2D arrays. Hence we unpacked and use only the # two last axes' lengths. - N = self.shape[-1] - M = self.shape[-2] if self.ndim > 1 else 1 + M, N = self._shape_as_2d if other.__class__ is np.ndarray: # Fast path for the most common case @@ -600,6 +599,8 @@ def _mul_dispatch(self, other): if issparse(other): if self.shape[1] != other.shape[0]: raise ValueError('dimension mismatch') + if other.ndim == 1: + raise ValueError('Cannot yet multiply a 1d sparse array') return self._mul_sparse_matrix(other) # If it's a list or whatever, treat it like an array diff --git a/scipy/sparse/_compressed.py b/scipy/sparse/_compressed.py index 45dcfc6cc7d5..f11fa927dfd1 100644 --- a/scipy/sparse/_compressed.py +++ b/scipy/sparse/_compressed.py @@ -370,6 +370,8 @@ def multiply(self, other): if self.shape == other.shape: other = self.__class__(other) return self._binopt(other, '_elmul_') + if other.ndim == 1: + raise TypeError("broadcast from a 1d array not yet supported") # Single element. elif other.shape == (1, 1): return self._mul_scalar(other.toarray()[0, 0]) diff --git a/scipy/sparse/_construct.py b/scipy/sparse/_construct.py index 542d9f34d70a..264f56c130d9 100644 --- a/scipy/sparse/_construct.py +++ b/scipy/sparse/_construct.py @@ -935,6 +935,8 @@ def block_diag(mats, format=None, dtype=None): nrows, ncols = a.shape if issparse(a): a = a.tocoo() + if a.ndim == 1: + a = a.reshape((a._shape_as_2d)) row.append(a.row + r_idx) col.append(a.col + c_idx) data.append(a.data) From 41b75922d5b5a8d024cdfd7408742890affe845e Mon Sep 17 00:00:00 2001 From: Dan Schult Date: Thu, 7 Dec 2023 07:07:17 -0500 Subject: [PATCH 13/14] add test for block_diag and make work --- scipy/sparse/_construct.py | 5 ++--- scipy/sparse/tests/test_construct.py | 6 ++++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/scipy/sparse/_construct.py b/scipy/sparse/_construct.py index 264f56c130d9..a15869ffe350 100644 --- a/scipy/sparse/_construct.py +++ b/scipy/sparse/_construct.py @@ -932,15 +932,14 @@ def block_diag(mats, format=None, dtype=None): for a in mats: if isinstance(a, (list, numbers.Number)): a = coo_array(np.atleast_2d(a)) - nrows, ncols = a.shape if issparse(a): a = a.tocoo() - if a.ndim == 1: - a = a.reshape((a._shape_as_2d)) + nrows, ncols = a._shape_as_2d row.append(a.row + r_idx) col.append(a.col + c_idx) data.append(a.data) else: + nrows, ncols = a.shape a_row, a_col = np.divmod(np.arange(nrows*ncols), ncols) row.append(a_row + r_idx) col.append(a_col + c_idx) diff --git a/scipy/sparse/tests/test_construct.py b/scipy/sparse/tests/test_construct.py index 0788d8dc8302..a4fe1040b5b2 100644 --- a/scipy/sparse/tests/test_construct.py +++ b/scipy/sparse/tests/test_construct.py @@ -565,6 +565,12 @@ def test_block_diag_scalar_1d_args(self): # one 1d matrix and a scalar assert_array_equal(construct.block_diag([[2,3], 4]).toarray(), [[2, 3, 0], [0, 0, 4]]) + # 1d sparse arrays + A = coo_array([1,0,3]) + B = coo_array([0,4]) + assert_array_equal(construct.block_diag([A, B]).toarray(), + [[1, 0, 3, 0, 0], [0, 0, 0, 0, 4]]) + def test_block_diag_1(self): """ block_diag with one matrix """ From 264533fada55085629efa57f459f532f56dfac00 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Mon, 18 Dec 2023 15:07:05 -0500 Subject: [PATCH 14/14] Fix lint issue --- scipy/sparse/_coo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py index 38e5bdd402fb..b30fb8edf8d0 100644 --- a/scipy/sparse/_coo.py +++ b/scipy/sparse/_coo.py @@ -81,7 +81,8 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False): self._shape = check_shape(M.shape, allow_1d=is_array) if shape is not None: if check_shape(shape, allow_1d=is_array) != self._shape: - raise ValueError(f'inconsistent shapes: {shape} != {self._shape}') + message = f'inconsistent shapes: {shape} != {self._shape}' + raise ValueError(message) index_dtype = self._get_index_dtype(maxval=max(self._shape)) indices = M.nonzero() self.indices = tuple(idx.astype(index_dtype, copy=False)