diff --git a/scipy/sparse/__init__.py b/scipy/sparse/__init__.py
index 875ce6dd8298..d9263834fb59 100644
--- a/scipy/sparse/__init__.py
+++ b/scipy/sparse/__init__.py
@@ -152,30 +152,30 @@
 Usage information
 =================
 
-There are seven available sparse matrix types:
-
-    1. csc_matrix: Compressed Sparse Column format
-    2. csr_matrix: Compressed Sparse Row format
-    3. bsr_matrix: Block Sparse Row format
-    4. lil_matrix: List of Lists format
-    5. dok_matrix: Dictionary of Keys format
-    6. coo_matrix: COOrdinate format (aka IJV, triplet format)
-    7. dia_matrix: DIAgonal format
-
-To construct a matrix efficiently, use either dok_matrix or lil_matrix.
-The lil_matrix class supports basic slicing and fancy indexing with a
+There are seven available sparse array types:
+
+    1. `csc_array`: Compressed Sparse Column format
+    2. `csr_array`: Compressed Sparse Row format
+    3. `bsr_array`: Block Sparse Row format
+    4. `lil_array`: List of Lists format
+    5. `dok_array`: Dictionary of Keys format
+    6. `coo_array`: COOrdinate format (aka IJV, triplet format)
+    7. `dia_array`: DIAgonal format
+
+To construct an array efficiently, use either `dok_array` or `lil_array`.
+The `lil_array` class supports basic slicing and fancy indexing with a
 similar syntax to NumPy arrays. As illustrated below, the COO format
-may also be used to efficiently construct matrices. Despite their
+may also be used to efficiently construct arrays. Despite their
 similarity to NumPy arrays, it is **strongly discouraged** to use NumPy
-functions directly on these matrices because NumPy may not properly convert
+functions directly on these arrays because NumPy may not properly convert
 them for computations, leading to unexpected (and incorrect) results. If you
-do want to apply a NumPy function to these matrices, first check if SciPy has
-its own implementation for the given sparse matrix class, or **convert the
-sparse matrix to a NumPy array** (e.g., using the `toarray()` method of the
+do want to apply a NumPy function to these arrays, first check if SciPy has
+its own implementation for the given sparse array class, or **convert the
+sparse array to a NumPy array** (e.g., using the ``toarray`` method of the
 class) first before applying the method.
 
 To perform manipulations such as multiplication or inversion, first
-convert the matrix to either CSC or CSR format. The lil_matrix format is
+convert the array to either CSC or CSR format. The `lil_array` format is
 row-based, so conversion to CSR is efficient, whereas conversion to CSC
 is less so.
 
@@ -184,17 +184,17 @@
 
 Matrix vector product
 ---------------------
-To do a vector product between a sparse matrix and a vector simply use
-the matrix `dot` method, as described in its docstring:
+To do a vector product between a sparse array and a vector simply use
+the array ``dot`` method, as described in its docstring:
 
 >>> import numpy as np
->>> from scipy.sparse import csr_matrix
->>> A = csr_matrix([[1, 2, 0], [0, 0, 3], [4, 0, 5]])
+>>> from scipy.sparse import csr_array
+>>> A = csr_array([[1, 2, 0], [0, 0, 3], [4, 0, 5]])
 >>> v = np.array([1, 0, -1])
 >>> A.dot(v)
 array([ 1, -3, -1], dtype=int64)
 
-.. warning:: As of NumPy 1.7, `np.dot` is not aware of sparse matrices,
+.. warning:: As of NumPy 1.7, ``np.dot`` is not aware of sparse arrays,
   therefore using it will result on unexpected results or errors.
   The corresponding dense array should be obtained first instead:
 
@@ -203,18 +203,18 @@
 
   but then all the performance advantages would be lost.
 
-The CSR format is specially suitable for fast matrix vector products.
+The CSR format is especially suitable for fast matrix vector products.
 
 Example 1
 ---------
-Construct a 1000x1000 lil_matrix and add some values to it:
+Construct a 1000x1000 `lil_array` and add some values to it:
 
->>> from scipy.sparse import lil_matrix
+>>> from scipy.sparse import lil_array
 >>> from scipy.sparse.linalg import spsolve
 >>> from numpy.linalg import solve, norm
 >>> from numpy.random import rand
 
->>> A = lil_matrix((1000, 1000))
+>>> A = lil_array((1000, 1000))
 >>> A[0, :100] = rand(100)
 >>> A[1, 100:200] = A[0, :100]
 >>> A.setdiag(rand(1000))
@@ -225,7 +225,7 @@
 >>> b = rand(1000)
 >>> x = spsolve(A, b)
 
-Convert it to a dense matrix and solve, and check that the result
+Convert it to a dense array and solve, and check that the result
 is the same:
 
 >>> x_ = solve(A.toarray(), b)
@@ -242,14 +242,14 @@
 Example 2
 ---------
 
-Construct a matrix in COO format:
+Construct an array in COO format:
 
 >>> from scipy import sparse
 >>> from numpy import array
 >>> I = array([0,3,1,0])
 >>> J = array([0,3,1,2])
 >>> V = array([4,5,7,9])
->>> A = sparse.coo_matrix((V,(I,J)),shape=(4,4))
+>>> A = sparse.coo_array((V,(I,J)),shape=(4,4))
 
 Notice that the indices do not need to be sorted.
 
@@ -258,7 +258,7 @@
 >>> I = array([0,0,1,3,1,0,0])
 >>> J = array([0,2,1,3,1,0,0])
 >>> V = array([1,1,1,1,1,1,1])
->>> B = sparse.coo_matrix((V,(I,J)),shape=(4,4)).tocsr()
+>>> B = sparse.coo_array((V,(I,J)),shape=(4,4)).tocsr()
 
 This is useful for constructing finite-element stiffness and mass matrices.
 
@@ -266,7 +266,7 @@
 ---------------
 
 CSR column indices are not necessarily sorted. Likewise for CSC row
-indices. Use the .sorted_indices() and .sort_indices() methods when
+indices. Use the ``.sorted_indices()`` and ``.sort_indices()`` methods when
 sorted indices are required (e.g., when passing data to other libraries).
 
 """
diff --git a/scipy/sparse/_base.py b/scipy/sparse/_base.py
index f781e5be3912..aa7ce11c9a85 100644
--- a/scipy/sparse/_base.py
+++ b/scipy/sparse/_base.py
@@ -120,7 +120,7 @@ def __init__(self, maxprint=MAXPRINT):
     def reshape(self, *args, **kwargs):
         """reshape(self, shape, order='C', copy=False)
 
-        Gives a new shape to a sparse array without changing its data.
+        Gives a new shape to a sparse array/matrix without changing its data.
 
         Parameters
         ----------
@@ -139,8 +139,8 @@ def reshape(self, *args, **kwargs):
 
         Returns
         -------
-        reshaped : sparse array
-            A sparse array with the given `shape`, not necessarily of the same
+        reshaped : sparse array/matrix
+            A sparse array/matrix with the given `shape`, not necessarily of the same
             format as the current object.
 
         See Also
@@ -160,7 +160,7 @@ def reshape(self, *args, **kwargs):
         return self.tocoo(copy=copy).reshape(shape, order=order, copy=False)
 
     def resize(self, shape):
-        """Resize the array in-place to dimensions given by ``shape``
+        """Resize the array/matrix in-place to dimensions given by ``shape``
 
         Any elements that lie within the new shape will remain at the same
         indices, while non-zero elements lying outside the new shape are
@@ -169,7 +169,7 @@ def resize(self, shape):
         Parameters
         ----------
         shape : (int, int)
-            number of rows and columns in the new array
+            number of rows and columns in the new array/matrix
 
         Notes
         -----
@@ -187,7 +187,7 @@ def resize(self, shape):
             f'{type(self).__name__}.resize is not implemented')
 
     def astype(self, dtype, casting='unsafe', copy=True):
-        """Cast the array elements to a specified type.
+        """Cast the array/matrix elements to a specified type.
 
         Parameters
         ----------
@@ -204,8 +204,8 @@ def astype(self, dtype, casting='unsafe', copy=True):
             'unsafe' means any data conversions may be done.
         copy : bool, optional
             If `copy` is `False`, the result might share some memory with this
-            array. If `copy` is `True`, it is guaranteed that the result and
-            this array do not share any memory.
+            array/matrix. If `copy` is `True`, it is guaranteed that the result and
+            this array/matrix do not share any memory.
         """
 
         dtype = np.dtype(dtype)
@@ -396,7 +396,7 @@ def __len__(self):
                         " or shape[0]")
 
     def asformat(self, format, copy=False):
-        """Return this array in the passed format.
+        """Return this array/matrix in the passed format.
 
         Parameters
         ----------
@@ -408,7 +408,7 @@ def asformat(self, format, copy=False):
 
         Returns
         -------
-        A : This array in the passed format.
+        A : This array/matrix in the passed format.
         """
         if format is None or format == self.format:
             if copy:
@@ -435,16 +435,15 @@ def asformat(self, format, copy=False):
     ####################################################################
 
     def multiply(self, other):
-        """Point-wise multiplication by another array
-        """
+        """Point-wise multiplication by another array/matrix."""
         return self.tocsr().multiply(other)
 
     def maximum(self, other):
-        """Element-wise maximum between this and another array."""
+        """Element-wise maximum between this and another array/matrix."""
         return self.tocsr().maximum(other)
 
     def minimum(self, other):
-        """Element-wise minimum between this and another array."""
+        """Element-wise minimum between this and another array/matrix."""
         return self.tocsr().minimum(other)
 
     def dot(self, other):
@@ -770,7 +769,7 @@ def __pow__(self, *args, **kwargs):
 
     def transpose(self, axes=None, copy=False):
         """
-        Reverses the dimensions of the sparse array.
+        Reverses the dimensions of the sparse array/matrix.
 
         Parameters
         ----------
@@ -781,7 +780,7 @@ def transpose(self, axes=None, copy=False):
         copy : bool, optional
             Indicates whether or not attributes of `self` should be
             copied whenever possible. The degree to which attributes
-            are copied varies depending on the type of sparse array
+            are copied varies depending on the type of sparse array/matrix
             being used.
 
         Returns
@@ -797,7 +796,7 @@ def transpose(self, axes=None, copy=False):
     def conjugate(self, copy=True):
         """Element-wise complex conjugation.
 
-        If the array is of non-complex data type and `copy` is False,
+        If the array/matrix is of non-complex data type and `copy` is False,
         this method does nothing and the data is not copied.
 
         Parameters
@@ -829,7 +828,7 @@ def _imag(self):
         return self.tocsr()._imag()
 
     def nonzero(self):
-        """nonzero indices
+        """Nonzero indices of the array/matrix.
 
         Returns a tuple of arrays (row,col) containing the indices
         of the non-zero elements of the array.
@@ -902,7 +901,7 @@ def _getrow(self, i):
 
     def todense(self, order=None, out=None):
         """
-        Return a dense matrix representation of this sparse array.
+        Return a dense representation of this sparse array/matrix.
 
         Parameters
         ----------
@@ -917,14 +916,14 @@ def todense(self, order=None, out=None):
             If specified, uses this array (or `numpy.matrix`) as the
             output buffer instead of allocating a new array to
             return. The provided array must have the same shape and
-            dtype as the sparse array on which you are calling the
+            dtype as the sparse array/matrix on which you are calling the
             method.
 
         Returns
         -------
         arr : numpy.matrix, 2-D
             A NumPy matrix object with the same shape and containing
-            the same data represented by the sparse array, with the
+            the same data represented by the sparse array/matrix, with the
             requested memory order. If `out` was passed and was an
             array (rather than a `numpy.matrix`), it will be filled
             with the appropriate values and returned wrapped in a
@@ -934,7 +933,7 @@ def todense(self, order=None, out=None):
 
     def toarray(self, order=None, out=None):
         """
-        Return a dense ndarray representation of this sparse array.
+        Return a dense ndarray representation of this sparse array/matrix.
 
         Parameters
         ----------
@@ -949,7 +948,7 @@ def toarray(self, order=None, out=None):
             If specified, uses this array as the output buffer
             instead of allocating a new array to return. The provided
             array must have the same shape and dtype as the sparse
-            array on which you are calling the method. For most
+            array/matrix on which you are calling the method. For most
             sparse types, `out` is required to be memory contiguous
             (either C or Fortran ordered).
 
@@ -957,7 +956,7 @@ def toarray(self, order=None, out=None):
         -------
         arr : ndarray, 2-D
             An array with the same shape and containing the same
-            data represented by the sparse array, with the requested
+            data represented by the sparse array/matrix, with the requested
             memory order. If `out` was passed, the same object is
             returned after being modified in-place to contain the
             appropriate values.
@@ -968,84 +967,84 @@ def toarray(self, order=None, out=None):
     # tocsr or tocoo. The other conversion methods may be implemented for
     # efficiency, but are not required.
     def tocsr(self, copy=False):
-        """Convert this array to Compressed Sparse Row format.
+        """Convert this array/matrix to Compressed Sparse Row format.
 
-        With copy=False, the data/indices may be shared between this array and
-        the resultant csr_array.
+        With copy=False, the data/indices may be shared between this array/matrix and
+        the resultant csr_array/csr_matrix.
         """
         return self.tocoo(copy=copy).tocsr(copy=False)
 
     def todok(self, copy=False):
-        """Convert this array to Dictionary Of Keys format.
+        """Convert this array/matrix to Dictionary Of Keys format.
 
-        With copy=False, the data/indices may be shared between this array and
-        the resultant dok_array.
+        With copy=False, the data/indices may be shared between this array/matrix and
+        the resultant dok_array/dok_matrix.
         """
         return self.tocoo(copy=copy).todok(copy=False)
 
     def tocoo(self, copy=False):
-        """Convert this array to COOrdinate format.
+        """Convert this array/matrix to COOrdinate format.
 
-        With copy=False, the data/indices may be shared between this array and
-        the resultant coo_array.
+        With copy=False, the data/indices may be shared between this array/matrix and
+        the resultant coo_array/coo_matrix.
         """
         return self.tocsr(copy=False).tocoo(copy=copy)
 
     def tolil(self, copy=False):
-        """Convert this array to List of Lists format.
+        """Convert this array/matrix to List of Lists format.
 
-        With copy=False, the data/indices may be shared between this array and
-        the resultant lil_array.
+        With copy=False, the data/indices may be shared between this array/matrix and
+        the resultant lil_array/lil_matrix.
         """
         return self.tocsr(copy=False).tolil(copy=copy)
 
     def todia(self, copy=False):
-        """Convert this array to sparse DIAgonal format.
+        """Convert this array/matrix to sparse DIAgonal format.
 
-        With copy=False, the data/indices may be shared between this array and
-        the resultant dia_array.
+        With copy=False, the data/indices may be shared between this array/matrix and
+        the resultant dia_array/dia_matrix.
         """
         return self.tocoo(copy=copy).todia(copy=False)
 
     def tobsr(self, blocksize=None, copy=False):
-        """Convert this array to Block Sparse Row format.
+        """Convert this array/matrix to Block Sparse Row format.
 
-        With copy=False, the data/indices may be shared between this array and
-        the resultant bsr_array.
+        With copy=False, the data/indices may be shared between this array/matrix and
+        the resultant bsr_array/bsr_matrix.
 
         When blocksize=(R, C) is provided, it will be used for construction of
-        the bsr_array.
+        the bsr_array/bsr_matrix.
         """
         return self.tocsr(copy=False).tobsr(blocksize=blocksize, copy=copy)
 
     def tocsc(self, copy=False):
-        """Convert this array to Compressed Sparse Column format.
+        """Convert this array/matrix to Compressed Sparse Column format.
 
-        With copy=False, the data/indices may be shared between this array and
-        the resultant csc_array.
+        With copy=False, the data/indices may be shared between this array/matrix and
+        the resultant csc_array/csc_matrix.
         """
         return self.tocsr(copy=copy).tocsc(copy=False)
 
     def copy(self):
-        """Returns a copy of this array.
+        """Returns a copy of this array/matrix.
 
         No data/indices will be shared between the returned value and current
-        array.
+        array/matrix.
         """
         return self.__class__(self, copy=True)
 
     def sum(self, axis=None, dtype=None, out=None):
         """
-        Sum the array elements over a given axis.
+        Sum the array/matrix elements over a given axis.
 
         Parameters
         ----------
         axis : {-2, -1, 0, 1, None} optional
             Axis along which the sum is computed. The default is to
-            compute the sum of all the array elements, returning a scalar
+            compute the sum of all the array/matrix elements, returning a scalar
             (i.e., `axis` = `None`).
         dtype : dtype, optional
-            The type of the returned array and of the accumulator in which
+            The type of the returned array/matrix and of the accumulator in which
             the elements are summed.  The dtype of `a` is used by default
             unless `a` has an integer dtype of less precision than the default
             platform integer.  In that case, if `a` is signed then the platform
@@ -1112,8 +1111,8 @@ def mean(self, axis=None, dtype=None, out=None):
         """
         Compute the arithmetic mean along the specified axis.
 
-        Returns the average of the array elements. The average is taken
-        over all elements in the array by default, otherwise over the
+        Returns the average of the array/matrix elements. The average is taken
+        over all elements in the array/matrix by default, otherwise over the
         specified axis. `float64` intermediate and return values are used
         for integer inputs.
 
@@ -1121,7 +1120,7 @@ def mean(self, axis=None, dtype=None, out=None):
         ----------
         axis : {-2, -1, 0, 1, None} optional
             Axis along which the mean is computed. The default is to compute
-            the mean of all elements in the array (i.e., `axis` = `None`).
+            the mean of all elements in the array/matrix (i.e., `axis` = `None`).
         dtype : data-type, optional
             Type to use in computing the mean. For integer inputs, the default
             is `float64`; for floating point inputs, it is the same as the
@@ -1182,7 +1181,7 @@ def _is_integral(dtype):
                 axis=1, dtype=res_dtype, out=out)
 
     def diagonal(self, k=0):
-        """Returns the kth diagonal of the array.
+        """Returns the kth diagonal of the array/matrix.
 
         Parameters
         ----------
@@ -1208,7 +1207,7 @@ def diagonal(self, k=0):
         return self.tocsr().diagonal(k=k)
 
     def trace(self, offset=0):
-        """Returns the sum along diagonals of the sparse array.
+        """Returns the sum along diagonals of the sparse array/matrix.
 
         Parameters
         ----------
@@ -1221,7 +1220,7 @@ def trace(self, offset=0):
 
     def setdiag(self, values, k=0):
         """
-        Set diagonal or off-diagonal elements of the array.
+        Set diagonal or off-diagonal elements of the array/matrix.
 
         Parameters
         ----------
@@ -1316,7 +1315,7 @@ def _get_index_dtype(self, arrays=(), maxval=None, check_contents=False):
     ## Also uncomment the definition of shape above.
 
     def get_shape(self):
-        """Get shape of a sparse array.
+        """Get shape of a sparse array/matrix.
 
         .. deprecated:: 1.11.0
            This method will be removed in SciPy 1.13.0.
@@ -1359,7 +1358,7 @@ def set_shape(self, shape):
     )  # noqa: F811
 
     def asfptype(self):
-        """Upcast array to a floating point format (if necessary)
+        """Upcast array/matrix to a floating point format (if necessary)
 
         .. deprecated:: 1.11.0
            This method is for internal use only, and will be removed from the
@@ -1387,7 +1386,7 @@ def getmaxprint(self):
         return self._getmaxprint()
 
     def getformat(self):
-        """Matrix storage format.
+        """Sparse array/matrix storage format.
 
         .. deprecated:: 1.11.0
            This method will be removed in SciPy 1.13.0.
@@ -1411,7 +1410,7 @@ def getnnz(self, axis=None):
         Parameters
         ----------
         axis : None, 0, or 1
-            Select between the number of values across the whole array, in
+            Select between the number of values across the whole array/matrix, in
             each column, or in each row.
 
         See also
@@ -1426,7 +1425,7 @@ def getnnz(self, axis=None):
         return self._getnnz(axis=axis)
 
     def getH(self):
-        """Return the Hermitian transpose of this array.
+        """Return the Hermitian transpose of this array/matrix.
 
         .. deprecated:: 1.11.0
            This method will be removed in SciPy 1.13.0.
@@ -1440,12 +1439,12 @@ def getH(self):
         return self.conjugate().transpose()
 
     def getcol(self, j):
-        """Returns a copy of column j of the array, as an (m x 1) sparse
-        array (column vector).
+        """Returns a copy of column j of the array/matrix, as an (m x 1) sparse
+        array/matrix (column vector).
 
         .. deprecated:: 1.11.0
            This method will be removed in SciPy 1.13.0.
-           Use array indexing instead.
+           Use array/matrix indexing instead.
         """
         msg = (
             "`getcol` is deprecated and will be removed in v1.13.0; "
@@ -1455,12 +1454,12 @@ def getcol(self, j):
         return self._getcol(j)
 
     def getrow(self, i):
-        """Returns a copy of row i of the array, as a (1 x n) sparse
-        array (row vector).
+        """Returns a copy of row i of the array/matrix, as a (1 x n) sparse
+        array/matrix (row vector).
 
         .. deprecated:: 1.11.0
            This method will be removed in SciPy 1.13.0.
-           Use array indexing instead.
+           Use array/matrix indexing instead.
         """
         msg = (
             "`getrow` is deprecated and will be removed in v1.13.0; "
@@ -1480,12 +1479,12 @@ class sparray:
 
 
 def issparse(x):
-    """Is `x` of a sparse array type?
+    """Is `x` of a sparse array or sparse matrix type?
 
     Parameters
     ----------
     x
-        object to check for being a sparse array
+        object to check for being a sparse array or sparse matrix
 
     Returns
     -------
diff --git a/scipy/sparse/_bsr.py b/scipy/sparse/_bsr.py
index a0fb9159819a..2390eb394040 100644
--- a/scipy/sparse/_bsr.py
+++ b/scipy/sparse/_bsr.py
@@ -8,7 +8,7 @@
 
 import numpy as np
 
-from ._matrix import spmatrix, _array_doc_to_matrix
+from ._matrix import spmatrix
 from ._data import _data_matrix, _minmax_mixin
 from ._compressed import _cs_matrix
 from ._base import issparse, _formats, _spbase, sparray
@@ -21,109 +21,6 @@
 
 
 class _bsr_base(_cs_matrix, _minmax_mixin):
-    """Block Sparse Row format sparse array.
-
-    This can be instantiated in several ways:
-        bsr_array(D, [blocksize=(R,C)])
-            where D is a dense matrix or 2-D ndarray.
-
-        bsr_array(S, [blocksize=(R,C)])
-            with another sparse array S (equivalent to S.tobsr())
-
-        bsr_array((M, N), [blocksize=(R,C), dtype])
-            to construct an empty sparse array with shape (M, N)
-            dtype is optional, defaulting to dtype='d'.
-
-        bsr_array((data, ij), [blocksize=(R,C), shape=(M, N)])
-            where ``data`` and ``ij`` satisfy ``a[ij[0, k], ij[1, k]] = data[k]``
-
-        bsr_array((data, indices, indptr), [shape=(M, N)])
-            is the standard BSR representation where the block column
-            indices for row i are stored in ``indices[indptr[i]:indptr[i+1]]``
-            and their corresponding block values are stored in
-            ``data[ indptr[i]: indptr[i+1] ]``. If the shape parameter is not
-            supplied, the array dimensions are inferred from the index arrays.
-
-    Attributes
-    ----------
-    dtype : dtype
-        Data type of the array
-    shape : 2-tuple
-        Shape of the array
-    ndim : int
-        Number of dimensions (this is always 2)
-    nnz
-    size
-    data
-        Data array
-    indices
-        BSR format index array
-    indptr
-        BSR format index pointer array
-    blocksize
-    has_sorted_indices : bool
-        Whether indices are sorted
-    has_canonical_format : bool
-    T
-
-
-    Notes
-    -----
-    Sparse arrays can be used in arithmetic operations: they support
-    addition, subtraction, multiplication, division, and matrix power.
-
-    **Summary of BSR format**
-
-    The Block Compressed Row (BSR) format is very similar to the Compressed
-    Sparse Row (CSR) format. BSR is appropriate for sparse matrices with dense
-    sub matrices like the last example below.  Block matrices often arise in
-    vector-valued finite element discretizations. In such cases, BSR is
-    considerably more efficient than CSR and CSC for many sparse arithmetic
-    operations.
-
-    **Blocksize**
-
-    The blocksize (R,C) must evenly divide the shape of the sparse array (M,N).
-    That is, R and C must satisfy the relationship ``M % R = 0`` and
-    ``N % C = 0``.
-
-    If no blocksize is specified, a simple heuristic is applied to determine
-    an appropriate blocksize.
-
-    **Canonical Format**
-
-    In canonical format, there are no duplicate blocks and indices are sorted
-    per row.
-
-    Examples
-    --------
-    >>> from scipy.sparse import bsr_array
-    >>> import numpy as np
-    >>> bsr_array((3, 4), dtype=np.int8).toarray()
-    array([[0, 0, 0, 0],
-           [0, 0, 0, 0],
-           [0, 0, 0, 0]], dtype=int8)
-
-    >>> row = np.array([0, 0, 1, 2, 2, 2])
-    >>> col = np.array([0, 2, 2, 0, 1, 2])
-    >>> data = np.array([1, 2, 3 ,4, 5, 6])
-    >>> bsr_array((data, (row, col)), shape=(3, 3)).toarray()
-    array([[1, 0, 2],
-           [0, 0, 3],
-           [4, 5, 6]])
-
-    >>> indptr = np.array([0, 2, 3, 6])
-    >>> indices = np.array([0, 2, 2, 0, 1, 2])
-    >>> data = np.array([1, 2, 3, 4, 5, 6]).repeat(4).reshape(6, 2, 2)
-    >>> bsr_array((data,indices,indptr), shape=(6, 6)).toarray()
-    array([[1, 1, 0, 0, 2, 2],
-           [1, 1, 0, 0, 2, 2],
-           [0, 0, 0, 0, 3, 3],
-           [0, 0, 0, 0, 3, 3],
-           [4, 4, 5, 5, 6, 6],
-           [4, 4, 5, 5, 6, 6]])
-
-    """
     _format = 'bsr'
 
     def __init__(self, arg1, shape=None, dtype=None, copy=False, blocksize=None):
@@ -235,7 +132,7 @@ def __init__(self, arg1, shape=None, dtype=None, copy=False, blocksize=None):
         self.check_format(full_check=False)
 
     def check_format(self, full_check=True):
-        """Check whether the matrix respects the BSR format.
+        """Check whether the array/matrix respects the BSR format.
 
         Parameters
         ----------
@@ -432,13 +329,13 @@ def _mul_sparse_matrix(self, other):
     ######################
 
     def tobsr(self, blocksize=None, copy=False):
-        """Convert this matrix into Block Sparse Row Format.
+        """Convert this array/matrix into Block Sparse Row Format.
 
         With copy=False, the data/indices may be shared between this
-        matrix and the resultant bsr_array.
+        array/matrix and the resultant bsr_array/bsr_matrix.
 
         If blocksize=(R, C) is provided, it will be used for determining
-        block size of the bsr_array.
+        block size of the bsr_array/bsr_matrix.
         """
         if blocksize not in [None, self.blocksize]:
             return self.tocsr().tobsr(blocksize=blocksize)
@@ -476,10 +373,10 @@ def tocsc(self, copy=False):
     tocsc.__doc__ = _spbase.tocsc.__doc__
 
     def tocoo(self, copy=True):
-        """Convert this matrix to COOrdinate format.
+        """Convert this array/matrix to COOrdinate format.
 
         When copy=False the data array will be shared between
-        this matrix and the resultant coo_matrix.
+        this array/matrix and the resultant coo_array/coo_matrix.
         """
 
         M,N = self.shape
@@ -569,7 +466,7 @@ def eliminate_zeros(self):
         self.prune()
 
     def sum_duplicates(self):
-        """Eliminate duplicate matrix entries by adding them together
+        """Eliminate duplicate array/matrix entries by adding them together
 
         The is an *in place* operation
         """
@@ -602,7 +499,7 @@ def sum_duplicates(self):
         self.has_canonical_format = True
 
     def sort_indices(self):
-        """Sort the indices of this matrix *in place*
+        """Sort the indices of this array/matrix *in place*
         """
         if self.has_sorted_indices:
             return
@@ -615,7 +512,7 @@ def sort_indices(self):
         self.has_sorted_indices = True
 
     def prune(self):
-        """ Remove empty space after all non-zero elements.
+        """Remove empty space after all non-zero elements.
         """
 
         R,C = self.blocksize
@@ -732,11 +629,215 @@ def isspmatrix_bsr(x):
 
 # This namespace class separates array from matrix with isinstance
 class bsr_array(_bsr_base, sparray):
-    pass
+    """
+    Block Sparse Row format sparse array.
+
+    This can be instantiated in several ways:
+        bsr_array(D, [blocksize=(R,C)])
+            where D is a 2-D ndarray.
+
+        bsr_array(S, [blocksize=(R,C)])
+            with another sparse array or matrix S (equivalent to S.tobsr())
+
+        bsr_array((M, N), [blocksize=(R,C), dtype])
+            to construct an empty sparse array with shape (M, N)
+            dtype is optional, defaulting to dtype='d'.
+
+        bsr_array((data, ij), [blocksize=(R,C), shape=(M, N)])
+            where ``data`` and ``ij`` satisfy ``a[ij[0, k], ij[1, k]] = data[k]``
+
+        bsr_array((data, indices, indptr), [shape=(M, N)])
+            is the standard BSR representation where the block column
+            indices for row i are stored in ``indices[indptr[i]:indptr[i+1]]``
+            and their corresponding block values are stored in
+            ``data[ indptr[i]: indptr[i+1] ]``. If the shape parameter is not
+            supplied, the array dimensions are inferred from the index arrays.
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the array
+    shape : 2-tuple
+        Shape of the array
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+    size
+    data
+        BSR format data array of the array
+    indices
+        BSR format index array of the array
+    indptr
+        BSR format index pointer array of the array
+    blocksize
+        Block size
+    has_sorted_indices : bool
+        Whether indices are sorted
+    has_canonical_format : bool
+    T
+
+    Notes
+    -----
+    Sparse arrays can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    **Summary of BSR format**
+
+    The Block Sparse Row (BSR) format is very similar to the Compressed
+    Sparse Row (CSR) format. BSR is appropriate for sparse matrices with dense
+    sub matrices like the last example below. Such sparse block matrices often
+    arise in vector-valued finite element discretizations. In such cases, BSR is
+    considerably more efficient than CSR and CSC for many sparse arithmetic
+    operations.
+
+    **Blocksize**
+
+    The blocksize (R,C) must evenly divide the shape of the sparse array (M,N).
+    That is, R and C must satisfy the relationship ``M % R = 0`` and
+    ``N % C = 0``.
+
+    If no blocksize is specified, a simple heuristic is applied to determine
+    an appropriate blocksize.
+
+    **Canonical Format**
+
+    In canonical format, there are no duplicate blocks and indices are sorted
+    per row.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from scipy.sparse import bsr_array
+    >>> bsr_array((3, 4), dtype=np.int8).toarray()
+    array([[0, 0, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 0]], dtype=int8)
+
+    >>> row = np.array([0, 0, 1, 2, 2, 2])
+    >>> col = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> bsr_array((data, (row, col)), shape=(3, 3)).toarray()
+    array([[1, 0, 2],
+           [0, 0, 3],
+           [4, 5, 6]])
+
+    >>> indptr = np.array([0, 2, 3, 6])
+    >>> indices = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6]).repeat(4).reshape(6, 2, 2)
+    >>> bsr_array((data,indices,indptr), shape=(6, 6)).toarray()
+    array([[1, 1, 0, 0, 2, 2],
+           [1, 1, 0, 0, 2, 2],
+           [0, 0, 0, 0, 3, 3],
+           [0, 0, 0, 0, 3, 3],
+           [4, 4, 5, 5, 6, 6],
+           [4, 4, 5, 5, 6, 6]])
+
+    """
 
-bsr_array.__doc__ = _bsr_base.__doc__
 
 class bsr_matrix(spmatrix, _bsr_base):
-    pass
+    """
+    Block Sparse Row format sparse matrix.
+
+    This can be instantiated in several ways:
+        bsr_matrix(D, [blocksize=(R,C)])
+            where D is a 2-D ndarray.
+
+        bsr_matrix(S, [blocksize=(R,C)])
+            with another sparse array or matrix S (equivalent to S.tobsr())
+
+        bsr_matrix((M, N), [blocksize=(R,C), dtype])
+            to construct an empty sparse matrix with shape (M, N)
+            dtype is optional, defaulting to dtype='d'.
+
+        bsr_matrix((data, ij), [blocksize=(R,C), shape=(M, N)])
+            where ``data`` and ``ij`` satisfy ``a[ij[0, k], ij[1, k]] = data[k]``
+
+        bsr_matrix((data, indices, indptr), [shape=(M, N)])
+            is the standard BSR representation where the block column
+            indices for row i are stored in ``indices[indptr[i]:indptr[i+1]]``
+            and their corresponding block values are stored in
+            ``data[ indptr[i]: indptr[i+1] ]``. If the shape parameter is not
+            supplied, the matrix dimensions are inferred from the index arrays.
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the matrix
+    shape : 2-tuple
+        Shape of the matrix
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+    size
+    data
+        BSR format data array of the matrix
+    indices
+        BSR format index array of the matrix
+    indptr
+        BSR format index pointer array of the matrix
+    blocksize
+        Block size
+    has_sorted_indices : bool
+        Whether indices are sorted
+    has_canonical_format : bool
+    T
+
+    Notes
+    -----
+    Sparse matrices can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    **Summary of BSR format**
+
+    The Block Sparse Row (BSR) format is very similar to the Compressed
+    Sparse Row (CSR) format. BSR is appropriate for sparse matrices with dense
+    sub matrices like the last example below. Such sparse block matrices often
+    arise in vector-valued finite element discretizations. In such cases, BSR is
+    considerably more efficient than CSR and CSC for many sparse arithmetic
+    operations.
+
+    **Blocksize**
+
+    The blocksize (R,C) must evenly divide the shape of the sparse matrix (M,N).
+    That is, R and C must satisfy the relationship ``M % R = 0`` and
+    ``N % C = 0``.
+
+    If no blocksize is specified, a simple heuristic is applied to determine
+    an appropriate blocksize.
+
+    **Canonical Format**
+
+    In canonical format, there are no duplicate blocks and indices are sorted
+    per row.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from scipy.sparse import bsr_matrix
+    >>> bsr_matrix((3, 4), dtype=np.int8).toarray()
+    array([[0, 0, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 0]], dtype=int8)
+
+    >>> row = np.array([0, 0, 1, 2, 2, 2])
+    >>> col = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> bsr_matrix((data, (row, col)), shape=(3, 3)).toarray()
+    array([[1, 0, 2],
+           [0, 0, 3],
+           [4, 5, 6]])
+
+    >>> indptr = np.array([0, 2, 3, 6])
+    >>> indices = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6]).repeat(4).reshape(6, 2, 2)
+    >>> bsr_matrix((data,indices,indptr), shape=(6, 6)).toarray()
+    array([[1, 1, 0, 0, 2, 2],
+           [1, 1, 0, 0, 2, 2],
+           [0, 0, 0, 0, 3, 3],
+           [0, 0, 0, 0, 3, 3],
+           [4, 4, 5, 5, 6, 6],
+           [4, 4, 5, 5, 6, 6]])
+
+    """
 
-bsr_matrix.__doc__ = _array_doc_to_matrix(_bsr_base.__doc__)
diff --git a/scipy/sparse/_compressed.py b/scipy/sparse/_compressed.py
index 3e287a6ed790..3a930447739d 100644
--- a/scipy/sparse/_compressed.py
+++ b/scipy/sparse/_compressed.py
@@ -20,7 +20,7 @@
 
 
 class _cs_matrix(_data_matrix, _minmax_mixin, IndexMixin):
-    """base matrix class for compressed row- and column-oriented matrices"""
+    """base array/matrix class for compressed row- and column-oriented arrays/matrices"""
 
     def __init__(self, arg1, shape=None, dtype=None, copy=False):
         _data_matrix.__init__(self)
@@ -133,7 +133,7 @@ def _set_self(self, other, copy=False):
         self._shape = check_shape(other.shape)
 
     def check_format(self, full_check=True):
-        """Check whether the matrix respects the CSR or CSC format.
+        """Check whether the array/matrix respects the CSR or CSC format.
 
         Parameters
         ----------
@@ -365,7 +365,7 @@ def _sub_sparse(self, other):
         return self._binopt(other, '_minus_')
 
     def multiply(self, other):
-        """Point-wise multiplication by another matrix, vector, or
+        """Point-wise multiplication by another array/matrix, vector, or
         scalar.
         """
         # Scalar multiplication.
@@ -599,7 +599,7 @@ def minimum(self, other):
     #####################
 
     def sum(self, axis=None, dtype=None, out=None):
-        """Sum the matrix over the given axis.  If the axis is None, sum
+        """Sum the array/matrix over the given axis.  If the axis is None, sum
         over both rows and columns, returning a scalar.
         """
         # The _spbase base class already does axis=0 and axis=1 efficiently
@@ -1071,7 +1071,7 @@ def toarray(self, order=None, out=None):
     ##############################################################
 
     def eliminate_zeros(self):
-        """Remove zero entries from the matrix
+        """Remove zero entries from the array/matrix
 
         This is an *in place* operation.
         """
@@ -1082,7 +1082,7 @@ def eliminate_zeros(self):
 
     @property
     def has_canonical_format(self) -> bool:
-        """Whether the matrix has sorted indices and no duplicates
+        """Whether the array/matrix has sorted indices and no duplicates
 
         Returns
             - True: if the above applies
@@ -1110,7 +1110,7 @@ def has_canonical_format(self, val: bool):
             self.has_sorted_indices = True
 
     def sum_duplicates(self):
-        """Eliminate duplicate matrix entries by adding them together
+        """Eliminate duplicate entries by adding them together
 
         This is an *in place* operation.
         """
@@ -1130,7 +1130,7 @@ def has_sorted_indices(self) -> bool:
         """Whether the indices are sorted
 
         Returns
-            - True: if the indices of the matrix are in sorted order
+            - True: if the indices of the array/matrix are in sorted order
             - False: otherwise
         """
         # first check to see if result was cached
@@ -1147,7 +1147,7 @@ def has_sorted_indices(self, val: bool):
 
 
     def sorted_indices(self):
-        """Return a copy of this matrix with sorted indices
+        """Return a copy of this array/matrix with sorted indices
         """
         A = self.copy()
         A.sort_indices()
@@ -1158,7 +1158,7 @@ def sorted_indices(self):
         # return self.toother().toother()
 
     def sort_indices(self):
-        """Sort the indices of this matrix *in place*
+        """Sort the indices of this array/matrix *in place*
         """
 
         if not self.has_sorted_indices:
diff --git a/scipy/sparse/_coo.py b/scipy/sparse/_coo.py
index 66095fbf06c4..3de9da678153 100644
--- a/scipy/sparse/_coo.py
+++ b/scipy/sparse/_coo.py
@@ -8,7 +8,7 @@
 
 import numpy as np
 
-from ._matrix import spmatrix, _array_doc_to_matrix
+from ._matrix import spmatrix
 from ._sparsetools import coo_tocsr, coo_todense, coo_matvec
 from ._base import issparse, SparseEfficiencyWarning, _spbase, sparray
 from ._data import _data_matrix, _minmax_mixin
@@ -20,118 +20,6 @@
 
 
 class _coo_base(_data_matrix, _minmax_mixin):
-    """
-    A sparse matrix in COOrdinate format.
-
-    Also known as the 'ijv' or 'triplet' format.
-
-    This can be instantiated in several ways:
-        coo_array(D)
-            with a dense matrix D
-
-        coo_array(S)
-            with another sparse matrix S (equivalent to S.tocoo())
-
-        coo_array((M, N), [dtype])
-            to construct an empty matrix with shape (M, N)
-            dtype is optional, defaulting to dtype='d'.
-
-        coo_array((data, (i, j)), [shape=(M, N)])
-            to construct from three arrays:
-                1. data[:]   the entries of the matrix, in any order
-                2. i[:]      the row indices of the matrix entries
-                3. j[:]      the column indices of the matrix entries
-
-            Where ``A[i[k], j[k]] = data[k]``.  When shape is not
-            specified, it is inferred from the index arrays
-
-    Attributes
-    ----------
-    dtype : dtype
-        Data type of the matrix
-    shape : 2-tuple
-        Shape of the matrix
-    ndim : int
-        Number of dimensions (this is always 2)
-    nnz
-    size
-    data
-        COO format data array of the matrix
-    row
-        COO format row index array of the matrix
-    col
-        COO format column index array of the matrix
-    has_canonical_format : bool
-        Whether the matrix has sorted indices and no duplicates
-    format
-    T
-
-
-    Notes
-    -----
-
-    Sparse matrices can be used in arithmetic operations: they support
-    addition, subtraction, multiplication, division, and matrix power.
-
-    Advantages of the COO format
-        - facilitates fast conversion among sparse formats
-        - permits duplicate entries (see example)
-        - very fast conversion to and from CSR/CSC formats
-
-    Disadvantages of the COO format
-        - does not directly support:
-            + arithmetic operations
-            + slicing
-
-    Intended Usage
-        - COO is a fast format for constructing sparse matrices
-        - Once a matrix has been constructed, convert to CSR or
-          CSC format for fast arithmetic and matrix vector operations
-        - By default when converting to CSR or CSC format, duplicate (i,j)
-          entries will be summed together.  This facilitates efficient
-          construction of finite element matrices and the like. (see example)
-
-    Canonical format
-        - Entries and indices sorted by row, then column.
-        - There are no duplicate entries (i.e. duplicate (i,j) locations)
-        - Arrays MAY have explicit zeros.
-
-    Examples
-    --------
-
-    >>> # Constructing an empty matrix
-    >>> import numpy as np
-    >>> from scipy.sparse import coo_array
-    >>> coo_array((3, 4), dtype=np.int8).toarray()
-    array([[0, 0, 0, 0],
-           [0, 0, 0, 0],
-           [0, 0, 0, 0]], dtype=int8)
-
-    >>> # Constructing a matrix using ijv format
-    >>> row  = np.array([0, 3, 1, 0])
-    >>> col  = np.array([0, 3, 1, 2])
-    >>> data = np.array([4, 5, 7, 9])
-    >>> coo_array((data, (row, col)), shape=(4, 4)).toarray()
-    array([[4, 0, 9, 0],
-           [0, 7, 0, 0],
-           [0, 0, 0, 0],
-           [0, 0, 0, 5]])
-
-    >>> # Constructing a matrix with duplicate indices
-    >>> row  = np.array([0, 0, 1, 3, 1, 0, 0])
-    >>> col  = np.array([0, 2, 1, 3, 1, 0, 0])
-    >>> data = np.array([1, 1, 1, 1, 1, 1, 1])
-    >>> coo = coo_array((data, (row, col)), shape=(4, 4))
-    >>> # Duplicate indices are maintained until implicitly or explicitly summed
-    >>> np.max(coo.data)
-    1
-    >>> coo.toarray()
-    array([[3, 0, 1, 0],
-           [0, 2, 0, 0],
-           [0, 0, 0, 0],
-           [0, 0, 0, 1]])
-
-    """
     _format = 'coo'
 
     def __init__(self, arg1, shape=None, dtype=None, copy=False):
@@ -303,7 +191,7 @@ def _check(self):
 
     def transpose(self, axes=None, copy=False):
         if axes is not None and axes != (1, 0):
-            raise ValueError("Sparse matrices do not support "
+            raise ValueError("Sparse arrays/matrices do not support "
                               "an 'axes' parameter because swapping "
                               "dimensions is the only logical permutation.")
 
@@ -330,7 +218,6 @@ def resize(self, *shape):
     resize.__doc__ = _spbase.resize.__doc__
 
     def toarray(self, order=None, out=None):
-        """See the docstring for `_spbase.toarray`."""
         B = self._process_toarray_args(order, out)
         fortran = int(B.flags.f_contiguous)
         if not fortran and not B.flags.c_contiguous:
@@ -340,8 +227,10 @@ def toarray(self, order=None, out=None):
                     B.ravel('A'), fortran)
         return B
 
+    toarray.__doc__ = _spbase.toarray.__doc__
+
     def tocsc(self, copy=False):
-        """Convert this matrix to Compressed Sparse Column format
+        """Convert this array/matrix to Compressed Sparse Column format
 
         Duplicate entries will be summed together.
 
@@ -383,7 +272,7 @@ def tocsc(self, copy=False):
             return x
 
     def tocsr(self, copy=False):
-        """Convert this matrix to Compressed Sparse Row format
+        """Convert this array/matrix to Compressed Sparse Row format
 
         Duplicate entries will be summed together.
 
@@ -533,7 +422,7 @@ def _with_data(self,data,copy=True):
                                    shape=self.shape, dtype=data.dtype)
 
     def sum_duplicates(self):
-        """Eliminate duplicate matrix entries by adding them together
+        """Eliminate duplicate entries by adding them together
 
         This is an *in place* operation
         """
@@ -564,7 +453,7 @@ def _sum_duplicates(self, row, col, data):
         return row, col, data
 
     def eliminate_zeros(self):
-        """Remove zero entries from the matrix
+        """Remove zero entries from the array/matrix
 
         This is an *in place* operation
         """
@@ -632,11 +521,229 @@ def isspmatrix_coo(x):
 
 # This namespace class separates array from matrix with isinstance
 class coo_array(_coo_base, sparray):
-    pass
+    """
+    A sparse array in COOrdinate format.
+
+    Also known as the 'ijv' or 'triplet' format.
+
+    This can be instantiated in several ways:
+        coo_array(D)
+            where D is a 2-D ndarray
+
+        coo_array(S)
+            with another sparse array or matrix S (equivalent to S.tocoo())
+
+        coo_array((M, N), [dtype])
+            to construct an empty array with shape (M, N)
+            dtype is optional, defaulting to dtype='d'.
+
+        coo_array((data, (i, j)), [shape=(M, N)])
+            to construct from three arrays:
+                1. data[:]   the entries of the array, in any order
+                2. i[:]      the row indices of the array entries
+                3. j[:]      the column indices of the array entries
+
+            Where ``A[i[k], j[k]] = data[k]``.  When shape is not
+            specified, it is inferred from the index arrays
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the array
+    shape : 2-tuple
+        Shape of the array
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+    size
+    data
+        COO format data array of the array
+    row
+        COO format row index array of the array
+    col
+        COO format column index array of the array
+    has_canonical_format : bool
+        Whether the array has sorted indices and no duplicates
+    format
+    T
+
+    Notes
+    -----
+
+    Sparse arrays can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    Advantages of the COO format
+        - facilitates fast conversion among sparse formats
+        - permits duplicate entries (see example)
+        - very fast conversion to and from CSR/CSC formats
+
+    Disadvantages of the COO format
+        - does not directly support:
+            + arithmetic operations
+            + slicing
+
+    Intended Usage
+        - COO is a fast format for constructing sparse arrays
+        - Once a COO array has been constructed, convert to CSR or
+          CSC format for fast arithmetic and matrix vector operations
+        - By default when converting to CSR or CSC format, duplicate (i,j)
+          entries will be summed together.  This facilitates efficient
+          construction of finite element matrices and the like. (see example)
+
+    Canonical format
+        - Entries and indices sorted by row, then column.
+        - There are no duplicate entries (i.e. duplicate (i,j) locations)
+        - Data arrays MAY have explicit zeros.
+
+    Examples
+    --------
+
+    >>> # Constructing an empty array
+    >>> import numpy as np
+    >>> from scipy.sparse import coo_array
+    >>> coo_array((3, 4), dtype=np.int8).toarray()
+    array([[0, 0, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 0]], dtype=int8)
+
+    >>> # Constructing an array using ijv format
+    >>> row  = np.array([0, 3, 1, 0])
+    >>> col  = np.array([0, 3, 1, 2])
+    >>> data = np.array([4, 5, 7, 9])
+    >>> coo_array((data, (row, col)), shape=(4, 4)).toarray()
+    array([[4, 0, 9, 0],
+           [0, 7, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 5]])
+
+    >>> # Constructing an array with duplicate indices
+    >>> row  = np.array([0, 0, 1, 3, 1, 0, 0])
+    >>> col  = np.array([0, 2, 1, 3, 1, 0, 0])
+    >>> data = np.array([1, 1, 1, 1, 1, 1, 1])
+    >>> coo = coo_array((data, (row, col)), shape=(4, 4))
+    >>> # Duplicate indices are maintained until implicitly or explicitly summed
+    >>> np.max(coo.data)
+    1
+    >>> coo.toarray()
+    array([[3, 0, 1, 0],
+           [0, 2, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 1]])
+
+    """
 
-coo_array.__doc__ = _coo_base.__doc__
 
 class coo_matrix(spmatrix, _coo_base):
-    pass
+    """
+    A sparse matrix in COOrdinate format.
+
+    Also known as the 'ijv' or 'triplet' format.
+
+    This can be instantiated in several ways:
+        coo_matrix(D)
+            where D is a 2-D ndarray
+
+        coo_matrix(S)
+            with another sparse array or matrix S (equivalent to S.tocoo())
+
+        coo_matrix((M, N), [dtype])
+            to construct an empty matrix with shape (M, N)
+            dtype is optional, defaulting to dtype='d'.
+
+        coo_matrix((data, (i, j)), [shape=(M, N)])
+            to construct from three arrays:
+                1. data[:]   the entries of the matrix, in any order
+                2. i[:]      the row indices of the matrix entries
+                3. j[:]      the column indices of the matrix entries
+
+            Where ``A[i[k], j[k]] = data[k]``.  When shape is not
+            specified, it is inferred from the index arrays
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the matrix
+    shape : 2-tuple
+        Shape of the matrix
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+    size
+    data
+        COO format data array of the matrix
+    row
+        COO format row index array of the matrix
+    col
+        COO format column index array of the matrix
+    has_canonical_format : bool
+        Whether the matrix has sorted indices and no duplicates
+    format
+    T
+
+    Notes
+    -----
+
+    Sparse matrices can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    Advantages of the COO format
+        - facilitates fast conversion among sparse formats
+        - permits duplicate entries (see example)
+        - very fast conversion to and from CSR/CSC formats
+
+    Disadvantages of the COO format
+        - does not directly support:
+            + arithmetic operations
+            + slicing
+
+    Intended Usage
+        - COO is a fast format for constructing sparse matrices
+        - Once a COO matrix has been constructed, convert to CSR or
+          CSC format for fast arithmetic and matrix vector operations
+        - By default when converting to CSR or CSC format, duplicate (i,j)
+          entries will be summed together.  This facilitates efficient
+          construction of finite element matrices and the like. (see example)
+
+    Canonical format
+        - Entries and indices sorted by row, then column.
+        - There are no duplicate entries (i.e. duplicate (i,j) locations)
+        - Data arrays MAY have explicit zeros.
+
+    Examples
+    --------
+
+    >>> # Constructing an empty matrix
+    >>> import numpy as np
+    >>> from scipy.sparse import coo_matrix
+    >>> coo_matrix((3, 4), dtype=np.int8).toarray()
+    array([[0, 0, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 0]], dtype=int8)
+
+    >>> # Constructing a matrix using ijv format
+    >>> row  = np.array([0, 3, 1, 0])
+    >>> col  = np.array([0, 3, 1, 2])
+    >>> data = np.array([4, 5, 7, 9])
+    >>> coo_matrix((data, (row, col)), shape=(4, 4)).toarray()
+    array([[4, 0, 9, 0],
+           [0, 7, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 5]])
+
+    >>> # Constructing a matrix with duplicate indices
+    >>> row  = np.array([0, 0, 1, 3, 1, 0, 0])
+    >>> col  = np.array([0, 2, 1, 3, 1, 0, 0])
+    >>> data = np.array([1, 1, 1, 1, 1, 1, 1])
+    >>> coo = coo_matrix((data, (row, col)), shape=(4, 4))
+    >>> # Duplicate indices are maintained until implicitly or explicitly summed
+    >>> np.max(coo.data)
+    1
+    >>> coo.toarray()
+    array([[3, 0, 1, 0],
+           [0, 2, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 1]])
+
+    """
 
-coo_matrix.__doc__ = _array_doc_to_matrix(_coo_base.__doc__)
diff --git a/scipy/sparse/_csc.py b/scipy/sparse/_csc.py
index 28bd33018799..de07f170e30d 100644
--- a/scipy/sparse/_csc.py
+++ b/scipy/sparse/_csc.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 
-from ._matrix import spmatrix, _array_doc_to_matrix
+from ._matrix import spmatrix
 from ._base import _spbase, sparray
 from ._sparsetools import csc_tocsr, expandptr
 from ._sputils import upcast
@@ -15,105 +15,11 @@
 
 
 class _csc_base(_cs_matrix):
-    """
-    Compressed Sparse Column matrix
-
-    This can be instantiated in several ways:
-
-        csc_array(D)
-            with a dense matrix or rank-2 ndarray D
-
-        csc_array(S)
-            with another sparse matrix S (equivalent to S.tocsc())
-
-        csc_array((M, N), [dtype])
-            to construct an empty matrix with shape (M, N)
-            dtype is optional, defaulting to dtype='d'.
-
-        csc_array((data, (row_ind, col_ind)), [shape=(M, N)])
-            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
-            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
-
-        csc_array((data, indices, indptr), [shape=(M, N)])
-            is the standard CSC representation where the row indices for
-            column i are stored in ``indices[indptr[i]:indptr[i+1]]``
-            and their corresponding values are stored in
-            ``data[indptr[i]:indptr[i+1]]``.  If the shape parameter is
-            not supplied, the matrix dimensions are inferred from
-            the index arrays.
-
-    Attributes
-    ----------
-    dtype : dtype
-        Data type of the matrix
-    shape : 2-tuple
-        Shape of the matrix
-    ndim : int
-        Number of dimensions (this is always 2)
-    nnz
-    size
-    data
-        Data array of the matrix
-    indices
-        CSC format index array
-    indptr
-        CSC format index pointer array
-    has_sorted_indices
-    has_canonical_format
-    T
-
-
-    Notes
-    -----
-
-    Sparse matrices can be used in arithmetic operations: they support
-    addition, subtraction, multiplication, division, and matrix power.
-
-    Advantages of the CSC format
-        - efficient arithmetic operations CSC + CSC, CSC * CSC, etc.
-        - efficient column slicing
-        - fast matrix vector products (CSR, BSR may be faster)
-
-    Disadvantages of the CSC format
-      - slow row slicing operations (consider CSR)
-      - changes to the sparsity structure are expensive (consider LIL or DOK)
-
-    Canonical format
-      - Within each column, indices are sorted by row.
-      - There are no duplicate entries.
-
-    Examples
-    --------
-
-    >>> import numpy as np
-    >>> from scipy.sparse import csc_array
-    >>> csc_array((3, 4), dtype=np.int8).toarray()
-    array([[0, 0, 0, 0],
-           [0, 0, 0, 0],
-           [0, 0, 0, 0]], dtype=int8)
-
-    >>> row = np.array([0, 2, 2, 0, 1, 2])
-    >>> col = np.array([0, 0, 1, 2, 2, 2])
-    >>> data = np.array([1, 2, 3, 4, 5, 6])
-    >>> csc_array((data, (row, col)), shape=(3, 3)).toarray()
-    array([[1, 0, 4],
-           [0, 0, 5],
-           [2, 3, 6]])
-
-    >>> indptr = np.array([0, 2, 3, 6])
-    >>> indices = np.array([0, 2, 2, 0, 1, 2])
-    >>> data = np.array([1, 2, 3, 4, 5, 6])
-    >>> csc_array((data, indices, indptr), shape=(3, 3)).toarray()
-    array([[1, 0, 4],
-           [0, 0, 5],
-           [2, 3, 6]])
-
-    """
     _format = 'csc'
 
     def transpose(self, axes=None, copy=False):
         if axes is not None and axes != (1, 0):
-            raise ValueError("Sparse matrices do not support "
+            raise ValueError("Sparse arrays/matrices do not support "
                               "an 'axes' parameter because swapping "
                               "dimensions is the only logical permutation.")
 
@@ -267,11 +173,191 @@ def isspmatrix_csc(x):
 
 # This namespace class separates array from matrix with isinstance
 class csc_array(_csc_base, sparray):
-    pass
+    """
+    Compressed Sparse Column array.
+
+    This can be instantiated in several ways:
+        csc_array(D)
+            where D is a 2-D ndarray
+
+        csc_array(S)
+            with another sparse array or matrix S (equivalent to S.tocsc())
+
+        csc_array((M, N), [dtype])
+            to construct an empty array with shape (M, N)
+            dtype is optional, defaulting to dtype='d'.
+
+        csc_array((data, (row_ind, col_ind)), [shape=(M, N)])
+            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
+            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
+
+        csc_array((data, indices, indptr), [shape=(M, N)])
+            is the standard CSC representation where the row indices for
+            column i are stored in ``indices[indptr[i]:indptr[i+1]]``
+            and their corresponding values are stored in
+            ``data[indptr[i]:indptr[i+1]]``.  If the shape parameter is
+            not supplied, the array dimensions are inferred from
+            the index arrays.
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the array
+    shape : 2-tuple
+        Shape of the array
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+    size
+    data
+        CSC format data array of the array
+    indices
+        CSC format index array of the array
+    indptr
+        CSC format index pointer array of the array
+    has_sorted_indices
+    has_canonical_format
+    T
+
+    Notes
+    -----
+
+    Sparse arrays can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    Advantages of the CSC format
+        - efficient arithmetic operations CSC + CSC, CSC * CSC, etc.
+        - efficient column slicing
+        - fast matrix vector products (CSR, BSR may be faster)
+
+    Disadvantages of the CSC format
+      - slow row slicing operations (consider CSR)
+      - changes to the sparsity structure are expensive (consider LIL or DOK)
+
+    Canonical format
+      - Within each column, indices are sorted by row.
+      - There are no duplicate entries.
+
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> from scipy.sparse import csc_array
+    >>> csc_array((3, 4), dtype=np.int8).toarray()
+    array([[0, 0, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 0]], dtype=int8)
+
+    >>> row = np.array([0, 2, 2, 0, 1, 2])
+    >>> col = np.array([0, 0, 1, 2, 2, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> csc_array((data, (row, col)), shape=(3, 3)).toarray()
+    array([[1, 0, 4],
+           [0, 0, 5],
+           [2, 3, 6]])
+
+    >>> indptr = np.array([0, 2, 3, 6])
+    >>> indices = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> csc_array((data, indices, indptr), shape=(3, 3)).toarray()
+    array([[1, 0, 4],
+           [0, 0, 5],
+           [2, 3, 6]])
+
+    """
 
-csc_array.__doc__ = _csc_base.__doc__
 
 class csc_matrix(spmatrix, _csc_base):
-    pass
+    """
+    Compressed Sparse Column matrix.
+
+    This can be instantiated in several ways:
+        csc_matrix(D)
+            where D is a 2-D ndarray
+
+        csc_matrix(S)
+            with another sparse array or matrix S (equivalent to S.tocsc())
+
+        csc_matrix((M, N), [dtype])
+            to construct an empty matrix with shape (M, N)
+            dtype is optional, defaulting to dtype='d'.
+
+        csc_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
+            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
+            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
+
+        csc_matrix((data, indices, indptr), [shape=(M, N)])
+            is the standard CSC representation where the row indices for
+            column i are stored in ``indices[indptr[i]:indptr[i+1]]``
+            and their corresponding values are stored in
+            ``data[indptr[i]:indptr[i+1]]``.  If the shape parameter is
+            not supplied, the matrix dimensions are inferred from
+            the index arrays.
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the matrix
+    shape : 2-tuple
+        Shape of the matrix
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+    size
+    data
+        CSC format data array of the matrix
+    indices
+        CSC format index array of the matrix
+    indptr
+        CSC format index pointer array of the matrix
+    has_sorted_indices
+    has_canonical_format
+    T
+
+    Notes
+    -----
+
+    Sparse matrices can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    Advantages of the CSC format
+        - efficient arithmetic operations CSC + CSC, CSC * CSC, etc.
+        - efficient column slicing
+        - fast matrix vector products (CSR, BSR may be faster)
+
+    Disadvantages of the CSC format
+      - slow row slicing operations (consider CSR)
+      - changes to the sparsity structure are expensive (consider LIL or DOK)
+
+    Canonical format
+      - Within each column, indices are sorted by row.
+      - There are no duplicate entries.
+
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> from scipy.sparse import csc_matrix
+    >>> csc_matrix((3, 4), dtype=np.int8).toarray()
+    array([[0, 0, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 0]], dtype=int8)
+
+    >>> row = np.array([0, 2, 2, 0, 1, 2])
+    >>> col = np.array([0, 0, 1, 2, 2, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> csc_matrix((data, (row, col)), shape=(3, 3)).toarray()
+    array([[1, 0, 4],
+           [0, 0, 5],
+           [2, 3, 6]])
+
+    >>> indptr = np.array([0, 2, 3, 6])
+    >>> indices = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> csc_matrix((data, indices, indptr), shape=(3, 3)).toarray()
+    array([[1, 0, 4],
+           [0, 0, 5],
+           [2, 3, 6]])
+
+    """
 
-csc_matrix.__doc__ = _array_doc_to_matrix(_csc_base.__doc__)
diff --git a/scipy/sparse/_csr.py b/scipy/sparse/_csr.py
index 69b0920081c8..cfd4354c8efc 100644
--- a/scipy/sparse/_csr.py
+++ b/scipy/sparse/_csr.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 
-from ._matrix import spmatrix, _array_doc_to_matrix
+from ._matrix import spmatrix
 from ._base import _spbase, sparray
 from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks,
                            get_csr_submatrix)
@@ -16,132 +16,11 @@
 
 
 class _csr_base(_cs_matrix):
-    """
-    Compressed Sparse Row matrix
-
-    This can be instantiated in several ways:
-        csr_array(D)
-            with a dense matrix or rank-2 ndarray D
-
-        csr_array(S)
-            with another sparse matrix S (equivalent to S.tocsr())
-
-        csr_array((M, N), [dtype])
-            to construct an empty matrix with shape (M, N)
-            dtype is optional, defaulting to dtype='d'.
-
-        csr_array((data, (row_ind, col_ind)), [shape=(M, N)])
-            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
-            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
-
-        csr_array((data, indices, indptr), [shape=(M, N)])
-            is the standard CSR representation where the column indices for
-            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
-            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
-            If the shape parameter is not supplied, the matrix dimensions
-            are inferred from the index arrays.
-
-    Attributes
-    ----------
-    dtype : dtype
-        Data type of the matrix
-    shape : 2-tuple
-        Shape of the matrix
-    ndim : int
-        Number of dimensions (this is always 2)
-    nnz
-    size
-    data
-        Data array of the matrix
-    indices
-        CSR format index array
-    indptr
-        CSR format index pointer array
-    has_sorted_indices
-    has_canonical_format
-    T
-
-
-    Notes
-    -----
-
-    Sparse matrices can be used in arithmetic operations: they support
-    addition, subtraction, multiplication, division, and matrix power.
-
-    Advantages of the CSR format
-      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
-      - efficient row slicing
-      - fast matrix vector products
-
-    Disadvantages of the CSR format
-      - slow column slicing operations (consider CSC)
-      - changes to the sparsity structure are expensive (consider LIL or DOK)
-
-    Canonical Format
-        - Within each row, indices are sorted by column.
-        - There are no duplicate entries.
-
-    Examples
-    --------
-
-    >>> import numpy as np
-    >>> from scipy.sparse import csr_array
-    >>> csr_array((3, 4), dtype=np.int8).toarray()
-    array([[0, 0, 0, 0],
-           [0, 0, 0, 0],
-           [0, 0, 0, 0]], dtype=int8)
-
-    >>> row = np.array([0, 0, 1, 2, 2, 2])
-    >>> col = np.array([0, 2, 2, 0, 1, 2])
-    >>> data = np.array([1, 2, 3, 4, 5, 6])
-    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
-    array([[1, 0, 2],
-           [0, 0, 3],
-           [4, 5, 6]])
-
-    >>> indptr = np.array([0, 2, 3, 6])
-    >>> indices = np.array([0, 2, 2, 0, 1, 2])
-    >>> data = np.array([1, 2, 3, 4, 5, 6])
-    >>> csr_array((data, indices, indptr), shape=(3, 3)).toarray()
-    array([[1, 0, 2],
-           [0, 0, 3],
-           [4, 5, 6]])
-
-    Duplicate entries are summed together:
-
-    >>> row = np.array([0, 1, 2, 0])
-    >>> col = np.array([0, 1, 1, 0])
-    >>> data = np.array([1, 2, 4, 8])
-    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
-    array([[9, 0, 0],
-           [0, 2, 0],
-           [0, 4, 0]])
-
-    As an example of how to construct a CSR matrix incrementally,
-    the following snippet builds a term-document matrix from texts:
-
-    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
-    >>> indptr = [0]
-    >>> indices = []
-    >>> data = []
-    >>> vocabulary = {}
-    >>> for d in docs:
-    ...     for term in d:
-    ...         index = vocabulary.setdefault(term, len(vocabulary))
-    ...         indices.append(index)
-    ...         data.append(1)
-    ...     indptr.append(len(indices))
-    ...
-    >>> csr_array((data, indices, indptr), dtype=int).toarray()
-    array([[2, 1, 0, 0],
-           [0, 1, 1, 1]])
-
-    """
     _format = 'csr'
 
     def transpose(self, axes=None, copy=False):
         if axes is not None and axes != (1, 0):
-            raise ValueError("Sparse matrices do not support "
+            raise ValueError("Sparse arrays/matrices do not support "
                               "an 'axes' parameter because swapping "
                               "dimensions is the only logical permutation.")
 
@@ -365,11 +244,247 @@ def isspmatrix_csr(x):
 
 # This namespace class separates array from matrix with isinstance
 class csr_array(_csr_base, sparray):
-    pass
+    """
+    Compressed Sparse Row array.
+
+    This can be instantiated in several ways:
+        csr_array(D)
+            where D is a 2-D ndarray
+
+        csr_array(S)
+            with another sparse array or matrix S (equivalent to S.tocsr())
+
+        csr_array((M, N), [dtype])
+            to construct an empty array with shape (M, N)
+            dtype is optional, defaulting to dtype='d'.
+
+        csr_array((data, (row_ind, col_ind)), [shape=(M, N)])
+            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
+            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
+
+        csr_array((data, indices, indptr), [shape=(M, N)])
+            is the standard CSR representation where the column indices for
+            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
+            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
+            If the shape parameter is not supplied, the array dimensions
+            are inferred from the index arrays.
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the array
+    shape : 2-tuple
+        Shape of the array
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+    size
+    data
+        CSR format data array of the array
+    indices
+        CSR format index array of the array
+    indptr
+        CSR format index pointer array of the array
+    has_sorted_indices
+    has_canonical_format
+    T
+
+    Notes
+    -----
+
+    Sparse arrays can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    Advantages of the CSR format
+      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
+      - efficient row slicing
+      - fast matrix vector products
+
+    Disadvantages of the CSR format
+      - slow column slicing operations (consider CSC)
+      - changes to the sparsity structure are expensive (consider LIL or DOK)
+
+    Canonical Format
+        - Within each row, indices are sorted by column.
+        - There are no duplicate entries.
+
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> from scipy.sparse import csr_array
+    >>> csr_array((3, 4), dtype=np.int8).toarray()
+    array([[0, 0, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 0]], dtype=int8)
+
+    >>> row = np.array([0, 0, 1, 2, 2, 2])
+    >>> col = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
+    array([[1, 0, 2],
+           [0, 0, 3],
+           [4, 5, 6]])
+
+    >>> indptr = np.array([0, 2, 3, 6])
+    >>> indices = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> csr_array((data, indices, indptr), shape=(3, 3)).toarray()
+    array([[1, 0, 2],
+           [0, 0, 3],
+           [4, 5, 6]])
+
+    Duplicate entries are summed together:
+
+    >>> row = np.array([0, 1, 2, 0])
+    >>> col = np.array([0, 1, 1, 0])
+    >>> data = np.array([1, 2, 4, 8])
+    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
+    array([[9, 0, 0],
+           [0, 2, 0],
+           [0, 4, 0]])
+
+    As an example of how to construct a CSR array incrementally,
+    the following snippet builds a term-document array from texts:
+
+    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
+    >>> indptr = [0]
+    >>> indices = []
+    >>> data = []
+    >>> vocabulary = {}
+    >>> for d in docs:
+    ...     for term in d:
+    ...         index = vocabulary.setdefault(term, len(vocabulary))
+    ...         indices.append(index)
+    ...         data.append(1)
+    ...     indptr.append(len(indices))
+    ...
+    >>> csr_array((data, indices, indptr), dtype=int).toarray()
+    array([[2, 1, 0, 0],
+           [0, 1, 1, 1]])
+
+    """
 
-csr_array.__doc__ = _csr_base.__doc__
 
 class csr_matrix(spmatrix, _csr_base):
-    pass
+    """
+    Compressed Sparse Row matrix.
+
+    This can be instantiated in several ways:
+        csr_matrix(D)
+            where D is a 2-D ndarray
+
+        csr_matrix(S)
+            with another sparse array or matrix S (equivalent to S.tocsr())
+
+        csr_matrix((M, N), [dtype])
+            to construct an empty matrix with shape (M, N)
+            dtype is optional, defaulting to dtype='d'.
+
+        csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
+            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
+            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
+
+        csr_matrix((data, indices, indptr), [shape=(M, N)])
+            is the standard CSR representation where the column indices for
+            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
+            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
+            If the shape parameter is not supplied, the matrix dimensions
+            are inferred from the index arrays.
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the matrix
+    shape : 2-tuple
+        Shape of the matrix
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+    size
+    data
+        CSR format data array of the matrix
+    indices
+        CSR format index array of the matrix
+    indptr
+        CSR format index pointer array of the matrix
+    has_sorted_indices
+    has_canonical_format
+    T
+
+    Notes
+    -----
+
+    Sparse matrices can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    Advantages of the CSR format
+      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
+      - efficient row slicing
+      - fast matrix vector products
+
+    Disadvantages of the CSR format
+      - slow column slicing operations (consider CSC)
+      - changes to the sparsity structure are expensive (consider LIL or DOK)
+
+    Canonical Format
+        - Within each row, indices are sorted by column.
+        - There are no duplicate entries.
+
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> from scipy.sparse import csr_matrix
+    >>> csr_matrix((3, 4), dtype=np.int8).toarray()
+    array([[0, 0, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 0]], dtype=int8)
+
+    >>> row = np.array([0, 0, 1, 2, 2, 2])
+    >>> col = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
+    array([[1, 0, 2],
+           [0, 0, 3],
+           [4, 5, 6]])
+
+    >>> indptr = np.array([0, 2, 3, 6])
+    >>> indices = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray()
+    array([[1, 0, 2],
+           [0, 0, 3],
+           [4, 5, 6]])
+
+    Duplicate entries are summed together:
+
+    >>> row = np.array([0, 1, 2, 0])
+    >>> col = np.array([0, 1, 1, 0])
+    >>> data = np.array([1, 2, 4, 8])
+    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
+    array([[9, 0, 0],
+           [0, 2, 0],
+           [0, 4, 0]])
+
+    As an example of how to construct a CSR matrix incrementally,
+    the following snippet builds a term-document matrix from texts:
+
+    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
+    >>> indptr = [0]
+    >>> indices = []
+    >>> data = []
+    >>> vocabulary = {}
+    >>> for d in docs:
+    ...     for term in d:
+    ...         index = vocabulary.setdefault(term, len(vocabulary))
+    ...         indices.append(index)
+    ...         data.append(1)
+    ...     indptr.append(len(indices))
+    ...
+    >>> csr_matrix((data, indices, indptr), dtype=int).toarray()
+    array([[2, 1, 0, 0],
+           [0, 1, 1, 1]])
+
+    """
 
-csr_matrix.__doc__ = _array_doc_to_matrix(_csr_base.__doc__)
diff --git a/scipy/sparse/_data.py b/scipy/sparse/_data.py
index 407fa8eedc75..87e5f6534e38 100644
--- a/scipy/sparse/_data.py
+++ b/scipy/sparse/_data.py
@@ -303,14 +303,14 @@ def _arg_min_or_max(self, axis, out, argmin_or_argmax, compare):
 
     def max(self, axis=None, out=None):
         """
-        Return the maximum of the matrix or maximum along an axis.
+        Return the maximum of the array/matrix or maximum along an axis.
         This takes all elements into account, not just the non-zero ones.
 
         Parameters
         ----------
         axis : {-2, -1, 0, 1, None} optional
             Axis along which the sum is computed. The default is to
-            compute the maximum over all the matrix elements, returning
+            compute the maximum over all elements, returning
             a scalar (i.e., `axis` = `None`).
 
         out : None, optional
@@ -327,7 +327,7 @@ def max(self, axis=None, out=None):
 
         See Also
         --------
-        min : The minimum value of a sparse matrix along a given axis.
+        min : The minimum value of a sparse array/matrix along a given axis.
         numpy.matrix.max : NumPy's implementation of 'max' for matrices
 
         """
@@ -335,14 +335,14 @@ def max(self, axis=None, out=None):
 
     def min(self, axis=None, out=None):
         """
-        Return the minimum of the matrix or maximum along an axis.
+        Return the minimum of the array/matrix or minimum along an axis.
         This takes all elements into account, not just the non-zero ones.
 
         Parameters
         ----------
         axis : {-2, -1, 0, 1, None} optional
             Axis along which the sum is computed. The default is to
-            compute the minimum over all the matrix elements, returning
+            compute the minimum over all elements, returning
             a scalar (i.e., `axis` = `None`).
 
         out : None, optional
@@ -359,7 +359,7 @@ def min(self, axis=None, out=None):
 
         See Also
         --------
-        max : The maximum value of a sparse matrix along a given axis.
+        max : The maximum value of a sparse array/matrix along a given axis.
         numpy.matrix.min : NumPy's implementation of 'min' for matrices
 
         """
@@ -367,7 +367,7 @@ def min(self, axis=None, out=None):
 
     def nanmax(self, axis=None, out=None):
         """
-        Return the maximum of the matrix or maximum along an axis, ignoring any
+        Return the maximum of the array/matrix or maximum along an axis, ignoring any
         NaNs. This takes all elements into account, not just the non-zero
         ones.
 
@@ -377,7 +377,7 @@ def nanmax(self, axis=None, out=None):
         ----------
         axis : {-2, -1, 0, 1, None} optional
             Axis along which the maximum is computed. The default is to
-            compute the maximum over all the matrix elements, returning
+            compute the maximum over all elements, returning
             a scalar (i.e., `axis` = `None`).
 
         out : None, optional
@@ -394,9 +394,9 @@ def nanmax(self, axis=None, out=None):
 
         See Also
         --------
-        nanmin : The minimum value of a sparse matrix along a given axis,
+        nanmin : The minimum value of a sparse array/matrix along a given axis,
                  ignoring NaNs.
-        max : The maximum value of a sparse matrix along a given axis,
+        max : The maximum value of a sparse array/matrix along a given axis,
               propagating NaNs.
         numpy.nanmax : NumPy's implementation of 'nanmax'.
 
@@ -405,7 +405,7 @@ def nanmax(self, axis=None, out=None):
 
     def nanmin(self, axis=None, out=None):
         """
-        Return the minimum of the matrix or minimum along an axis, ignoring any
+        Return the minimum of the array/matrix or minimum along an axis, ignoring any
         NaNs. This takes all elements into account, not just the non-zero
         ones.
 
@@ -415,7 +415,7 @@ def nanmin(self, axis=None, out=None):
         ----------
         axis : {-2, -1, 0, 1, None} optional
             Axis along which the minimum is computed. The default is to
-            compute the minimum over all the matrix elements, returning
+            compute the minimum over all elements, returning
             a scalar (i.e., `axis` = `None`).
 
         out : None, optional
@@ -432,9 +432,9 @@ def nanmin(self, axis=None, out=None):
 
         See Also
         --------
-        nanmax : The maximum value of a sparse matrix along a given axis,
+        nanmax : The maximum value of a sparse array/matrix along a given axis,
                  ignoring NaNs.
-        min : The minimum value of a sparse matrix along a given axis,
+        min : The minimum value of a sparse array/matrix along a given axis,
               propagating NaNs.
         numpy.nanmin : NumPy's implementation of 'nanmin'.
 
diff --git a/scipy/sparse/_dia.py b/scipy/sparse/_dia.py
index 4e37c2dd888a..20c22f2bad88 100644
--- a/scipy/sparse/_dia.py
+++ b/scipy/sparse/_dia.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 
-from ._matrix import spmatrix, _array_doc_to_matrix
+from ._matrix import spmatrix
 from ._base import issparse, _formats, _spbase, sparray
 from ._data import _data_matrix
 from ._sputils import (isshape, upcast_char, getdtype, get_sum_dtype, validateaxis, check_shape)
@@ -14,78 +14,6 @@
 
 
 class _dia_base(_data_matrix):
-    """Sparse matrix with DIAgonal storage
-
-    This can be instantiated in several ways:
-        dia_array(D)
-            with a dense matrix
-
-        dia_array(S)
-            with another sparse matrix S (equivalent to S.todia())
-
-        dia_array((M, N), [dtype])
-            to construct an empty matrix with shape (M, N),
-            dtype is optional, defaulting to dtype='d'.
-
-        dia_array((data, offsets), shape=(M, N))
-            where the ``data[k,:]`` stores the diagonal entries for
-            diagonal ``offsets[k]`` (See example below)
-
-    Attributes
-    ----------
-    dtype : dtype
-        Data type of the matrix
-    shape : 2-tuple
-        Shape of the matrix
-    ndim : int
-        Number of dimensions (this is always 2)
-    nnz
-    size
-    data
-        DIA format data array of the matrix
-    offsets
-        DIA format offset array of the matrix
-    T
-
-
-    Notes
-    -----
-
-    Sparse matrices can be used in arithmetic operations: they support
-    addition, subtraction, multiplication, division, and matrix power.
-
-    Examples
-    --------
-
-    >>> import numpy as np
-    >>> from scipy.sparse import dia_array
-    >>> dia_array((3, 4), dtype=np.int8).toarray()
-    array([[0, 0, 0, 0],
-           [0, 0, 0, 0],
-           [0, 0, 0, 0]], dtype=int8)
-
-    >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
-    >>> offsets = np.array([0, -1, 2])
-    >>> dia_array((data, offsets), shape=(4, 4)).toarray()
-    array([[1, 0, 3, 0],
-           [1, 2, 0, 4],
-           [0, 2, 3, 0],
-           [0, 0, 3, 4]])
-
-    >>> from scipy.sparse import dia_array
-    >>> n = 10
-    >>> ex = np.ones(n)
-    >>> data = np.array([ex, 2 * ex, ex])
-    >>> offsets = np.array([-1, 0, 1])
-    >>> dia_array((data, offsets), shape=(n, n)).toarray()
-    array([[2., 1., 0., ..., 0., 0., 0.],
-           [1., 2., 1., ..., 0., 0., 0.],
-           [0., 1., 2., ..., 0., 0., 0.],
-           ...,
-           [0., 0., 0., ..., 2., 1., 0.],
-           [0., 0., 0., ..., 1., 2., 1.],
-           [0., 0., 0., ..., 0., 1., 2.]])
-    """
     _format = 'dia'
 
     def __init__(self, arg1, shape=None, dtype=None, copy=False):
@@ -326,7 +254,7 @@ def todia(self, copy=False):
 
     def transpose(self, axes=None, copy=False):
         if axes is not None and axes != (1, 0):
-            raise ValueError("Sparse matrices do not support "
+            raise ValueError("Sparse arrays/matrices do not support "
                               "an 'axes' parameter because swapping "
                               "dimensions is the only logical permutation.")
 
@@ -473,11 +401,150 @@ def isspmatrix_dia(x):
 
 # This namespace class separates array from matrix with isinstance
 class dia_array(_dia_base, sparray):
-    pass
+    """
+    Sparse array with DIAgonal storage.
+
+    This can be instantiated in several ways:
+        dia_array(D)
+            where D is a 2-D ndarray
+
+        dia_array(S)
+            with another sparse array or matrix S (equivalent to S.todia())
+
+        dia_array((M, N), [dtype])
+            to construct an empty array with shape (M, N),
+            dtype is optional, defaulting to dtype='d'.
+
+        dia_array((data, offsets), shape=(M, N))
+            where ``data[k,:]`` stores the diagonal entries for
+            diagonal ``offsets[k]`` (see example below)
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the array
+    shape : 2-tuple
+        Shape of the array
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+    size
+    data
+        DIA format data array of the array
+    offsets
+        DIA format offset array of the array
+    T
+
+    Notes
+    -----
+
+    Sparse arrays can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> from scipy.sparse import dia_array
+    >>> dia_array((3, 4), dtype=np.int8).toarray()
+    array([[0, 0, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 0]], dtype=int8)
+
+    >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
+    >>> offsets = np.array([0, -1, 2])
+    >>> dia_array((data, offsets), shape=(4, 4)).toarray()
+    array([[1, 0, 3, 0],
+           [1, 2, 0, 4],
+           [0, 2, 3, 0],
+           [0, 0, 3, 4]])
+
+    >>> from scipy.sparse import dia_array
+    >>> n = 10
+    >>> ex = np.ones(n)
+    >>> data = np.array([ex, 2 * ex, ex])
+    >>> offsets = np.array([-1, 0, 1])
+    >>> dia_array((data, offsets), shape=(n, n)).toarray()
+    array([[2., 1., 0., ..., 0., 0., 0.],
+           [1., 2., 1., ..., 0., 0., 0.],
+           [0., 1., 2., ..., 0., 0., 0.],
+           ...,
+           [0., 0., 0., ..., 2., 1., 0.],
+           [0., 0., 0., ..., 1., 2., 1.],
+           [0., 0., 0., ..., 0., 1., 2.]])
+    """
 
-dia_array.__doc__ = _dia_base.__doc__
 
 class dia_matrix(spmatrix, _dia_base):
-    pass
+    """
+    Sparse matrix with DIAgonal storage.
+
+    This can be instantiated in several ways:
+        dia_matrix(D)
+            where D is a 2-D ndarray
+
+        dia_matrix(S)
+            with another sparse array or matrix S (equivalent to S.todia())
+
+        dia_matrix((M, N), [dtype])
+            to construct an empty matrix with shape (M, N),
+            dtype is optional, defaulting to dtype='d'.
+
+        dia_matrix((data, offsets), shape=(M, N))
+            where ``data[k,:]`` stores the diagonal entries for
+            diagonal ``offsets[k]`` (see example below)
 
-dia_matrix.__doc__ = _array_doc_to_matrix(_dia_base.__doc__)
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the matrix
+    shape : 2-tuple
+        Shape of the matrix
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+    size
+    data
+        DIA format data array of the matrix
+    offsets
+        DIA format offset array of the matrix
+    T
+
+    Notes
+    -----
+
+    Sparse matrices can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> from scipy.sparse import dia_matrix
+    >>> dia_matrix((3, 4), dtype=np.int8).toarray()
+    array([[0, 0, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 0]], dtype=int8)
+
+    >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
+    >>> offsets = np.array([0, -1, 2])
+    >>> dia_matrix((data, offsets), shape=(4, 4)).toarray()
+    array([[1, 0, 3, 0],
+           [1, 2, 0, 4],
+           [0, 2, 3, 0],
+           [0, 0, 3, 4]])
+
+    >>> from scipy.sparse import dia_matrix
+    >>> n = 10
+    >>> ex = np.ones(n)
+    >>> data = np.array([ex, 2 * ex, ex])
+    >>> offsets = np.array([-1, 0, 1])
+    >>> dia_matrix((data, offsets), shape=(n, n)).toarray()
+    array([[2., 1., 0., ..., 0., 0., 0.],
+           [1., 2., 1., ..., 0., 0., 0.],
+           [0., 1., 2., ..., 0., 0., 0.],
+           ...,
+           [0., 0., 0., ..., 2., 1., 0.],
+           [0., 0., 0., ..., 1., 2., 1.],
+           [0., 0., 0., ..., 0., 1., 2.]])
+    """
diff --git a/scipy/sparse/_dok.py b/scipy/sparse/_dok.py
index df84e6629b40..1b5f7a9de33c 100644
--- a/scipy/sparse/_dok.py
+++ b/scipy/sparse/_dok.py
@@ -7,7 +7,7 @@
 import itertools
 import numpy as np
 
-from ._matrix import spmatrix, _array_doc_to_matrix
+from ._matrix import spmatrix
 from ._base import _spbase, sparray, issparse
 from ._index import IndexMixin
 from ._sputils import (isdense, getdtype, isshape, isintlike, isscalarlike,
@@ -15,57 +15,6 @@
 
 
 class _dok_base(_spbase, IndexMixin):
-    """
-    Dictionary Of Keys based sparse matrix.
-
-    This is an efficient structure for constructing sparse
-    matrices incrementally.
-
-    This can be instantiated in several ways:
-        dok_array(D)
-            with a dense matrix, D
-
-        dok_array(S)
-            with a sparse matrix, S
-
-        dok_array((M,N), [dtype])
-            create the matrix with initial shape (M,N)
-            dtype is optional, defaulting to dtype='d'
-
-    Attributes
-    ----------
-    dtype : dtype
-        Data type of the matrix
-    shape : 2-tuple
-        Shape of the matrix
-    ndim : int
-        Number of dimensions (this is always 2)
-    nnz
-        Number of nonzero elements
-    size
-    T
-
-
-    Notes
-    -----
-
-    Sparse matrices can be used in arithmetic operations: they support
-    addition, subtraction, multiplication, division, and matrix power.
-
-    Allows for efficient O(1) access of individual elements.
-    Duplicates are not allowed.
-    Can be efficiently converted to a coo_matrix once constructed.
-
-    Examples
-    --------
-    >>> import numpy as np
-    >>> from scipy.sparse import dok_array
-    >>> S = dok_array((5, 5), dtype=np.float32)
-    >>> for i in range(5):
-    ...     for j in range(5):
-    ...         S[i, j] = i + j    # Update element
-
-    """
     _format = 'dok'
 
     def __init__(self, arg1, shape=None, dtype=None, copy=False):
@@ -368,7 +317,7 @@ def __reduce__(self):
 
     def transpose(self, axes=None, copy=False):
         if axes is not None and axes != (1, 0):
-            raise ValueError("Sparse matrices do not support "
+            raise ValueError("Sparse arrays/matrices do not support "
                              "an 'axes' parameter because swapping "
                              "dimensions is the only logical permutation.")
 
@@ -465,20 +414,115 @@ def isspmatrix_dok(x):
 
 # This namespace class separates array from matrix with isinstance
 class dok_array(_dok_base, sparray):
-    pass
+    """
+    Dictionary Of Keys based sparse array.
+
+    This is an efficient structure for constructing sparse
+    arrays incrementally.
+
+    This can be instantiated in several ways:
+        dok_array(D)
+            where D is a 2-D ndarray
+
+        dok_array(S)
+            with another sparse array or matrix S (equivalent to S.todok())
+
+        dok_array((M,N), [dtype])
+            create the array with initial shape (M,N)
+            dtype is optional, defaulting to dtype='d'
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the array
+    shape : 2-tuple
+        Shape of the array
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+        Number of nonzero elements
+    size
+    T
+
+    Notes
+    -----
+
+    Sparse arrays can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    - Allows for efficient O(1) access of individual elements.
+    - Duplicates are not allowed.
+    - Can be efficiently converted to a coo_array once constructed.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from scipy.sparse import dok_array
+    >>> S = dok_array((5, 5), dtype=np.float32)
+    >>> for i in range(5):
+    ...     for j in range(5):
+    ...         S[i, j] = i + j    # Update element
+
+    """
 
-dok_array.__doc__ = _dok_base.__doc__
 
 class dok_matrix(spmatrix, _dok_base, dict):
+    """
+    Dictionary Of Keys based sparse matrix.
+
+    This is an efficient structure for constructing sparse
+    matrices incrementally.
+
+    This can be instantiated in several ways:
+        dok_matrix(D)
+            where D is a 2-D ndarray
+
+        dok_matrix(S)
+            with another sparse array or matrix S (equivalent to S.todok())
+
+        dok_matrix((M,N), [dtype])
+            create the matrix with initial shape (M,N)
+            dtype is optional, defaulting to dtype='d'
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the matrix
+    shape : 2-tuple
+        Shape of the matrix
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+        Number of nonzero elements
+    size
+    T
+
+    Notes
+    -----
+
+    Sparse matrices can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    - Allows for efficient O(1) access of individual elements.
+    - Duplicates are not allowed.
+    - Can be efficiently converted to a coo_matrix once constructed.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from scipy.sparse import dok_matrix
+    >>> S = dok_matrix((5, 5), dtype=np.float32)
+    >>> for i in range(5):
+    ...     for j in range(5):
+    ...         S[i, j] = i + j    # Update element
+
+    """
     def set_shape(self, shape):
         new_matrix = self.reshape(shape, copy=False).asformat(self.format)
         self.__dict__ = new_matrix.__dict__
 
     def get_shape(self):
-        """Get shape of a sparse array."""
+        """Get shape of a sparse matrix."""
         return self._shape
 
     shape = property(fget=get_shape, fset=set_shape)
-
-
-dok_matrix.__doc__ = _array_doc_to_matrix(_dok_base.__doc__)
diff --git a/scipy/sparse/_index.py b/scipy/sparse/_index.py
index d4e7f3049141..003d885fca79 100644
--- a/scipy/sparse/_index.py
+++ b/scipy/sparse/_index.py
@@ -1,4 +1,4 @@
-"""Indexing mixin for sparse matrix classes.
+"""Indexing mixin for sparse array/matrix classes.
 """
 import numpy as np
 from warnings import warn
diff --git a/scipy/sparse/_lil.py b/scipy/sparse/_lil.py
index 2944394052a0..d0323a9c2297 100644
--- a/scipy/sparse/_lil.py
+++ b/scipy/sparse/_lil.py
@@ -9,7 +9,7 @@
 
 import numpy as np
 
-from ._matrix import spmatrix, _array_doc_to_matrix
+from ._matrix import spmatrix
 from ._base import _spbase, sparray, issparse
 from ._index import IndexMixin, INT_TYPES, _broadcast_arrays
 from ._sputils import (getdtype, isshape, isscalarlike, upcast_scalar,
@@ -18,69 +18,6 @@
 
 
 class _lil_base(_spbase, IndexMixin):
-    """Row-based LIst of Lists sparse matrix
-
-    This is a structure for constructing sparse matrices incrementally.
-    Note that inserting a single item can take linear time in the worst case;
-    to construct a matrix efficiently, make sure the items are pre-sorted by
-    index, per row.
-
-    This can be instantiated in several ways:
-        lil_array(D)
-            with a dense matrix or rank-2 ndarray D
-
-        lil_array(S)
-            with another sparse matrix S (equivalent to S.tolil())
-
-        lil_array((M, N), [dtype])
-            to construct an empty matrix with shape (M, N)
-            dtype is optional, defaulting to dtype='d'.
-
-    Attributes
-    ----------
-    dtype : dtype
-        Data type of the matrix
-    shape : 2-tuple
-        Shape of the matrix
-    ndim : int
-        Number of dimensions (this is always 2)
-    nnz
-    size
-    data
-        LIL format data array of the matrix
-    rows
-        LIL format row index array of the matrix
-    T
-
-
-    Notes
-    -----
-    Sparse matrices can be used in arithmetic operations: they support
-    addition, subtraction, multiplication, division, and matrix power.
-
-    Advantages of the LIL format
-        - supports flexible slicing
-        - changes to the matrix sparsity structure are efficient
-
-    Disadvantages of the LIL format
-        - arithmetic operations LIL + LIL are slow (consider CSR or CSC)
-        - slow column slicing (consider CSC)
-        - slow matrix vector products (consider CSR or CSC)
-
-    Intended Usage
-        - LIL is a convenient format for constructing sparse matrices
-        - once a matrix has been constructed, convert to CSR or
-          CSC format for fast arithmetic and matrix vector operations
-        - consider using the COO format when constructing large matrices
-
-    Data Structure
-        - An array (``self.rows``) of rows, each of which is a sorted
-          list of column indices of non-zero elements.
-        - The corresponding nonzero values are stored in similar
-          fashion in ``self.data``.
-
-
-    """
     _format = 'lil'
 
     def __init__(self, arg1, shape=None, dtype=None, copy=False):
@@ -551,11 +488,130 @@ def isspmatrix_lil(x):
 
 # This namespace class separates array from matrix with isinstance
 class lil_array(_lil_base, sparray):
-    pass
+    """
+    Row-based LIst of Lists sparse array.
+
+    This is a structure for constructing sparse arrays incrementally.
+    Note that inserting a single item can take linear time in the worst case;
+    to construct the array efficiently, make sure the items are pre-sorted by
+    index, per row.
+
+    This can be instantiated in several ways:
+        lil_array(D)
+            where D is a 2-D ndarray
+
+        lil_array(S)
+            with another sparse array or matrix S (equivalent to S.tolil())
+
+        lil_array((M, N), [dtype])
+            to construct an empty array with shape (M, N)
+            dtype is optional, defaulting to dtype='d'.
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the array
+    shape : 2-tuple
+        Shape of the array
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+    size
+    data
+        LIL format data array of the sparse array
+    rows
+        LIL format row index array of the sparse array
+    T
+
+    Notes
+    -----
+    Sparse arrays can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    Advantages of the LIL format
+        - supports flexible slicing
+        - changes to the array sparsity structure are efficient
+
+    Disadvantages of the LIL format
+        - arithmetic operations LIL + LIL are slow (consider CSR or CSC)
+        - slow column slicing (consider CSC)
+        - slow matrix-vector products (consider CSR or CSC)
+
+    Intended Usage
+        - LIL is a convenient format for constructing sparse arrays
+        - once an array has been constructed, convert to CSR or
+          CSC format for fast arithmetic and matrix-vector operations
+        - consider using the COO format when constructing large arrays
+
+    Data Structure
+        - An array (``self.rows``) of rows, each of which is a sorted
+          list of column indices of non-zero elements.
+        - The corresponding nonzero values are stored in similar
+          fashion in ``self.data``.
+
+    """
 
-lil_array.__doc__ = _lil_base.__doc__
 
 class lil_matrix(spmatrix, _lil_base):
-    pass
+    """
+    Row-based LIst of Lists sparse matrix.
+
+    This is a structure for constructing sparse matrices incrementally.
+    Note that inserting a single item can take linear time in the worst case;
+    to construct the matrix efficiently, make sure the items are pre-sorted by
+    index, per row.
+
+    This can be instantiated in several ways:
+        lil_matrix(D)
+            where D is a 2-D ndarray
+
+        lil_matrix(S)
+            with another sparse array or matrix S (equivalent to S.tolil())
+
+        lil_matrix((M, N), [dtype])
+            to construct an empty matrix with shape (M, N)
+            dtype is optional, defaulting to dtype='d'.
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the matrix
+    shape : 2-tuple
+        Shape of the matrix
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+    size
+    data
+        LIL format data array of the matrix
+    rows
+        LIL format row index array of the matrix
+    T
+
+    Notes
+    -----
+    Sparse matrices can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    Advantages of the LIL format
+        - supports flexible slicing
+        - changes to the matrix sparsity structure are efficient
+
+    Disadvantages of the LIL format
+        - arithmetic operations LIL + LIL are slow (consider CSR or CSC)
+        - slow column slicing (consider CSC)
+        - slow matrix-vector products (consider CSR or CSC)
+
+    Intended Usage
+        - LIL is a convenient format for constructing sparse matrices
+        - once a matrix has been constructed, convert to CSR or
+          CSC format for fast arithmetic and matrix-vector operations
+        - consider using the COO format when constructing large matrices
+
+    Data Structure
+        - An array (``self.rows``) of rows, each of which is a sorted
+          list of column indices of non-zero elements.
+        - The corresponding nonzero values are stored in similar
+          fashion in ``self.data``.
 
-lil_matrix.__doc__ = _array_doc_to_matrix(_lil_base.__doc__)
+    """
diff --git a/scipy/sparse/_matrix.py b/scipy/sparse/_matrix.py
index db82eaee8a5f..d3808444529a 100644
--- a/scipy/sparse/_matrix.py
+++ b/scipy/sparse/_matrix.py
@@ -69,7 +69,7 @@ def get_shape(self):
                      doc="Shape of the matrix")
 
     def asfptype(self):
-        """Upcast array to a floating point format (if necessary)"""
+        """Upcast matrix to a floating point format (if necessary)"""
         return self._asfptype()
 
     def getmaxprint(self):
@@ -92,7 +92,7 @@ def getnnz(self, axis=None):
         return self._getnnz(axis=axis)
 
     def getH(self):
-        """Return the Hermitian transpose of this array.
+        """Return the Hermitian transpose of this matrix.
 
         See Also
         --------
@@ -101,23 +101,13 @@ def getH(self):
         return self.conjugate().transpose()
 
     def getcol(self, j):
-        """Returns a copy of column j of the array, as an (m x 1) sparse
-        array (column vector).
+        """Returns a copy of column j of the matrix, as an (m x 1) sparse
+        matrix (column vector).
         """
         return self._getcol(j)
 
     def getrow(self, i):
-        """Returns a copy of row i of the array, as a (1 x n) sparse
-        array (row vector).
+        """Returns a copy of row i of the matrix, as a (1 x n) sparse
+        matrix (row vector).
         """
         return self._getrow(i)
-
-
-def _array_doc_to_matrix(docstr):
-    # For opimized builds with stripped docstrings
-    if docstr is None:
-        return None
-    return (
-        docstr.replace('sparse arrays', 'sparse matrices')
-              .replace('sparse array', 'sparse matrix')
-    )