qutip · hodgestar · Mar 22, 2023 · Mar 20, 2023 · Mar 20, 2023 · Mar 20, 2023
diff --git a/doc/changes/2128.feature b/doc/changes/2128.feature
@@ -0,0 +1 @@
+Speed up the construction of the RHS of the HEOM solver by a factor of 4x by converting the final step to Cython.
diff --git a/qutip/core/data/csr.pxd b/qutip/core/data/csr.pxd
@@ -161,3 +161,6 @@ cpdef CSR from_dense(Dense matrix)
 cdef CSR from_coo_pointers(base.idxint *rows, base.idxint *cols, double complex *data,
                            base.idxint n_rows, base.idxint n_cols, base.idxint nnz,
                            double tol=*)
+
+cpdef CSR _from_csr_blocks(base.idxint[:] block_rows, base.idxint[:] block_cols, CSR[:] block_ops,
+                          base.idxint n_blocks, base.idxint block_size)
diff --git a/qutip/core/data/csr.pyx b/qutip/core/data/csr.pyx
@@ -844,3 +844,122 @@ def diags(diagonals, offsets=None, shape=None):
     return diags_(
         diagonals_, np.array(offsets_, dtype=idxint_dtype), n_rows, n_cols,
     )
+
+
+cpdef CSR _from_csr_blocks(
+    base.idxint[:] block_rows, base.idxint[:] block_cols, CSR[:] block_ops,
+    base.idxint n_blocks, base.idxint block_size
+):
+    """
+    Construct a CSR from non-overlapping blocks.
+
+    Each operator in ``block_ops`` should be a square CSR operator with
+    shape ``(block_size, block_size)``. The output operator consists of
+    ``n_blocks`` by ``n_blocks`` blocks and thus has shape
+    ``(n_blocks * block_size, n_blocks * block_size)``.
+
+    None of the operators should overlap (i.e. the list of block row and
+    column pairs should be unique).
+
+    Parameters
+    ----------
+    block_rows : sequence of base.idxint integers
+        The block row for each operator. The block row should be in
+        ``range(0, n_blocks)``.
+
+    block_cols : sequence of base.idxint integers
+        The block column for each operator. The block column should be in
+        ``range(0, n_blocks)``.
+
+    block_ops : sequence of CSR matrixes
+        The operators corresponding to the rows and columns in ``block_rows``
+        and ``block_cols``.
+
+    n_blocks : base.idxint
+        Number of blocks. The shape of the final matrix is
+        (n_blocks * block, n_blocks * block).
+
+    block_size : base.idxint
+        Size of each block. The shape of matrices in ``block_ops`` is
+        ``(block_size, block_size)``.
+    """
+    cdef CSR op
+    cdef base.idxint shape = n_blocks * block_size
+    cdef base.idxint nnz_ = 0
+    cdef base.idxint n_ops = len(block_ops)
+    cdef base.idxint i, j
+    cdef base.idxint row_idx, col_idx
+
+    # check arrays are the same length
+    if len(block_rows) != n_ops or len(block_cols) != n_ops:
+        raise ValueError(
+            "The arrays block_rows, block_cols and block_ops should have"
+            " the same length."
+        )
+
+    if n_ops == 0:
+        return zeros(shape, shape)
+
+    # check op shapes and calculate nnz
+    for op in block_ops:
+        nnz_ += nnz(op)
+        if op.shape[0] != block_size or op.shape[1] != block_size:
+            raise ValueError(
+                "Block operators (block_ops) do not have the correct shape."
+            )
+
+    # check ops are ordered by (row, column)
+    row_idx = block_rows[0]
+    col_idx = block_cols[0]
+    for i in range(1, n_ops):
+        if (
+            block_rows[i] < row_idx or
+            (block_rows[i] == row_idx and block_cols[i] <= col_idx)
+        ):
+            raise ValueError(
+                "The arrays block_rows and block_cols must be sorted"
+                " by (row, column)."
+            )
+        row_idx = block_rows[i]
+        col_idx = block_cols[i]
+
+    if nnz_ == 0:
+        return zeros(shape, shape)
+
+    cdef CSR out = empty(shape, shape, nnz_)
+    cdef base.idxint op_idx = 0
+    cdef base.idxint prev_op_idx = 0
+    cdef base.idxint end = 0
+    cdef base.idxint row_pos, col_pos
+    cdef base.idxint op_row, op_row_start, op_row_end, op_row_len
+
+    out.row_index[0] = 0
+
+    for row_idx in range(n_blocks):
+        prev_op_idx = op_idx
+        while op_idx < n_ops:
+            if block_rows[op_idx] != row_idx:
+                break
+            op_idx += 1
+
+        row_pos = row_idx * block_size
+        for op_row in range(block_size):
+            for i in range(prev_op_idx, op_idx):
+                op = block_ops[i]
+                if nnz(op) == 0:
+                    # empty CSR matrices have uninitialized row_index entries.
+                    # it's unclear whether users should ever see such matrixes
+                    # but we support them here anyway.
+                    continue
+                col_idx = block_cols[i]
+                col_pos = col_idx * block_size
+                op_row_start = op.row_index[op_row]
+                op_row_end = op.row_index[op_row + 1]
+                op_row_len = op_row_end - op_row_start
+                for j in range(op_row_len):
+                    out.col_index[end + j] = op.col_index[op_row_start + j] + col_pos
+                    out.data[end + j] = op.data[op_row_start + j]
+                end += op_row_len
+            out.row_index[row_pos + op_row + 1] = end
+
+    return out
diff --git a/qutip/solver/heom/bofin_solvers.py b/qutip/solver/heom/bofin_solvers.py
@@ -119,6 +119,7 @@ def __init__(self, exponents, max_depth):
 
         self.labels = list(state_number_enumerate(self.dims, max_depth))
         self._label_idx = {s: i for i, s in enumerate(self.labels)}
+        self.idx = self._label_idx.__getitem__
 
     def idx(self, label):
         """
@@ -134,6 +135,13 @@ def idx(self, label):
         -------
         int
             The index of the label within the list of ADO labels.
+
+        Note
+        ----
+        This implementation of the ``.idx(...)`` method is just for
+        reference and documentation. To avoid the cost of a Python
+        function call, it is replaced with
+        ``self._label_idx.__getitem__`` when the instance is created.
         """
         return self._label_idx[label]
 
@@ -617,7 +625,7 @@ def __init__(self, H, bath, max_depth, *, options=None):
         self._sId = _data.identity(self._sup_shape, dtype="csr")
 
         # pre-calculate superoperators required by _grad_prev and _grad_next:
-        Qs = [exp.Q for exp in self.ados.exponents]
+        Qs = [exp.Q.to("csr") for exp in self.ados.exponents]
         self._spreQ = [spre(op).data for op in Qs]
         self._spostQ = [spost(op).data for op in Qs]
         self._s_pre_minus_post_Q = [
@@ -689,14 +697,11 @@ def _combine_bath_exponents(self, bath):
             )
         return exponents
 
-    def _grad_n(self, L, he_n):
+    def _grad_n(self, he_n):
         """ Get the gradient for the hierarchy ADO at level n. """
         vk = self.ados.vk
         vk_sum = sum(he_n[i] * vk[i] for i in range(len(vk)))
-        if L is not None:  # time-independent case
-            op = _data.sub(L, _data.mul(self._sId, vk_sum))
-        else:  # time-dependent case
-            op = _data.mul(self._sId, -vk_sum)
+        op = _data.mul(self._sId, -vk_sum)
         return op
 
     def _grad_prev(self, he_n, k):
@@ -811,14 +816,14 @@ def _grad_next_fermionic(self, he_n, k):
             )
         return op
 
-    def _rhs(self, L):
+    def _rhs(self):
         """ Make the RHS for the HEOM. """
         ops = _GatherHEOMRHS(
             self.ados.idx, block=self._sup_shape, nhe=self._n_ados
         )
 
         for he_n in self.ados.labels:
-            op = self._grad_n(L, he_n)
+            op = self._grad_n(he_n)
             ops.add_op(he_n, he_n, op)
             for k in range(len(self.ados.dims)):
                 next_he = self.ados.next(he_n, k)
@@ -834,12 +839,17 @@ def _rhs(self, L):
 
     def _calculate_rhs(self):
         """ Make the full RHS required by the solver. """
+        rhs_mat = self._rhs()
+        rhs_dims = [
+            self._sup_shape * self._n_ados, self._sup_shape * self._n_ados
+        ]
+        h_identity = _data.identity(self._n_ados, dtype="csr")
+
         if self.L_sys.isconstant:
-            L0 = self.L_sys(0)
-            rhs_mat = self._rhs(L0.data)
-            rhs = QobjEvo(Qobj(rhs_mat, dims=[
-                self._sup_shape * self._n_ados, self._sup_shape * self._n_ados
-            ]))
+            # For the constant case, we just add the Liouvillian to the
+            # diagonal blocks of the RHS matrix.
+            rhs_mat += _data.kron(h_identity, self.L_sys(0).to("csr").data)
+            rhs = QobjEvo(Qobj(rhs_mat, dims=rhs_dims))
         else:
             # In the time dependent case, we construct the parameters
             # for the ODE gradient function under the assumption that
@@ -850,23 +860,23 @@ def _calculate_rhs(self):
             # This assumption holds because only _grad_n dependents on
             # the system Liouvillian (and not _grad_prev or _grad_next) and
             # the bath coupling operators are not time-dependent.
-            #
-            # By calling _rhs(None) we omit the Liouvillian completely from
-            # the RHS and then manually add back the Liouvillian afterwards.
-            rhs_mat = self._rhs(None)
-            rhs = QobjEvo(Qobj(rhs_mat))
-            h_identity = _data.identity(self._n_ados, dtype="csr")
+            rhs = QobjEvo(Qobj(rhs_mat, dims=rhs_dims))
 
             def _kron(x):
-                return Qobj(_data.kron(h_identity, x.data)).to("csr")
+                return Qobj(
+                    _data.kron(h_identity, x.data),
+                    dims=rhs_dims,
+                ).to("csr")
+
             rhs += self.L_sys.linear_map(_kron)
 
         # The assertion that rhs_mat has data type CSR is just a sanity
         # check on the RHS creation. The base solver class will still
         # convert the RHS to the type required by the ODE integrator if
-        # required.
+        # needed.
         assert isinstance(rhs_mat, _csr.CSR)
         assert isinstance(rhs, QobjEvo)
+        assert rhs.dims == rhs_dims
 
         return rhs
 
@@ -1265,15 +1275,15 @@ class _GatherHEOMRHS:
             The number of ADOs in the hierarchy.
     """
     def __init__(self, f_idx, block, nhe):
-        self._block = block
-        self._nhe = nhe
+        self._block_size = block
+        self._n_blocks = nhe
         self._f_idx = f_idx
         self._ops = []
 
     def add_op(self, row_he, col_he, op):
         """ Add an block operator to the list. """
         self._ops.append(
-            (self._f_idx(row_he), self._f_idx(col_he), _data.to["csr"](op))
+            (self._f_idx(row_he), self._f_idx(col_he), op)
         )
 
     def gather(self):
@@ -1294,48 +1304,13 @@ def gather(self):
             rhs : :obj:`Data`
                 A combined matrix of shape ``(block * nhe, block * ne)``.
         """
-        block = self._block
-        nhe = self._nhe
-        ops = self._ops
-        shape = (block * nhe, block * nhe)
-        if not ops:
-            return _data.zeros(*shape, dtype="csr")
-        ops.sort()
-        nnz = sum(_csr.nnz(op) for _, _, op in ops)
-        indptr = np.zeros(shape[0] + 1, dtype=np.int32)
-        indices = np.zeros(nnz, dtype=np.int32)
-        data = np.zeros(nnz, dtype=np.complex128)
-        end = 0
-        op_idx = 0
-        op_len = len(ops)
-
-        for row_idx in range(nhe):
-            prev_op_idx = op_idx
-            while op_idx < op_len:
-                if ops[op_idx][0] != row_idx:
-                    break
-                op_idx += 1
-
-            row_ops = ops[prev_op_idx: op_idx]
-            rowpos = row_idx * block
-            for op_row in range(block):
-                for _, col_idx, op in row_ops:
-                    op = op.as_scipy()  # convert CSR to SciPy csr_matrix
-                    colpos = col_idx * block
-                    op_row_start = op.indptr[op_row]
-                    op_row_end = op.indptr[op_row + 1]
-                    op_row_len = op_row_end - op_row_start
-                    if op_row_len == 0:
-                        continue
-                    indices[end: end + op_row_len] = (
-                        op.indices[op_row_start: op_row_end] + colpos
-                    )
-                    data[end: end + op_row_len] = (
-                        op.data[op_row_start: op_row_end]
-                    )
-                    end += op_row_len
-                indptr[rowpos + op_row + 1] = end
-
-        return _csr.CSR(
-            (data, indices, indptr), shape=shape, copy=False,
+        self._ops.sort()
+        ops = np.array(self._ops, dtype=[
+            ("row", _data.base.idxint_dtype),
+            ("col", _data.base.idxint_dtype),
+            ("op", _data.CSR),
+        ])
+        return _csr._from_csr_blocks(
+            ops["row"], ops["col"], ops["op"],
+            self._n_blocks, self._block_size,
         )