Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up the HEOM RHS construct #2128

Merged
merged 18 commits into from
Mar 22, 2023
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
c767ab3
Add CSR constructor for building a matrix from CSR sub-blocks.
hodgestar Mar 20, 2023
e8b2de8
Use from_csr_blocks to build CSR RHS for the HEOM.
hodgestar Mar 20, 2023
031a093
Merge branch 'master' into feature/faster-heom-rhs-construction
hodgestar Mar 20, 2023
da03d45
Specify type for i and check sorting of block rows.
hodgestar Mar 20, 2023
0769771
Sanity check from_csr_blocks inputs.
hodgestar Mar 21, 2023
0a41dab
Mark from_csr_blocks as an internal function.
hodgestar Mar 21, 2023
c815c52
Neaten up construction of block_rows, block_cols and block_ops arrays.
hodgestar Mar 21, 2023
3a75217
Add support for empty op matrices to _from_csr_blocks.
hodgestar Mar 22, 2023
964a989
Add tests for _from_csr_blocks.
hodgestar Mar 22, 2023
426ad4b
Correctly use matrix B in _from_csr_blocks kron test.
hodgestar Mar 22, 2023
25d2a7f
Fix HEOM RHS dims to be consistent in the time-dependent and independ…
hodgestar Mar 22, 2023
46ffa0a
Improve performance of ADOs.idx.
hodgestar Mar 22, 2023
a7231d7
Remove debugging breakpoint.
hodgestar Mar 22, 2023
55263ef
Add towncrier entry.
hodgestar Mar 22, 2023
6472ed8
Test _from_csr_blocks against zero CSR matrices rather than empty one…
hodgestar Mar 22, 2023
d01243e
Return csr.zeros() rather than csr.empty() in fast paths -- empty CSR…
hodgestar Mar 22, 2023
6348d1a
Use CSR as the dtype of the temporary ops array in gather.
hodgestar Mar 22, 2023
58f82aa
Document that the RHS of the HEOMSolver has an underlying sparse repr…
hodgestar Mar 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/changes/2128.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Speed up the construction of the RHS of the HEOM solver by a factor of 4x by converting the final step to Cython.
3 changes: 3 additions & 0 deletions qutip/core/data/csr.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,6 @@ cpdef CSR from_dense(Dense matrix)
cdef CSR from_coo_pointers(base.idxint *rows, base.idxint *cols, double complex *data,
base.idxint n_rows, base.idxint n_cols, base.idxint nnz,
double tol=*)

cpdef CSR _from_csr_blocks(base.idxint[:] block_rows, base.idxint[:] block_cols, CSR[:] block_ops,
base.idxint n_blocks, base.idxint block_size)
119 changes: 119 additions & 0 deletions qutip/core/data/csr.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -844,3 +844,122 @@ def diags(diagonals, offsets=None, shape=None):
return diags_(
diagonals_, np.array(offsets_, dtype=idxint_dtype), n_rows, n_cols,
)


cpdef CSR _from_csr_blocks(
base.idxint[:] block_rows, base.idxint[:] block_cols, CSR[:] block_ops,
base.idxint n_blocks, base.idxint block_size
):
"""
Construct a CSR from non-overlapping blocks.

Each operator in ``block_ops`` should be a square CSR operator with
shape ``(block_size, block_size)``. The output operator consists of
``n_blocks`` by ``n_blocks`` blocks and thus has shape
``(n_blocks * block_size, n_blocks * block_size)``.

None of the operators should overlap (i.e. the list of block row and
column pairs should be unique).

Parameters
----------
block_rows : sequence of base.idxint integers
The block row for each operator. The block row should be in
``range(0, n_blocks)``.

block_cols : sequence of base.idxint integers
The block column for each operator. The block column should be in
``range(0, n_blocks)``.

block_ops : sequence of CSR matrixes
The operators corresponding to the rows and columns in ``block_rows``
and ``block_cols``.

n_blocks : base.idxint
Number of blocks. The shape of the final matrix is
(n_blocks * block, n_blocks * block).

block_size : base.idxint
Size of each block. The shape of matrices in ``block_ops`` is
``(block_size, block_size)``.
"""
cdef CSR op
cdef base.idxint shape = n_blocks * block_size
cdef base.idxint nnz_ = 0
cdef base.idxint n_ops = len(block_ops)
cdef base.idxint i, j
cdef base.idxint row_idx, col_idx

# check arrays are the same length
if len(block_rows) != n_ops or len(block_cols) != n_ops:
raise ValueError(
"The arrays block_rows, block_cols and block_ops should have"
" the same length."
)

if n_ops == 0:
return zeros(shape, shape)

# check op shapes and calculate nnz
for op in block_ops:
nnz_ += nnz(op)
if op.shape[0] != block_size or op.shape[1] != block_size:
raise ValueError(
"Block operators (block_ops) do not have the correct shape."
)

# check ops are ordered by (row, column)
row_idx = block_rows[0]
col_idx = block_cols[0]
for i in range(1, n_ops):
if (
block_rows[i] < row_idx or
(block_rows[i] == row_idx and block_cols[i] <= col_idx)
):
raise ValueError(
"The arrays block_rows and block_cols must be sorted"
" by (row, column)."
)
row_idx = block_rows[i]
col_idx = block_cols[i]

if nnz_ == 0:
return zeros(shape, shape)

cdef CSR out = empty(shape, shape, nnz_)
cdef base.idxint op_idx = 0
cdef base.idxint prev_op_idx = 0
cdef base.idxint end = 0
cdef base.idxint row_pos, col_pos
cdef base.idxint op_row, op_row_start, op_row_end, op_row_len

out.row_index[0] = 0

for row_idx in range(n_blocks):
prev_op_idx = op_idx
while op_idx < n_ops:
if block_rows[op_idx] != row_idx:
break
op_idx += 1

row_pos = row_idx * block_size
for op_row in range(block_size):
for i in range(prev_op_idx, op_idx):
op = block_ops[i]
if nnz(op) == 0:
# empty CSR matrices have uninitialized row_index entries.
# it's unclear whether users should ever see such matrixes
# but we support them here anyway.
continue
col_idx = block_cols[i]
col_pos = col_idx * block_size
op_row_start = op.row_index[op_row]
op_row_end = op.row_index[op_row + 1]
op_row_len = op_row_end - op_row_start
for j in range(op_row_len):
out.col_index[end + j] = op.col_index[op_row_start + j] + col_pos
out.data[end + j] = op.data[op_row_start + j]
end += op_row_len
out.row_index[row_pos + op_row + 1] = end

return out
111 changes: 43 additions & 68 deletions qutip/solver/heom/bofin_solvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def __init__(self, exponents, max_depth):

self.labels = list(state_number_enumerate(self.dims, max_depth))
self._label_idx = {s: i for i, s in enumerate(self.labels)}
self.idx = self._label_idx.__getitem__

def idx(self, label):
"""
Expand All @@ -134,6 +135,13 @@ def idx(self, label):
-------
int
The index of the label within the list of ADO labels.

Note
----
This implementation of the ``.idx(...)`` method is just for
reference and documentation. To avoid the cost of a Python
function call, it is replaced with
``self._label_idx.__getitem__`` when the instance is created.
"""
return self._label_idx[label]

Expand Down Expand Up @@ -617,7 +625,7 @@ def __init__(self, H, bath, max_depth, *, options=None):
self._sId = _data.identity(self._sup_shape, dtype="csr")

# pre-calculate superoperators required by _grad_prev and _grad_next:
Qs = [exp.Q for exp in self.ados.exponents]
Qs = [exp.Q.to("csr") for exp in self.ados.exponents]
self._spreQ = [spre(op).data for op in Qs]
self._spostQ = [spost(op).data for op in Qs]
self._s_pre_minus_post_Q = [
Expand Down Expand Up @@ -689,14 +697,11 @@ def _combine_bath_exponents(self, bath):
)
return exponents

def _grad_n(self, L, he_n):
def _grad_n(self, he_n):
""" Get the gradient for the hierarchy ADO at level n. """
vk = self.ados.vk
vk_sum = sum(he_n[i] * vk[i] for i in range(len(vk)))
if L is not None: # time-independent case
op = _data.sub(L, _data.mul(self._sId, vk_sum))
else: # time-dependent case
op = _data.mul(self._sId, -vk_sum)
op = _data.mul(self._sId, -vk_sum)
return op

def _grad_prev(self, he_n, k):
Expand Down Expand Up @@ -811,14 +816,14 @@ def _grad_next_fermionic(self, he_n, k):
)
return op

def _rhs(self, L):
def _rhs(self):
""" Make the RHS for the HEOM. """
ops = _GatherHEOMRHS(
self.ados.idx, block=self._sup_shape, nhe=self._n_ados
)

for he_n in self.ados.labels:
op = self._grad_n(L, he_n)
op = self._grad_n(he_n)
ops.add_op(he_n, he_n, op)
for k in range(len(self.ados.dims)):
next_he = self.ados.next(he_n, k)
Expand All @@ -834,12 +839,17 @@ def _rhs(self, L):

def _calculate_rhs(self):
""" Make the full RHS required by the solver. """
rhs_mat = self._rhs()
rhs_dims = [
self._sup_shape * self._n_ados, self._sup_shape * self._n_ados
]
h_identity = _data.identity(self._n_ados, dtype="csr")

if self.L_sys.isconstant:
L0 = self.L_sys(0)
rhs_mat = self._rhs(L0.data)
rhs = QobjEvo(Qobj(rhs_mat, dims=[
self._sup_shape * self._n_ados, self._sup_shape * self._n_ados
]))
# For the constant case, we just add the Liouvillian to the
# diagonal blocks of the RHS matrix.
rhs_mat += _data.kron(h_identity, self.L_sys(0).to("csr").data)
rhs = QobjEvo(Qobj(rhs_mat, dims=rhs_dims))
else:
# In the time dependent case, we construct the parameters
# for the ODE gradient function under the assumption that
Expand All @@ -850,23 +860,23 @@ def _calculate_rhs(self):
# This assumption holds because only _grad_n dependents on
# the system Liouvillian (and not _grad_prev or _grad_next) and
# the bath coupling operators are not time-dependent.
#
# By calling _rhs(None) we omit the Liouvillian completely from
# the RHS and then manually add back the Liouvillian afterwards.
rhs_mat = self._rhs(None)
rhs = QobjEvo(Qobj(rhs_mat))
h_identity = _data.identity(self._n_ados, dtype="csr")
rhs = QobjEvo(Qobj(rhs_mat, dims=rhs_dims))

def _kron(x):
return Qobj(_data.kron(h_identity, x.data)).to("csr")
return Qobj(
_data.kron(h_identity, x.data),
dims=rhs_dims,
).to("csr")

rhs += self.L_sys.linear_map(_kron)

# The assertion that rhs_mat has data type CSR is just a sanity
# check on the RHS creation. The base solver class will still
# convert the RHS to the type required by the ODE integrator if
# required.
# needed.
assert isinstance(rhs_mat, _csr.CSR)
assert isinstance(rhs, QobjEvo)
assert rhs.dims == rhs_dims

return rhs

Expand Down Expand Up @@ -1265,15 +1275,15 @@ class _GatherHEOMRHS:
The number of ADOs in the hierarchy.
"""
def __init__(self, f_idx, block, nhe):
self._block = block
self._nhe = nhe
self._block_size = block
self._n_blocks = nhe
self._f_idx = f_idx
self._ops = []

def add_op(self, row_he, col_he, op):
""" Add an block operator to the list. """
self._ops.append(
(self._f_idx(row_he), self._f_idx(col_he), _data.to["csr"](op))
(self._f_idx(row_he), self._f_idx(col_he), op)
)

def gather(self):
Expand All @@ -1294,48 +1304,13 @@ def gather(self):
rhs : :obj:`Data`
A combined matrix of shape ``(block * nhe, block * ne)``.
"""
block = self._block
nhe = self._nhe
ops = self._ops
shape = (block * nhe, block * nhe)
if not ops:
return _data.zeros(*shape, dtype="csr")
ops.sort()
nnz = sum(_csr.nnz(op) for _, _, op in ops)
indptr = np.zeros(shape[0] + 1, dtype=np.int32)
indices = np.zeros(nnz, dtype=np.int32)
data = np.zeros(nnz, dtype=np.complex128)
end = 0
op_idx = 0
op_len = len(ops)

for row_idx in range(nhe):
prev_op_idx = op_idx
while op_idx < op_len:
if ops[op_idx][0] != row_idx:
break
op_idx += 1

row_ops = ops[prev_op_idx: op_idx]
rowpos = row_idx * block
for op_row in range(block):
for _, col_idx, op in row_ops:
op = op.as_scipy() # convert CSR to SciPy csr_matrix
colpos = col_idx * block
op_row_start = op.indptr[op_row]
op_row_end = op.indptr[op_row + 1]
op_row_len = op_row_end - op_row_start
if op_row_len == 0:
continue
indices[end: end + op_row_len] = (
op.indices[op_row_start: op_row_end] + colpos
)
data[end: end + op_row_len] = (
op.data[op_row_start: op_row_end]
)
end += op_row_len
indptr[rowpos + op_row + 1] = end

return _csr.CSR(
(data, indices, indptr), shape=shape, copy=False,
self._ops.sort()
ops = np.array(self._ops, dtype=[
("row", _data.base.idxint_dtype),
("col", _data.base.idxint_dtype),
("op", _data.CSR),
])
return _csr._from_csr_blocks(
ops["row"], ops["col"], ops["op"],
self._n_blocks, self._block_size,
)