diff --git a/gpu4pyscf/df/int3c2e_bdiv.py b/gpu4pyscf/df/int3c2e_bdiv.py index f6d6d8457..d87159255 100644 --- a/gpu4pyscf/df/int3c2e_bdiv.py +++ b/gpu4pyscf/df/int3c2e_bdiv.py @@ -494,6 +494,8 @@ def orbital_pair_cart2sph(self, compressed_eri3c, inplace=True): p0 = p1 = 0 for (i, j), bas_ij_idx in zip(ij_tasks, self.shl_pair_idx): nshl_pair = bas_ij_idx.size + if nshl_pair == 0: + continue ci = c2s[i] cj = c2s[j] nfi, di = ci.shape diff --git a/gpu4pyscf/df/tests/test_df_int3c2e.py b/gpu4pyscf/df/tests/test_df_int3c2e.py index eec40ecb7..64d1a4072 100644 --- a/gpu4pyscf/df/tests/test_df_int3c2e.py +++ b/gpu4pyscf/df/tests/test_df_int3c2e.py @@ -192,3 +192,19 @@ def test_int3c2e_sparse1(): dat = int3c2e_bdiv.aux_e2(mol, mol) ref = incore.aux_e2(mol, mol) assert abs(dat.get() - ref).max() < 1e-9 + + int3c2e_opt = int3c2e_bdiv.Int3c2eOpt(mol, mol).build() + ao_pair_mapping = int3c2e_opt.create_ao_pair_mapping() + nao = mol.nao + i, j = divmod(ao_pair_mapping, nao) + coeff = cp.asarray(int3c2e_opt.coeff) + aux_coeff = cp.asarray(int3c2e_opt.coeff) + for eri3c_batch in int3c2e_opt.int3c2e_bdiv_generator(): + eri3c_batch = int3c2e_opt.orbital_pair_cart2sph(eri3c_batch, inplace=True) + dat = cp.zeros((nao*nao, nao)) + dat[i*nao+j] = dat[j*nao+i] = eri3c_batch + dat = dat.reshape(nao,nao,nao) + dat = contract('pqr,rk->pqk', dat, aux_coeff) + dat = contract('pqk,qj->pjk', dat, coeff) + dat = contract('pjk,pi->ijk', dat, coeff) + assert abs(dat.get() - ref).max() < 1e-9 diff --git a/gpu4pyscf/lib/cutensor.py b/gpu4pyscf/lib/cutensor.py index 084718b6e..24bef8353 100644 --- a/gpu4pyscf/lib/cutensor.py +++ b/gpu4pyscf/lib/cutensor.py @@ -56,6 +56,20 @@ def _auto_create_mode(array, mode): # cutensor_dtype, alignment_req=alignment_req) # return _tensor_descriptors[key] +def _contract_einsum(pattern, a, b, alpha, beta, out=None, einsum=cupy.einsum): + if out is None: + out = einsum(pattern, a, b) + out *= alpha + elif beta == 0.: + out[:] = einsum(pattern, a, b) + out *= alpha + else: + out *= beta + tmp = einsum(pattern, a, b) + tmp *= alpha + out += tmp + return cupy.asarray(out, order='C') + def contraction( pattern, a, b, alpha, beta, out=None, @@ -67,6 +81,9 @@ def contraction( compute_desc=0, ws_pref=WORKSPACE_RECOMMENDED ): + if a.size == 0 or b.size == 0: + # cutensor does not support the 0-sized operands + return _contract_einsum(pattern, a, b, alpha, beta, out) pattern = pattern.replace(" ", "") str_a, rest = pattern.split(',') @@ -138,22 +155,11 @@ def contraction( warnings.warn(f'using {contract_engine} as the tensor contraction engine.') def contract(pattern, a, b, alpha=1.0, beta=0.0, out=None): try: - if out is None: - out = einsum(pattern, a, b) - out *= alpha - elif beta == 0.: - out[:] = einsum(pattern, a, b) - out *= alpha - else: - out *= beta - tmp = einsum(pattern, a, b) - tmp *= alpha - out += tmp + return _contract_einsum(pattern, a, b, alpha, beta, out, einsum) except cupy.cuda.memory.OutOfMemoryError: print('Out of memory error caused by cupy.einsum. ' 'It is recommended to install cutensor to resolve this.') raise - return cupy.asarray(out, order='C') else: def contract(pattern, a, b, alpha=1.0, beta=0.0, out=None): '''