Skip to content

Commit

Permalink
Merge pull request #154 from leeping/tc_qmmm
Browse files Browse the repository at this point in the history
Support TeraChem QM/MM via OpenMM interface
  • Loading branch information
leeping authored May 30, 2023
2 parents 151bda3 + d407d85 commit 9981b39
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 23 deletions.
8 changes: 5 additions & 3 deletions .github/workflows/test-conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@ jobs:
# - name: Configure Conda
# run: conda config --set channel_priority flexible

- name: Install Dependencies from Conda
run: conda env update --file=devtools/conda-envs/latest.yaml --name=base
- name: Install Mamba
run: conda install -c conda-forge mamba

- name: Install Dependencies
run: mamba env update --file=devtools/conda-envs/latest.yaml --name=base

- name: pytest
run: pytest -v --cov=geometric geometric/tests/
Expand Down
121 changes: 106 additions & 15 deletions geometric/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from .nifty import bak, au2ev, eqcgmx, fqcgmx, bohr2ang, logger, getWorkQueue, queue_up_src_dest, rootdir, copy_tree_over
from .errors import EngineError, CheckCoordError, Psi4EngineError, QChemEngineError, TeraChemEngineError, \
ConicalIntersectionEngineError, OpenMMEngineError, GromacsEngineError, MolproEngineError, QCEngineAPIEngineError, GaussianEngineError, QUICKEngineError
from .xml_helper import read_coors_from_xml, write_coors_to_xml

# Strings matching common DFT functionals
# exclude "pw", "scan" because they might cause false positives
Expand Down Expand Up @@ -98,6 +99,7 @@ def edit_tcin(fin=None, fout=None, options=None, defaults=None, reqxyz=True, ign
tcin_dirname = os.path.dirname(os.path.abspath(fin))
section_mode = False
for line in open(fin).readlines():
line = line.expandtabs(4)
line = line.split("#")[0].strip()
if len(line) == 0: continue
if line == '$end':
Expand Down Expand Up @@ -181,7 +183,7 @@ def load_tcin(f_tcin):
# tcdef['dftgrid'] = "1"
# tcdef['precision'] = "mixed"
# tcdef['threspdp'] = "1.0e-8"
tcin = edit_tcin(fin=f_tcin, options={'run':'gradient'}, defaults=tcdef)
tcin = edit_tcin(fin=f_tcin, options={'run':'gradient', 'keep_scr':'yes', 'scrdir':'scr'}, defaults=tcdef)
return tcin

#====================================#
Expand All @@ -192,7 +194,11 @@ def load_tcin(f_tcin):
class Engine(object):
def __init__(self, molecule):
if len(molecule) != 1:
raise RuntimeError('Please pass only length-1 molecule objects to engine creation')
# In NEB calculations, length > 1 molecule objects may be passed. In keeping with the CLI, the last structure
# is used for Engine creation. The Engine behavior shouldn't depend on which structure is used, but if it does,
# we would need to rethink this design.
molecule = molecule[-1]
# raise RuntimeError('Please pass only length-1 molecule objects to engine creation')
self.M = deepcopy(molecule)
self.stored_calcs = OrderedDict()

Expand Down Expand Up @@ -370,13 +376,15 @@ class TeraChem(Engine):
"""
Run a TeraChem energy and gradient calculation.
"""
def __init__(self, molecule, tcin, dirname=None):
def __init__(self, molecule, tcin, dirname=None, pdb=None):
self.tcin = tcin.copy()
# Scratch folder
if 'scrdir' in self.tcin:
self.scr = self.tcin['scrdir']
else:
self.scr = 'scr'
# Always specify the TeraChem scratch folder name
self.tcin['scrdir'] = self.scr
# A few notes about the electronic structure method
self.casscf = self.tcin.get('casscf', 'no').lower() == 'yes'
self.unrestricted = (self.tcin['method'][0] == 'u')
Expand Down Expand Up @@ -404,10 +412,9 @@ def __init__(self, molecule, tcin, dirname=None):
for f in self.initguess_files:
if not os.path.exists(f):
raise TeraChemEngineError('%s guess file is missing' % f)
# Management of QM/MM: Read qmindices and
# store locations of prmtop and qmindices files,
self.qmmm = 'qmindices' in tcin
if self.qmmm:
# Management of QM/MM with AMBER: Read qmindices and store locations of prmtop and qmindices files
self.qmmm_amber = 'prmtop' in tcin
if self.qmmm_amber:
if not os.path.exists(tcin['coordinates']):
raise RuntimeError("TeraChem QM/MM coordinate file does not exist")
if not os.path.exists(tcin['prmtop']):
Expand All @@ -418,6 +425,41 @@ def __init__(self, molecule, tcin, dirname=None):
self.prmtop_name = os.path.abspath(tcin['prmtop'])
self.qmindices = [int(i.split()[0]) for i in open(self.qmindices_name).readlines()]
self.M_full = Molecule(tcin['coordinates'], ftype='inpcrd', build_topology=False)

# Management of QM/MM with openmm XML files
self.qmmm_openmm = 'system_xml' in tcin
if self.qmmm_openmm:
if not pdb:
raise RuntimeError("when system_xml is specified, pdb keyword arg must be provided to TC engine")
if self.qmmm_amber:
raise RuntimeError("prmtop and system_xml cannot both be in TC input file")
if not os.path.exists(tcin['coordinates']):
raise RuntimeError("TeraChem state XML file does not exist")
if not os.path.exists(tcin['system_xml']):
raise RuntimeError("TeraChem system XML file does not exist")
if not os.path.exists(tcin['qmindices']):
raise RuntimeError("TeraChem QM/MM qmindices file does not exist")
self.qmindices_name = os.path.abspath(tcin['qmindices'])
self.systemxml_name = os.path.abspath(tcin['system_xml'])
self.grdindices = [int(i.split()[0]) for i in open(self.qmindices_name).readlines()]
# test if "mmgrdindices.txt" exists

self.mmgrdindices_name = None
if "printmmgrad" in tcin:
if not os.path.exists(tcin['printmmgrad']):
raise RuntimeError("TeraChem printmmgrad file does not exist")
self.mmgrdindices_name = os.path.abspath(tcin['printmmgrad'])
mmgrdindices = [int(i.split()[0]) for i in open(self.mmgrdindices_name).readlines()]
self.grdindices += mmgrdindices
# remove redundant and sort indices
self.grdindices = list(set(self.grdindices))
self.grdindices.sort()
print("grdindices", self.grdindices)
# update molecule coordinates from "state.xml"
self.M_full = Molecule(pdb, build_topology=True)
self.state_xml = read_coors_from_xml(self.M_full, os.path.abspath(tcin["coordinates"]))
print("finish reading from state_xml")

super(TeraChem, self).__init__(molecule)

def orbital_filenames(self):
Expand Down Expand Up @@ -499,7 +541,12 @@ def calc_new(self, coords, dirname):
# Ensure guess files are in the correct locations
self.copy_guess_files(dirname)
# Set coordinate file name
start_xyz = 'start.rst7' if self.qmmm else 'start.xyz'
if self.qmmm_amber:
start_xyz = 'start.rst7'
elif self.qmmm_openmm:
start_xyz = 'start.xml'
else:
start_xyz = 'start.xyz'
self.tcin['coordinates'] = start_xyz
self.tcin['run'] = 'gradient'
# Write the TeraChem input file
Expand All @@ -510,12 +557,20 @@ def calc_new(self, coords, dirname):
# bak(start_xyz, cwd=dirname, start=0)
# Convert coordinates back to the xyz file
self.M.xyzs[0] = coords.reshape(-1, 3) * bohr2ang
if self.qmmm:
if self.qmmm_amber:
# Copy QMMM files to the correct locations and set positions in inpcrd/rst7 file
shutil.copy2(self.qmindices_name, dirname)
shutil.copy2(self.prmtop_name, dirname)
self.M_full.xyzs[0][self.qmindices, :] = self.M.xyzs[0]
self.M_full[0].write(os.path.join(dirname, start_xyz), ftype='inpcrd')
elif self.qmmm_openmm:
# Copy OpenMM Files
shutil.copy2(self.qmindices_name, dirname)
shutil.copy2(self.systemxml_name, dirname)
if self.mmgrdindices_name is not None:
shutil.copy2(self.mmgrdindices_name, dirname)
self.M_full.xyzs[0][self.grdindices, :] = self.M.xyzs[0]
write_coors_to_xml(self.M_full, self.state_xml, os.path.join(dirname, start_xyz))
else:
self.M[0].write(os.path.join(dirname, start_xyz))
# Run TeraChem
Expand Down Expand Up @@ -543,7 +598,12 @@ def calc_wq_new(self, coords, dirname):
# Ensure guess files are in the correct locations
guessfnms = self.copy_guess_files(dirname)
# Set coordinate file name
start_xyz = 'start.rst7' if self.qmmm else 'start.xyz'
if self.qmmm_amber:
start_xyz = 'start.rst7'
elif self.qmmm_openmm:
start_xyz = 'start.xml'
else:
start_xyz = 'start.xyz'
self.tcin['coordinates'] = start_xyz
self.tcin['run'] = 'gradient'
# For queueing up jobs, delete GPU key and let the worker decide
Expand All @@ -552,21 +612,37 @@ def calc_wq_new(self, coords, dirname):
edit_tcin(fout="%s/run.in" % dirname, options=self.tcin)
# Convert coordinates back to the xyz file
self.M.xyzs[0] = coords.reshape(-1, 3) * bohr2ang
if self.qmmm:
if self.qmmm_amber:
# Copy QMMM files to the correct locations and set positions in inpcrd/rst7 file
shutil.copy2(self.qmindices_name, dirname)
shutil.copy2(self.prmtop_name, dirname)
self.M_full.xyzs[0][self.qmindices, :] = self.M.xyzs[0]
self.M_full[0].write(os.path.join(dirname, start_xyz), ftype='inpcrd')
elif self.qmmm_openmm:
# Copy OpenMM Files
shutil.copy2(self.qmindices_name, dirname)
shutil.copy2(self.systemxml_name, dirname)
if self.mmgrdindices is not None:
shutil.copy2(self.mmgrdindices_name, dirname)
self.M_full.xyzs[0][self.grdindices, :] = self.M.xyzs[0]
write_coors_to_xml(self.M_full, self.state_xml, os.path.join(dirname, start_xyz))
else:
self.M[0].write(os.path.join(dirname, start_xyz))
# Specify WQ input and output files
in_files = [('%s/run.in' % dirname, 'run.in'), ('%s/%s' % (dirname, start_xyz), start_xyz)]
if self.qmmm:
if self.qmmm_amber:
qmindices_filename = os.path.split(self.qmindices_name)[1]
prmtop_filename = os.path.split(self.prmtop_name)[1]
in_files += [("%s/%s" % (dirname, qmindices_filename), qmindices_filename)]
in_files += [("%s/%s" % (dirname, prmtop_filename), prmtop_filename)]
elif self.qmmm_openmm:
qmindices_filename = os.path.split(self.qmindices_name)[1]
systemxml_filename = os.path.split(self.systemxml_name)[1]
in_files += [("%s/%s" % (dirname, qmindices_filename), qmindices_filename)]
in_files += [("%s/%s" % (dirname, systemxml_filename), systemxml_filename)]
if self.mmgrdindices_name is not None:
mmgrdindices_filename = os.path.split(self.mmgrdindices_name)[1]
in_files += [("%s/%s" % (dirname, mmgrdindices_filename), mmgrdindices_filename)]
for f in guessfnms:
in_files.append((os.path.join(dirname, f), f))
out_files = [('%s/run.out' % dirname, 'run.out')]
Expand All @@ -586,11 +662,21 @@ def number_output(self, dirname, calcNum):
def read_result(self, dirname, check_coord=None):
if check_coord is not None:
read_xyz_success = False
start_xyz = 'start.rst7' if self.qmmm else 'start.xyz'
if self.qmmm_amber:
start_xyz = 'start.rst7'
elif self.qmmm_openmm:
start_xyz = 'start.xml'
else:
start_xyz = 'start.xyz'
if os.path.exists(os.path.join(dirname, start_xyz)):
try:
M = Molecule(os.path.join(dirname, start_xyz))
read_xyz = M.xyzs[0][self.qmindices] if self.qmmm else M.xyzs[0]
if self.qmmm_amber:
read_xyz = M.xyzs[0][self.qmindices]
elif self.qmmm_openmm:
read_xyz = M.xyzs[0][self.grdindices]
else:
read_xyz = M.xyzs[0]
read_xyz = read_xyz.flatten() / bohr2ang
read_xyz_success = True
except: pass
Expand All @@ -610,7 +696,12 @@ def read_result(self, dirname, check_coord=None):
subprocess.call("awk '/Gradient units are Hartree/,/Net gradient|Point charge part/ {if ($1 ~ /^-?[0-9]/) {print}}' run.out > grad.txt", cwd=dirname, shell=True)
subprocess.call("awk 'BEGIN {s=0} /SPIN S-SQUARED/ {s=$3} END {printf \"%.6f\\n\", s}' run.out > s-squared.txt", cwd=dirname, shell=True)
energy = float(open(os.path.join(dirname,'energy.txt')).readlines()[0].strip())
na = len(self.qmindices) if self.qmmm else self.M.na
if self.qmmm_amber:
na = len(self.qmindices)
elif self.qmmm_openmm:
na = len(self.grdindices)
else:
na = self.M.na
gradient = np.loadtxt(os.path.join(dirname,'grad.txt'))[:na].flatten()
s2 = float(open(os.path.join(dirname,'s-squared.txt')).readlines()[0].strip())
assert gradient.shape[0] == self.M.na*3
Expand Down
4 changes: 4 additions & 0 deletions geometric/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,10 @@ def calcEnergyForce(self):
spcalc['gradient'] = qm_grads_proj
self.E = spcalc['energy']
self.gradx = spcalc['gradient']
# gx = self.gradx.reshape(-1, 3)
# for i in range(gx.shape[0]):
# print("% 10.6f % 10.6f % 10.6f" % (gx[i, 0], gx[i, 1], gx[i, 2]))
# sys.exit()
# Calculate Hessian at the first step, or at each step if desired
if self.params.hessian == 'each' or self.recalcHess:
# Hx is assumed to be the Cartesian Hessian at the current step.
Expand Down
37 changes: 32 additions & 5 deletions geometric/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from .molecule import Molecule, Elements
from .nifty import logger, isint, uncommadash, bohr2ang, ang2bohr
from .rotate import calc_fac_dfac
from .xml_helper import read_coors_from_xml, write_coors_to_xml

def get_molecule_engine(**kwargs):
"""
Expand Down Expand Up @@ -148,8 +149,9 @@ def get_molecule_engine(**kwargs):
# 2) The geomeTRIC optimizer only "sees" the part of the molecule being optimized.
# 3) The TeraChem engine writes .rst7 files instead of .xyz files by inserting the
# optimization coordinates into the correct locations.
qmmm = 'qmindices' in tcin
if qmmm:
qmmm_amber = 'prmtop' in tcin
qmmm_openmm = 'system_xml' in tcin
if qmmm_amber:
try:
from openmm.app import AmberPrmtopFile
except ImportError:
Expand All @@ -173,6 +175,33 @@ def get_molecule_engine(**kwargs):
M.top_settings['radii'] = radii
M.top_settings['fragment'] = frag
M.build_topology()
elif qmmm_openmm:
if qmmm_amber:
raise RuntimeError("prmtop and system_xml cannot both be in TC input file")
if not pdb:
raise RuntimeError("TeraChem/OpenMM requires PDB file in get_molecule_engine")
if not os.path.exists(tcin['coordinates']):
raise RuntimeError("TeraChem/OpenMM State XML file does not exist")
if not os.path.exists(tcin['system_xml']):
raise RuntimeError("TeraChem/OpenMM System XML file does not exist")
if not os.path.exists(tcin['qmindices']):
raise RuntimeError("TeraChem QM/MM qmindices file does not exist")

M_full = Molecule(pdb)
read_coors_from_xml(M_full, tcin['coordinates'])

qmindices_name = tcin['qmindices']
grdindices = [int(i.split()[0]) for i in open(qmindices_name).readlines()]
if 'printmmgrad' in tcin:
mmgrdindices = [int(i.split()[0]) for i in open(tcin['printmmgrad']).readlines()]
grdindices += mmgrdindices
# remove redundant and sort indices
grdindices = list(set(grdindices))
grdindices.sort()
M = M_full.atom_select(grdindices)
M.top_settings['radii'] = radii
M.top_settings['fragment'] = frag
M.build_topology()
elif pdb is not None:
M = Molecule(pdb, radii=radii, fragment=frag)
else:
Expand All @@ -181,9 +210,7 @@ def get_molecule_engine(**kwargs):
M = Molecule(tcin['coordinates'], radii=radii, fragment=frag)
M.charge = tcin['charge']
M.mult = tcin.get('spinmult',1)
# The TeraChem engine needs to write rst7 files before calling TC
# and also make sure the prmtop and qmindices.txt files are present.
engine = TeraChem(M[-1], tcin, dirname=dirname)
engine = TeraChem(M, tcin, dirname=dirname, pdb=pdb)
elif engine_str == 'qchem':
logger.info("Q-Chem engine selected. Expecting Q-Chem input for gradient calculation.\n")
# The file from which we make the Molecule object
Expand Down
37 changes: 37 additions & 0 deletions geometric/xml_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import xml.etree.ElementTree as ET
from .molecule import Molecule
import copy

def read_coors_from_xml(M, state_xml):
with open(state_xml) as fobj:
xml = fobj.read()
root = ET.fromstring(xml)

for child in root:
if child.tag == "Positions":
for aid in range(M.na):
x = float(child[aid].attrib['x'])
y = float(child[aid].attrib['y'])
z = float(child[aid].attrib['z'])
M.xyzs[0][aid, 0] = x*10.0
M.xyzs[0][aid, 1] = y*10.0
M.xyzs[0][aid, 2] = z*10.0
return root;


def write_coors_to_xml(M, root_template, filename):
root = copy.deepcopy(root_template)
for child in root:
if child.tag == "Positions":
for aid in range(M.na):
x = M.xyzs[0][aid, 0] / 10.0
y = M.xyzs[0][aid, 1] / 10.0
z = M.xyzs[0][aid, 2] / 10.0
child[aid].attrib['x'] = "%.16e" % x
child[aid].attrib['y'] = "%.16e" % y
child[aid].attrib['z'] = "%.16e" % z
# write
fout = open(filename, "w")
print(ET.tostring(root).decode("utf-8"), file=fout)
fout.close()

0 comments on commit 9981b39

Please sign in to comment.