Skip to content

Commit

Permalink
implement scripts for generating and running validations of single po…
Browse files Browse the repository at this point in the history
…int energies
  • Loading branch information
yudongqiu committed Aug 20, 2019
1 parent f9007e3 commit 7e8eea1
Show file tree
Hide file tree
Showing 3 changed files with 248 additions and 5 deletions.
40 changes: 35 additions & 5 deletions test_tools/gen_release.py
Expand Up @@ -6,6 +6,7 @@

this_file_folder = os.path.dirname(os.path.realpath(__file__))
analysis_script_folder = os.path.join(os.path.dirname(this_file_folder), 'analysis_scripts')
validate_script_folder = os.path.join(os.path.dirname(this_file_folder), 'validation_scripts')

# make a "release" folder
os.mkdir('release')
Expand All @@ -23,6 +24,9 @@
shutil.copytree(os.path.join('../..', dnm), dnm)
os.chdir('..')

# copy the result force field file here
shutil.copyfile(os.path.join('fb-fit', 'result', 'optimize', 'param_valence.offxml'), 'result.offxml')


# make a "analysis" sub-folder
os.mkdir('analysis')
Expand All @@ -31,8 +35,8 @@

# analysis script and running commands
analysis_scripts_cmd = {
'visualize_fb_parameters.py': '../../optimize.out -x ../../forcefield/param_valence.offxml',
'plot_optgeo_each_smirks.py': '-x ../../forcefield/param_valence.offxml --new_xml ../../result/optimize/param_valence.offxml -f ../../optimize.tmp -t ../../targets -j ../../smirnoff_parameter_assignments.json',
'visualize_fb_parameters.py': '../../optimize.out -x ../result.offxml',
'plot_optgeo_each_smirks.py': '-x ../../forcefield/param_valence.offxml --new_xml ../result.offxml -f ../../optimize.tmp -t ../../targets -j ../../smirnoff_parameter_assignments.json',
'plot_td_energies.py': '-f ../../optimize.tmp/ -t ../../targets/',
'plot_vibfreq_rmsd.py': '-f ../../optimize.tmp',
}
Expand All @@ -47,9 +51,35 @@ def dualprint(msg, fp, **kwargs):
command = ['python', os.path.join(analysis_script_folder, script)] + arguments.split()
dualprint(f"\n\n-= Running script {script} =-", fp)
dualprint(f"\n command:\n" + ' '.join(command), fp)
print(f"\n stdout:\n", file=fp)
subprocess.run(command, check=True, stdout=fp)
print(f"\n stdout:\n", file=fp, flush=True)
subprocess.run(command, check=True, stdout=fp, stderr=fp)

os.chdir('..')

print("release/analysis folder created successfully")
print("All analysis results are generated successfully")

# make a "validate_tools" folder
os.mkdir('validate_tools')
os.chdir('validate_tools')
print("release/validate_tools folder created")

validate_scripts_cmd = {
'prepare_validate_single_point.py': '-x ../result.offxml -t ../../targets',
}

with open('prep_validation_tools.log', 'w') as fp:
for script, arguments in validate_scripts_cmd.items():
command = ['python', os.path.join(validate_script_folder, script)] + arguments.split()
dualprint(f"\n\n-= Running script {script} =-", fp)
dualprint(f"\n command:\n" + ' '.join(command), fp)
print(f"\n stdout:\n", file=fp, flush=True)
subprocess.run(command, check=True, stdout=fp, stderr=fp)

with open('README.txt', 'w') as fnote:
fnote.write("Validate tools are provided in this folder\n")
fnote.write("To validate a test force field against the resulting force field in this release, go to the validate_single_point folder, then run\n\n")
fnote.write('python run_validate_single_point.py test_forcefield.offxml\n')

print("All validation tools created successfully")

os.chdir('..')
141 changes: 141 additions & 0 deletions validation_scripts/prepare_validate_single_point.py
@@ -0,0 +1,141 @@
#!/usr/bin/env python

import os
import shutil
import subprocess
import pickle

from simtk import openmm

from forcebalance.molecule import Molecule

from openforcefield.typing.engines.smirnoff import ForceField
from openforcefield.topology import Molecule as OffMolecule
from openforcefield.topology import Topology as OffTopology

this_file_folder = os.path.dirname(os.path.realpath(__file__))

def read_molecules_from_optgeo_target(target_fol):
# read all molecules from an optgeo target folder
res = {}
with open(os.path.join(target_fol, 'notes.txt')) as fnote:
for line in fnote:
ls = line.split()
if len(ls) == 7 and ls[-1] == 'SUCCESS':
assert ls[3] == 'molecule_id'
mol_id = ls[4]
fnm_pre = ls[0]
# read content of mol2 file
mol2_fnm = fnm_pre + '.mol2'
with open(os.path.join(target_fol, mol2_fnm)) as fmol2:
mol2_str = fmol2.read()
# read content of pdb file
pdb_fnm = fnm_pre + '.pdb'
with open(os.path.join(target_fol, pdb_fnm)) as f:
pdb_str = f.read()
# read QM geometry from xyz file
xyz_fnm = fnm_pre + '.xyz'
m = Molecule(os.path.join(target_fol, xyz_fnm))
data = {
'mol2_str': mol2_str,
'pdb_str': pdb_str,
'qm_geo': m.xyzs[0]
}
res[mol_id] = data
return res

def gen_bench_data(forcefield, data):
# write mol2 file
mol2_fnm = 'input.mol2'
with open(mol2_fnm, 'w') as f:
f.write(data['mol2_str'])
# write pdb file
pdb_fnm = 'conf.pdb'
with open(pdb_fnm, 'w') as f:
f.write(data['pdb_str'])
off_mol = OffMolecule.from_file(mol2_fnm)
# compute the partial charges explicitly
off_mol.generate_conformers()
off_mol.compute_partial_charges_am1bcc()
# save partial charges
partial_charges = off_mol.partial_charges
top = OffTopology.from_molecules([off_mol])
system = forcefield.create_openmm_system(top, charge_from_molecules=[off_mol])
# create simulation
omm_topology = openmm.app.PDBFile(pdb_fnm).topology
integrator = openmm.LangevinIntegrator(273.15, 1.0, 1.0)
simulation = openmm.app.Simulation(omm_topology, system, integrator)
# qm position and energy
qm_pos = data['qm_geo'] * openmm.unit.angstrom
simulation.context.setPositions(qm_pos)
e_geo1 = simulation.context.getState(getEnergy=True).getPotentialEnergy()
# mm minimized position and energy
for _ in range(10):
# run the minimization several times to make it more stable
simulation.minimizeEnergy(tolerance=1e-8, maxIterations=10000)
state = simulation.context.getState(getEnergy=True, getPositions=True)
mm_pos = state.getPositions(asNumpy=True)
e_geo2 = state.getPotentialEnergy()
bench_data = {
'mol2_fnm': mol2_fnm,
'pdb_fnm': pdb_fnm,
'partial_charges': partial_charges,
'geo1_qm': qm_pos,
'geo2_mm': mm_pos,
'energy_geo1': e_geo1,
'energy_geo2': e_geo2,
}
return bench_data

def gen_validate_single_point_folder(ffxml, targets_folder, targets_prefix):
forcefield = ForceField(ffxml, allow_cosmetic_attributes=True)
# collect molecule QM data from target folders
mol_data = {}
for target_fol in os.listdir(targets_folder):
if target_fol.startswith(targets_prefix):
path = os.path.join(targets_folder, target_fol)
data = read_molecules_from_optgeo_target(path)
print(f'Read {len(data)} molecule QM data from {path}')
mol_data.update(data)
print(f"Total {len(mol_data)} molecules found from target folders")
# create a new folder
fol_nm = 'validate_single_point'
if os.path.exists(fol_nm):
shutil.rmtree(fol_nm)
os.mkdir(fol_nm)
os.chdir(fol_nm)
# create a "mol_data" folder to put all molecule data
os.mkdir('mol_data')
os.chdir('mol_data')
# for each molecule, create a new subfolder and place the relative files in there
print(f'Generating benchmark data by MM energy evaluation and minimizations')
i = 0
for mol_id, data in mol_data.items():
os.mkdir(mol_id)
os.chdir(mol_id)
# generate benchmark data for a single molecule
bench_data = gen_bench_data(forcefield, data)
# save the bench_data as pickle file
with open('bench_data.pickle', 'wb') as pfile:
pickle.dump(bench_data, pfile)
os.chdir('..')
i += 1
if i % 10 == 0:
print(f'processed {i:4d} molecules')
os.chdir('..')
# copy and paste the "run_validate_single_point.py" script to here
shutil.copyfile(os.path.join(this_file_folder, 'run_validate_single_point.py'), 'run_validate_single_point.py')
os.chdir('..')

def main():
import argparse
parser = argparse.ArgumentParser('Prepare a validate folder containing data for single-point validations, from optgeo targets')
parser.add_argument('-x', '--ffxml', help='Forcefield file to use')
parser.add_argument('-t', '--targets_folder', help='Path to the FB targets folder')
parser.add_argument('-p', '--targets_prefix', default='optgeo_SMIRNOFF_Coverage_Set', help='Prefix of targets for selection')
args = parser.parse_args()

gen_validate_single_point_folder(args.ffxml, args.targets_folder, args.targets_prefix)

if __name__ == "__main__":
main()
72 changes: 72 additions & 0 deletions validation_scripts/run_validate_single_point.py
@@ -0,0 +1,72 @@
#!/usr/bin/env python

import os
import pickle
import time

from simtk import openmm, unit

from openforcefield.typing.engines.smirnoff import ForceField
from openforcefield.topology import Molecule as OffMolecule
from openforcefield.topology import Topology as OffTopology


def eval_single_point_energies(forcefield, partial_charges, mol2_path, pdb_path, positions_list):
""" Evaluate single-point energies of a molecule on one or more positions """
off_mol = OffMolecule.from_file(mol2_path)
# load partial charges
off_mol.partial_charges = partial_charges
off_top = OffTopology.from_molecules([off_mol])
system = forcefield.create_openmm_system(off_top, charge_from_molecules=[off_mol])
# create openmm simulation
omm_topology = openmm.app.PDBFile(pdb_path).topology
integrator = openmm.LangevinIntegrator(273.15, 1.0, 1.0)
simulation = openmm.app.Simulation(omm_topology, system, integrator)
# evaluate energy of each position
result_energy_list = []
for position in positions_list:
simulation.context.setPositions(position)
e = simulation.context.getState(getEnergy=True).getPotentialEnergy()
result_energy_list.append(e)
return result_energy_list

def run_validate_single_point(ffxml, data_folder):
forcefield = ForceField(ffxml, allow_cosmetic_attributes=True)
ff_name = os.path.basename(ffxml)
# read molecules from the data folder
bench_mol_folders = sorted(os.listdir(data_folder))
print(f'\n*** Single point benchmark on QM and MM minimized geometries ***')
print(f"- Number of molecules: {len(bench_mol_folders)}")
print(f"- Test forcefield : {ff_name}")
print(f"- Start Time : {time.ctime()}")
print(f"- Unit : kcal/mol \n")
print(f'{"":5s} | {"":15s} | {"QM Geometry":^47s} | {"MM Geometry":^47s}')
print(f'{"#":>5s} | {"Molecule ID":15s} | {"E_release":>15s} {"E_test":>15s} {"Diff":>15s} | {"E_release":>15s} {"E_test":>15s} {"Diff":>15s}')
for i, mol_fol in enumerate(bench_mol_folders):
folder_path = os.path.join(data_folder, mol_fol)
# read bench data from folder
with open(os.path.join(folder_path, 'bench_data.pickle'), 'rb') as pfile:
bench_data = pickle.load(pfile)
mol2_path = os.path.join(folder_path, bench_data['mol2_fnm'])
pdb_path = os.path.join(folder_path, bench_data['pdb_fnm'])
positions_list = [bench_data['geo1_qm'], bench_data['geo2_mm']]
ref_energies = [bench_data['energy_geo1'], bench_data['energy_geo2']]
eval_energies = eval_single_point_energies(forcefield, bench_data['partial_charges'], mol2_path, pdb_path, positions_list)
e_ref_geo1 = ref_energies[0].value_in_unit(unit.kilocalorie_per_mole)
e_ref_geo2 = ref_energies[1].value_in_unit(unit.kilocalorie_per_mole)
e_test_geo1 = eval_energies[0].value_in_unit(unit.kilocalorie_per_mole)
e_test_geo2 = eval_energies[1].value_in_unit(unit.kilocalorie_per_mole)
print(f'{i:5d} | {mol_fol:15s} | {e_ref_geo1:15.3f} {e_test_geo1:15.3f} {e_test_geo1-e_ref_geo1:15.3f} | {e_ref_geo2:15.3f} {e_test_geo2:15.3f} {e_test_geo2-e_ref_geo2:15.3f}')


def main():
import argparse
parser = argparse.ArgumentParser('Prepare a validate folder containing data for single-point validations, from optgeo targets')
parser.add_argument('ffxml', help='Forcefield file to benchmark')
parser.add_argument('-d', '--data_folder', default='mol_data', help='Path to the data folder')
args = parser.parse_args()

run_validate_single_point(args.ffxml, args.data_folder)

if __name__ == "__main__":
main()

0 comments on commit 7e8eea1

Please sign in to comment.