-
Notifications
You must be signed in to change notification settings - Fork 87
/
conformer_energies.py
139 lines (126 loc) · 6.19 KB
/
conformer_energies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import argparse
import openmm
from openff.units.openmm import from_openmm, to_openmm
from openmm import app
from openmm import unit as openmm_unit
from rdkit.Chem import rdMolAlign
from openff.toolkit import ForceField, Molecule, Topology
from openff.toolkit.utils import RDKitToolkitWrapper
def compute_conformer_energies_from_file(filename):
# Load in the molecule and its conformers.
# Note that all conformers of the same molecule are loaded as separate Molecule objects
# If using a OFF Toolkit version before 0.7.0, loading SDFs through RDKit and OpenEye may provide
# different behavior in some cases. So, here we force loading through RDKit to ensure the correct behavior
rdktkw = RDKitToolkitWrapper()
loaded_molecules = Molecule.from_file(filename, toolkit_registry=rdktkw)
# The logic below only works for lists of molecules, so if a
# single molecule was loaded, cast it to list
if type(loaded_molecules) is not list:
loaded_molecules = [loaded_molecules]
# Collatate all conformers of the same molecule
# NOTE: This isn't necessary if you have already loaded or created multi-conformer molecules;
# it is just needed because our SDF reader does not automatically collapse conformers.
molecules = [loaded_molecules[0]]
for molecule in loaded_molecules[1:]:
if molecule == molecules[-1]:
for conformer in molecule.conformers:
molecules[-1].add_conformer(conformer)
else:
molecules.append(molecule)
n_molecules = len(molecules)
n_conformers = sum([mol.n_conformers for mol in molecules])
print(
f"{n_molecules} unique molecule(s) loaded, with {n_conformers} total conformers"
)
# Load the openff-2.0.0 force field appropriate for vacuum calculations (without constraints)
forcefield_str = "openff_unconstrained-2.0.0.offxml"
forcefield = ForceField(forcefield_str)
# Loop over molecules and minimize each conformer
for molecule in molecules:
# If the molecule doesn't have a name, set mol.name to be the hill formula
if molecule.name == "":
molecule.name = Topology._networkx_to_hill_formula(molecule.to_networkx())
print("%s : %d conformers" % (molecule.name, molecule.n_conformers))
# Make a temporary copy of the molecule that we can update for each minimization
mol_copy = Molecule(molecule)
# Make an OpenFF Topology so we can parameterize the system
off_top = molecule.to_topology()
print(f"Parametrizing {molecule.name} (may take a moment to calculate charges)")
system = forcefield.create_openmm_system(off_top)
# Use OpenMM to compute initial and minimized energy for all conformers
integrator = openmm.VerletIntegrator(1 * openmm_unit.femtoseconds)
platform = openmm.Platform.getPlatformByName("Reference")
omm_top = off_top.to_openmm()
simulation = app.Simulation(omm_top, system, integrator, platform)
# Print text header
print(
"Using force field",
forcefield_str,
"\nConformer Initial PE Minimized PE RMS between initial and minimized conformer",
)
output = [
[
"Conformer",
"Initial PE (kcal/mol)",
"Minimized PE (kcal/mol)",
"RMS between initial and minimized conformer (Angstrom)",
]
]
for conformer_index, conformer in enumerate(molecule.conformers):
simulation.context.setPositions(to_openmm(conformer))
orig_potential = simulation.context.getState(
getEnergy=True
).getPotentialEnergy()
simulation.minimizeEnergy()
min_state = simulation.context.getState(getEnergy=True, getPositions=True)
min_potential = min_state.getPotentialEnergy()
# Calculate the RMSD between the initial and minimized conformer
min_coords = min_state.getPositions()
min_coords = from_openmm(min_coords)
mol_copy._conformers = None
mol_copy.add_conformer(conformer)
mol_copy.add_conformer(min_coords)
rdmol = mol_copy.to_rdkit()
rmslist = []
rdMolAlign.AlignMolConformers(rdmol, RMSlist=rmslist)
minimization_rms = rmslist[0]
# Save the minimized conformer to file
mol_copy._conformers = None
mol_copy.add_conformer(min_coords)
mol_copy.to_file(
f"{molecule.name}_conf{conformer_index+1}_minimized.sdf",
file_format="sdf",
)
print(
"%5d / %5d : %8.3f kcal/mol %8.3f kcal/mol %8.3f Angstroms"
% (
conformer_index + 1,
molecule.n_conformers,
orig_potential.value_in_unit(openmm_unit.kilocalories_per_mole),
min_potential.value_in_unit(openmm_unit.kilocalories_per_mole),
minimization_rms,
)
)
output.append(
[
str(conformer_index + 1),
f"{orig_potential/openmm_unit.kilocalories_per_mole:.3f}",
f"{min_potential/openmm_unit.kilocalories_per_mole:.3f}",
f"{minimization_rms:.3f}",
]
)
# Write the results out to CSV
with open(f"{molecule.name}.csv", "w") as of:
for line in output:
of.write(",".join(line) + "\n")
# Clean up OpenMM Simulation
del simulation, integrator
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Perform energy minimization on a molecule, potentially with many conformers. For each conformer, this script will provide the initial energy, minimized energy, and RMSD between the initial and minimized structure (both as STDOUT and a csv file). The minimized conformers will be written out to SDF."
)
parser.add_argument(
"-f", "--filename", help="Name of an input file containing conformers"
)
args = parser.parse_args()
compute_conformer_energies_from_file(args.filename)