Skip to content

Commit

Permalink
Merge pull request #293 from openmm/fix/system_template_generator
Browse files Browse the repository at this point in the history
Fix system and template generator for Espaloma 0.3.*
  • Loading branch information
jchodera committed Oct 12, 2023
2 parents 3970498 + ec15b9c commit 6fe5f25
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 45 deletions.
10 changes: 2 additions & 8 deletions devtools/conda-envs/test_env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ name: openmmforcefields-test

channels:
- conda-forge
- dglteam # espaloma; TODO: Remove this once espaloma is fully on conda-forge
- defaults
- openeye

dependencies:
Expand All @@ -24,7 +22,7 @@ dependencies:

# Requirements for conversion tools
- pyyaml
- ambertools =22
- ambertools <23
- lxml
- networkx

Expand All @@ -38,9 +36,5 @@ dependencies:

#
# Espaloma requirements below
# TODO: Rework this once espaloma is on conda-forge
#
- pytorch >=1.8.0
- dgl <1
- qcportal >=0.15.0
- espaloma
- espaloma >=0.3.1
8 changes: 6 additions & 2 deletions openmmforcefields/generators/system_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class SystemGenerator:
postprocess_system : method
If not None, this method will be called as ``system = postprocess_system(system)`` to post-process the System object for create_system(topology) before it is returned.
"""
def __init__(self, forcefields=None, small_molecule_forcefield='openff-1.0.0', forcefield_kwargs=None, nonperiodic_forcefield_kwargs=None, periodic_forcefield_kwargs=None, barostat=None, molecules=None, cache=None, postprocess_system=None):
def __init__(self, forcefields=None, small_molecule_forcefield='openff-1.0.0', forcefield_kwargs=None, nonperiodic_forcefield_kwargs=None, periodic_forcefield_kwargs=None, template_generator_kwargs=None, barostat=None, molecules=None, cache=None, postprocess_system=None):
"""
This is a utility class to generate OpenMM Systems from Open Force Field Topology objects using AMBER
protein force fields and GAFF small molecule force fields.
Expand All @@ -89,6 +89,9 @@ def __init__(self, forcefields=None, small_molecule_forcefield='openff-1.0.0', f
Keyword arguments added to forcefield_kwargs when the Topology is non-periodic.
periodic_forcefield_kwargs : NonbondedMethod, optional, default={'nonbondedMethod' : PME}
Keyword arguments added to forcefield_kwargs when the Topology is periodic.
template_generator_kwargs : dict, optional, default=None
Keyword arguments to be passed to subclasses of ``openmmforcefields.generators.template_generators.SmallMoleculeTemplateGenerator``.
Currently only used for ``openmmforcefields.generators.template_generators.EspalomaTemplateGenerator``.
barostat : openmm.MonteCarloBarostat, optional, default=None
If not None, a new ``MonteCarloBarostat`` with matching parameters (but a different random number seed) will be created and
added to each newly created ``System``.
Expand Down Expand Up @@ -184,6 +187,7 @@ def __init__(self, forcefields=None, small_molecule_forcefield='openff-1.0.0', f
self.forcefield_kwargs = forcefield_kwargs if forcefield_kwargs is not None else dict()
self.nonperiodic_forcefield_kwargs = nonperiodic_forcefield_kwargs if nonperiodic_forcefield_kwargs is not None else {'nonbondedMethod' : app.NoCutoff}
self.periodic_forcefield_kwargs = periodic_forcefield_kwargs if periodic_forcefield_kwargs is not None else {'nonbondedMethod' : app.PME}
self.template_generator_kwargs = template_generator_kwargs

# Raise an exception if nonbondedForce is specified in forcefield_kwargs
if 'nonbondedMethod' in self.forcefield_kwargs:
Expand All @@ -200,7 +204,7 @@ def __init__(self, forcefields=None, small_molecule_forcefield='openff-1.0.0', f
for template_generator_cls in SmallMoleculeTemplateGenerator.__subclasses__():
try:
_logger.debug(f'Trying {template_generator_cls.__name__} to load {small_molecule_forcefield}')
self.template_generator = template_generator_cls(forcefield=small_molecule_forcefield, cache=cache)
self.template_generator = template_generator_cls(forcefield=small_molecule_forcefield, cache=cache, template_generator_kwargs=self.template_generator_kwargs)
break
except ValueError as e:
_logger.debug(f' {template_generator_cls.__name__} cannot load {small_molecule_forcefield}')
Expand Down
101 changes: 77 additions & 24 deletions openmmforcefields/generators/template_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import contextlib
import logging
import os
import warnings

_logger = logging.getLogger("openmmforcefields.generators.template_generators")

Expand Down Expand Up @@ -400,7 +401,7 @@ class GAFFTemplateGenerator(SmallMoleculeTemplateGenerator):
"""
INSTALLED_FORCEFIELDS = ['gaff-1.4', 'gaff-1.8', 'gaff-1.81', 'gaff-2.1', 'gaff-2.11']

def __init__(self, molecules=None, forcefield=None, cache=None):
def __init__(self, molecules=None, forcefield=None, cache=None, **kwargs):
"""
Create a GAFFTemplateGenerator with some OpenFF toolkit molecules
Expand All @@ -420,7 +421,6 @@ def __init__(self, molecules=None, forcefield=None, cache=None):
GAFF force field to use, one of ['gaff-1.4', 'gaff-1.8', 'gaff-1.81', 'gaff-2.1', 'gaff-2.11']
If not specified, the latest GAFF supported version is used.
GAFFTemplateGenerator.INSTALLED_FORCEFIELDS contains a complete up-to-date list of supported force fields.
Examples
--------
Expand Down Expand Up @@ -1207,7 +1207,7 @@ class SMIRNOFFTemplateGenerator(SmallMoleculeTemplateGenerator,OpenMMSystemMixin
Newly parameterized molecules will be written to the cache, saving time next time!
"""
def __init__(self, molecules=None, cache=None, forcefield=None):
def __init__(self, molecules=None, cache=None, forcefield=None, **kwargs):
"""
Create a SMIRNOFFTemplateGenerator with some OpenFF toolkit molecules
Expand All @@ -1226,7 +1226,6 @@ def __init__(self, molecules=None, cache=None, forcefield=None):
forcefield : str, optional, default=None
Name of installed SMIRNOFF force field (without .offxml) or local .offxml filename (with extension).
If not specified, the latest Open Force Field Initiative release is used.
Examples
--------
Expand Down Expand Up @@ -1451,8 +1450,13 @@ class EspalomaTemplateGenerator(SmallMoleculeTemplateGenerator,OpenMMSystemMixin
"""
OpenMM ForceField residue template generator for espaloma force fields using pre-cached OpenFF toolkit molecules.
Open Force Field Initiative: http://openforcefield.org
Espaloma: https://github.com/choderalab/espaloma
Espaloma uses a graph net approach to chemical perception to assign parameters and charges.
* Espaloma docs and papers: https://docs.espaloma.org/
* Espaloma code and models: https://github.com/choderalab/espaloma
* Open Force Field Initiative: http://openforcefield.org
.. warning :: This API is experimental and subject to change.
Examples
--------
Expand All @@ -1473,16 +1477,16 @@ class EspalomaTemplateGenerator(SmallMoleculeTemplateGenerator,OpenMMSystemMixin
>>> # Register the template generator
>>> forcefield.registerTemplateGenerator(template_generator.generator)
Create a template generator for a specific Espaloma release ('espaloma-0.2.2')
Create a template generator for a specific Espaloma release ('espaloma-0.3.2')
and register multiple molecules:
>>> molecule1 = Molecule.from_smiles('c1ccccc1')
>>> molecule2 = Molecule.from_smiles('CCO')
>>> template_generator = EspalomaTemplateGenerator(molecules=[molecule1, molecule2], forcefield='espaloma-0.2.2')
>>> template_generator = EspalomaTemplateGenerator(molecules=[molecule1, molecule2], forcefield='espaloma-0.3.2')
Alternatively, you can specify a local .pt parameter file for Espaloma:
>>> template_generator = EspalomaTemplateGenerator(molecules=[molecule1, molecule2], forcefield='espaloma-0.2.2.pt')
>>> template_generator = EspalomaTemplateGenerator(molecules=[molecule1, molecule2], forcefield='espaloma-0.3.2.pt')
You can also add some Molecules later on after the generator has been registered:
Expand All @@ -1496,7 +1500,9 @@ class EspalomaTemplateGenerator(SmallMoleculeTemplateGenerator,OpenMMSystemMixin
Newly parameterized molecules will be written to the cache, saving time next time!
"""
def __init__(self, molecules=None, cache=None, forcefield=None, model_cache_path=None):
CHARGE_METHODS = ('nn', 'am1-bcc', 'gasteiger', 'from-molecule')

def __init__(self, molecules=None, cache=None, forcefield=None, model_cache_path=None, template_generator_kwargs=None, **kwargs):
"""
Create an EspalomaTemplateGenerator with some OpenFF toolkit molecules
Expand All @@ -1514,12 +1520,20 @@ def __init__(self, molecules=None, cache=None, forcefield=None, model_cache_path
Filename for global caching of parameters.
If specified, parameterized molecules will be stored in a TinyDB instance as a JSON file.
forcefield : str, optional, default=None
Name of installed Espaloma force field version (e.g. 'espaloma-0.2.2') to retrieve remotely,
Name of installed Espaloma force field version (e.g. 'espaloma-0.3.2') to retrieve remotely,
a local Espaloma .pt parmaeters filename (with extension),
or a URL to an online espaloma force field.
model_cache_path : str, optional, default=None
If specified, use this directory to cache espaloma models
default: ~/.espaloma/
template_generator_kwargs : dict, optional, default=None
Optional keyword arguments:
{"reference_forcefield": str, Openff force field supported by https://github.com/openforcefield/openff-forcefields without .offxml extension}
{"charge_method": str, Charge method supported by espaloma ['nn', 'am1-bcc', 'gasteiger', 'from-molecule']}
Default behavior is to use ``openff_unconstrained-2.0.0`` for ``reference_forcefield`` and `nn` for `charge_method`.
User defined charges can be assigned by setting the ``charge_method`` to ``from_molecule``
if charges are assigned to openff.toolkit.topology.Molecule.
Examples
--------
Expand All @@ -1535,29 +1549,34 @@ def __init__(self, molecules=None, cache=None, forcefield=None, model_cache_path
>>> forcefield = ForceField(*amber_forcefields)
>>> espaloma_generator.forcefield
'espaloma-0.2.2'
'espaloma-0.3.2'
You can check which espaloma parameter set force field filename is in use with
>>> espaloma_generator.espaloma_filename
'/.../espaloma-0.2.2.pt'
'/.../espaloma-0.3.2.pt'
Create a template generator for a specific SMIRNOFF force field for multiple
molecules read from an SDF file or list of SMILES strings:
>>> molecules = Molecule.from_file('molecules.sdf') # doctest: +SKIP
>>> molecules = [Molecule.from_smiles(smiles) for smiles in ["CCO", "c1ccccc1"]]
>>> espaloma_generator = EspalomaTemplateGenerator(molecules=molecules, forcefield='espaloma-0.2.2')
>>> espaloma_generator = EspalomaTemplateGenerator(molecules=molecules, forcefield='espaloma-0.3.2')
You can also add molecules later on after the generator has been registered:
>>> espaloma_generator.add_molecules(molecules[-1])
You can optionally create or use a cache of pre-parameterized molecules:
>>> espaloma_generator = EspalomaTemplateGenerator(cache='smirnoff.json', forcefield='espaloma-0.2.2')
>>> espaloma_generator = EspalomaTemplateGenerator(cache='smirnoff.json', forcefield='espaloma-0.3.2')
Newly parameterized molecules will be written to the cache, saving time next time!
You can also pass a template_generator_kwargs to specify the reference_forcefield and/or charge_method in EspalomaTemplateGenerator:
>>> template_generator_kwargs = {"reference_forcefield": "openff_unconstrained-2.0.0", "charge_method": "nn"}
>>> espaloma_generator = EspalomaTemplateGenerator(cache='smirnoff.json', forcefield='espaloma-0.3.2', template_generator_kwargs=template_generator_kwargs)
"""
# Initialize molecules and cache
super().__init__(molecules=molecules, cache=cache)
Expand All @@ -1570,15 +1589,23 @@ def __init__(self, molecules=None, cache=None, forcefield=None, model_cache_path
self.ESPALOMA_MODEL_CACHE_PATH = model_cache_path

if forcefield is None:
# Use latest supported Open Force Field Initiative release if none is specified
forcefield = 'espaloma-0.2.2'
# Use latest supported Espaloma force field release if none is specified
forcefield = 'espaloma-0.3.2'
# TODO: After toolkit provides date-ranked force fields,
# use latest dated version if we can sort by date, such as self.INSTALLED_FORCEFIELDS[-1]
self._forcefield = forcefield

# Check that espaloma model parameters can be found or locally cached
self.espaloma_model_filepath = self._get_model_filepath(forcefield)

# Check reference forcefield and charge method
if template_generator_kwargs is not None:
self._reference_forcefield = template_generator_kwargs.get('reference_forcefield', 'openff_unconstrained-2.0.0')
self._charge_method = template_generator_kwargs.get('charge_method', 'nn')
else:
self._reference_forcefield = 'openff_unconstrained-2.0.0'
self._charge_method = 'from-molecule'

# Check to make sure dependencies are installed
try:
import espaloma
Expand All @@ -1597,6 +1624,7 @@ def __init__(self, molecules=None, cache=None, forcefield=None, model_cache_path
# Load torch model
import torch
self.espaloma_model = torch.load(self.espaloma_model_filepath, map_location=torch.device('cpu'))
self.espaloma_model.eval()

# Cache a copy of the OpenMM System generated for each molecule for testing purposes
self.clear_system_cache()
Expand All @@ -1609,7 +1637,7 @@ def INSTALLED_FORCEFIELDS(self):
# TODO: Update this
# TODO: Can we list force fields installed locally?
# TODO: Maybe we can check ~/.espaloma and ESPALOMA_PATH?
return ['espaloma-0.2.2']
return ['espaloma-0.3.2']

def _get_model_filepath(self, forcefield):
"""Retrieve local file path to cached espaloma model parameters, or retrieve remote model if needed.
Expand Down Expand Up @@ -1646,7 +1674,7 @@ def _get_model_filepath(self, forcefield):
import re
m = re.match(r'espaloma-(\d+\.\d+\.\d+)', forcefield)
if m is None:
raise ValueError(f'Espaloma model must be filepath or formatted like "espaloma-0.2.2" (found: "{forcefield}")')
raise ValueError(f'Espaloma model must be filepath or formatted like "espaloma-0.3.2" (found: "{forcefield}")')
version = m.group(1)
# Construct URL
url = f'https://github.com/choderalab/espaloma/releases/download/{version}/espaloma-{version}.pt'
Expand Down Expand Up @@ -1705,6 +1733,7 @@ def generate_residue_template(self, molecule, residue_atoms=None):
* The residue template will be named after the SMILES of the molecule.
"""
from openmm import unit
# Use the canonical isomeric SMILES to uniquely name the template
smiles = molecule.to_smiles()
_logger.info(f'Generating a residue template for {smiles} using {self._forcefield}')
Expand All @@ -1723,15 +1752,39 @@ def generate_residue_template(self, molecule, residue_atoms=None):
from espaloma.graphs.utils.regenerate_impropers import regenerate_impropers
regenerate_impropers(molecule_graph)

# Book keep partial charges if molecule has user charges
# NOTE: Charges will be overwritten when the Espaloma Graph object is loaded into an espaloma model
if self._molecule_has_user_charges(molecule):
# TODO: Change this to use openff.units exclusively?
# Make sure charges are in the right openmm units
_partial_charges = molecule.partial_charges
if all([isinstance(_charge, unit.Quantity) for _charge in _partial_charges]):
_charges = _partial_charges.value_in_unit(esp.units.CHARGE_UNIT)
else:
# Assuming charges are in openff units
_charges = _partial_charges.magnitude

# Assign parameters
self.espaloma_model(molecule_graph.heterograph)

# Create an OpenMM System
if self._molecule_has_user_charges(molecule):
system = esp.graphs.deploy.openmm_system_from_graph(molecule_graph, charge_method='from-molecule')
else:
# use espaloma charges
system = esp.graphs.deploy.openmm_system_from_graph(molecule_graph, charge_method='nn')
# Update partial charges if charge_method is "from_molecule"
if self._charge_method == 'from-molecule':
if self._molecule_has_user_charges(molecule):
import torch
import numpy as np
# Handle ValueError:
# "ValueError: given numpy array has byte order different from the native byte order.
# Conversion between byte orders is currently not supported."
_charges = _charges.astype(np.float32)
molecule_graph.nodes['n1'].data['q'] = torch.from_numpy(_charges).unsqueeze(-1).float()
else:
# No charges were found in molecule -- defaulting to nn charge method
warnings.warn("No charges found in molecule. Defaulting to 'nn' charge method.")
self._charge_method = "nn"

system = esp.graphs.deploy.openmm_system_from_graph(molecule_graph, charge_method=self._charge_method, forcefield=self._reference_forcefield)
_logger.info(f'Generating a system with charge method {self._charge_method} and {self._reference_forcefield} to assign nonbonded parameters')
self.cache_system(smiles, system)

# Convert to ffxml
Expand Down
Loading

0 comments on commit 6fe5f25

Please sign in to comment.