Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add explicit hydrogens if needed in from_openeye #364

Merged
merged 7 commits into from Jun 29, 2019
59 changes: 59 additions & 0 deletions openforcefield/tests/test_toolkits.py
Expand Up @@ -236,6 +236,35 @@ def test_from_openeye_implicit_hydrogen(self):
molecule_from_expl = Molecule.from_openeye(oemol_expl)
assert molecule_from_expl.to_smiles() == molecule_from_impl.to_smiles()

@pytest.mark.skipif(not OpenEyeToolkitWrapper.is_available(), reason='OpenEye Toolkit not available')
def test_openeye_from_smiles_hydrogens_are_explicit(self):
"""
Test to ensure that OpenEyeToolkitWrapper.from_smiles has the proper behavior with
respect to its hydrogens_are_explicit kwarg
"""
toolkit_wrapper = OpenEyeToolkitWrapper()
smiles_impl = "C#C"
with pytest.raises(ValueError,
match="but OpenEye Toolkit interpreted SMILES 'C#C' as having implicit hydrogen") as excinfo:
offmol = Molecule.from_smiles(smiles_impl,
toolkit_registry=toolkit_wrapper,
hydrogens_are_explicit=True)
offmol = Molecule.from_smiles(smiles_impl,
toolkit_registry=toolkit_wrapper,
hydrogens_are_explicit=False)
andrrizzi marked this conversation as resolved.
Show resolved Hide resolved

smiles_expl = "HC#CH"
offmol = Molecule.from_smiles(smiles_expl,
toolkit_registry=toolkit_wrapper,
hydrogens_are_explicit=True)
# It's debatable whether this next function should pass. Strictly speaking, the hydrogens in this SMILES
# _are_ explicit, so allowing "hydrogens_are_explicit=False" through here is allowing a contradiction.
# We might rethink the name of this kwarg.

offmol = Molecule.from_smiles(smiles_expl,
toolkit_registry=toolkit_wrapper,
hydrogens_are_explicit=False)



@pytest.mark.skipif(not OpenEyeToolkitWrapper.is_available(), reason='OpenEye Toolkit not available')
Expand Down Expand Up @@ -498,6 +527,36 @@ def test_smiles_add_H(self):
smiles2 = molecule.to_smiles(toolkit_registry=toolkit_wrapper)
assert smiles2 == expected_output_smiles

@pytest.mark.skipif(not OpenEyeToolkitWrapper.is_available(), reason='OpenEye Toolkit not available')
def test_rdkit_from_smiles_hydrogens_are_explicit(self):
andrrizzi marked this conversation as resolved.
Show resolved Hide resolved
"""
Test to ensure that RDKitToolkitWrapper.from_smiles has the proper behavior with
respect to its hydrogens_are_explicit kwarg
"""
toolkit_wrapper = RDKitToolkitWrapper()
smiles_impl = "C#C"
with pytest.raises(ValueError,
match="but RDKit toolkit interpreted SMILES 'C#C' as having implicit hydrogen") as excinfo:
offmol = Molecule.from_smiles(smiles_impl,
toolkit_registry=toolkit_wrapper,
hydrogens_are_explicit=True)
offmol = Molecule.from_smiles(smiles_impl,
toolkit_registry=toolkit_wrapper,
hydrogens_are_explicit=False)

smiles_expl = "[H][C]#[C][H]"
offmol = Molecule.from_smiles(smiles_expl,
toolkit_registry=toolkit_wrapper,
hydrogens_are_explicit=True)

# It's debatable whether this next function should pass. Strictly speaking, the hydrogens in this SMILES
# _are_ explicit, so allowing "hydrogens_are_explicit=False" through here is allowing a contradiction.
# We might rethink the name of this kwarg.

offmol = Molecule.from_smiles(smiles_expl,
toolkit_registry=toolkit_wrapper,
hydrogens_are_explicit=False)


@pytest.mark.skipif(not RDKitToolkitWrapper.is_available(), reason='RDKit Toolkit not available')
def test_smiles_charged(self):
Expand Down
21 changes: 19 additions & 2 deletions openforcefield/utils/toolkits.py
Expand Up @@ -1065,9 +1065,16 @@ def from_smiles(self, smiles, hydrogens_are_explicit=False, allow_undefined_ster
if not (hydrogens_are_explicit):
result = oechem.OEAddExplicitHydrogens(oemol)
if result == False:
raise Exception(
raise ValueError(
andrrizzi marked this conversation as resolved.
Show resolved Hide resolved
"Addition of explicit hydrogens failed in from_openeye")
# TODO: Add allow_undefined_stereo to this function, and pass to from_openeye?
elif hydrogens_are_explicit:
if oechem.OEHasImplicitHydrogens(oemol):
andrrizzi marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(
f"'hydrogens_are_explicit' was specified as True, but OpenEye Toolkit interpreted "
f"SMILES '{smiles}' as having implicit hydrogen. If this SMILES is intended to "
f"express all explicit hydrogens in the molecule, then you should construct the "
f"desired molecule as an OEMol (where oechem.OEHasImplicitHydrogens(oemol) returns "
f"False), and then use Molecule.from_openeye() to create the desired OFFMol.")
molecule = self.from_openeye(oemol,
allow_undefined_stereo=allow_undefined_stereo)
return molecule
Expand Down Expand Up @@ -1744,6 +1751,16 @@ def from_smiles(self, smiles, hydrogens_are_explicit=False, allow_undefined_ster
# Add explicit hydrogens if they aren't there already
if not hydrogens_are_explicit:
rdmol = Chem.AddHs(rdmol)
elif hydrogens_are_explicit:
for atom_idx in range(rdmol.GetNumAtoms()):
atom = rdmol.GetAtomWithIdx(atom_idx)
if atom.GetNumImplicitHs() != 0:
raise ValueError(
f"'hydrogens_are_explicit' was specified as True, but RDKit toolkit interpreted "
f"SMILES '{smiles}' as having implicit hydrogen. If this SMILES is intended to "
f"express all explicit hydrogens in the molecule, then you should construct the "
f"desired molecule as an RDMol with no implicit hydrogens, and then use "
f"Molecule.from_rdkit() to create the desired OFFMol.")

molecule = Molecule.from_rdkit(rdmol,
allow_undefined_stereo=allow_undefined_stereo)
Expand Down