diff --git a/docs/releasehistory.rst b/docs/releasehistory.rst index 1bb330c1..04bdb62a 100644 --- a/docs/releasehistory.rst +++ b/docs/releasehistory.rst @@ -16,11 +16,12 @@ This is a micro release that includes general bug fixes and stability improvemen Bugfixes """""""" - `PR #48 `_: Fixes CLI's default output paths. +- `PR #52 `_: Molecule connectivity can be assigned from an RDKit molecular template when loading it from a PDB file without connectivity. Tests added """"""""""" - `PR #48 `_: Adds tests to validate the assignment of the default output paths. - +- `PR #52 `_: Adds tests to validate the initialization using a connectivity template. 0.3.0 - Rotamers, OPLS2005, SMILES and stability improvements ------------------------------------------------------------- diff --git a/offpele/tests/test_molecule.py b/offpele/tests/test_molecule.py index c56a57b2..47a9ec3d 100644 --- a/offpele/tests/test_molecule.py +++ b/offpele/tests/test_molecule.py @@ -111,3 +111,72 @@ def test_molecule_tag_assignment(self): # a custom tag molecule = Molecule(smiles='c1ccccc1', tag='BNZ') assert molecule.tag == 'BNZ', 'Unexpected atom tag' + + def test_PDB_connectivity_template(self): + """ + It tests the initialization of an offpele's Molecule representation + from a PDB file without connectivity and a connectivity template. + """ + # Initialize an empty Molecule object + molecule = Molecule() + assert molecule.connectivity_template is None, \ + 'Unexpected connectivity template' + + # Initialize a Molecule from a PDB without connectivity and + # without a connectivity template + ligand_path = get_data_file_path( + 'ligands/BNZ_without_connectivity.pdb') + molecule = Molecule(ligand_path) + + expected_bond_ids = [(1, 0, False), (2, 1, False), (3, 2, False), + (4, 3, False), (5, 4, False), (5, 0, False), + (6, 0, False), (7, 1, False), (8, 2, False), + (9, 3, False), (10, 4, False), (11, 5, False)] + + for bond in molecule.rdkit_molecule.GetBonds(): + bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), + bond.GetIsAromatic()) + assert bond_id in expected_bond_ids, 'Unexpected bond id ' \ + + '{}'.format(bond_id) + + # Initialize a Molecule from a PDB without connectivity but with + # a connectivity template + template_path = get_data_file_path( + 'ligands/BNZ.pdb') + template = Molecule(template_path) + ligand_path = get_data_file_path( + 'ligands/BNZ_without_connectivity.pdb') + molecule = Molecule(ligand_path, + connectivity_template=template.rdkit_molecule) + + expected_bond_ids = [(1, 0, True), (2, 1, True), (3, 2, True), + (4, 3, True), (5, 4, True), (5, 0, True), + (6, 0, False), (7, 1, False), (8, 2, False), + (9, 3, False), (10, 4, False), (11, 5, False)] + + for bond in molecule.rdkit_molecule.GetBonds(): + bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), + bond.GetIsAromatic()) + assert bond_id in expected_bond_ids, 'Unexpected bond id ' \ + + '{}'.format(bond_id) + + # Initialize a Molecule from a PDB with connectivity and with + # a connectivity template + template_path = get_data_file_path( + 'ligands/BNZ.pdb') + template = Molecule(template_path) + ligand_path = get_data_file_path( + 'ligands/BNZ.pdb') + molecule = Molecule(ligand_path, + connectivity_template=template.rdkit_molecule) + + expected_bond_ids = [(0, 1, True), (1, 2, True), (2, 3, True), + (3, 4, True), (4, 5, True), (0, 5, True), + (0, 6, False), (1, 7, False), (2, 8, False), + (3, 9, False), (4, 10, False), (5, 11, False)] + + for bond in molecule.rdkit_molecule.GetBonds(): + bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), + bond.GetIsAromatic()) + assert bond_id in expected_bond_ids, 'Unexpected bond id ' \ + + '{}'.format(bond_id) diff --git a/offpele/topology/molecule.py b/offpele/topology/molecule.py index 1af1d697..7b30ac02 100644 --- a/offpele/topology/molecule.py +++ b/offpele/topology/molecule.py @@ -447,7 +447,8 @@ class Molecule(object): """ def __init__(self, path=None, smiles=None, rotamer_resolution=30, - exclude_terminal_rotamers=True, name='', tag='UNK'): + exclude_terminal_rotamers=True, name='', tag='UNK', + connectivity_template=None): """ It initializes a Molecule object through a PDB file or a SMILES tag. @@ -468,6 +469,9 @@ def __init__(self, path=None, smiles=None, rotamer_resolution=30, The molecule name tag : str The molecule tag. It must be a 3-character string + connectivity_template : an rdkit.Chem.rdchem.Mol object + A molecule represented with RDKit to use when assigning the + connectivity of this Molecule object Examples -------- @@ -478,7 +482,7 @@ def __init__(self, path=None, smiles=None, rotamer_resolution=30, >>> from offpele.topology import Molecule >>> molecule = Molecule('molecule.pdb') - >>> molecule.parameterize('openff_unconstrained-1.1.1.offxml') + >>> molecule.parameterize('openff_unconstrained-1.2.0.offxml') Load a molecule using a SMILES tag and parameterize it with Open Force Field @@ -488,11 +492,30 @@ def __init__(self, path=None, smiles=None, rotamer_resolution=30, >>> molecule = Molecule(smiles='Cc1ccccc1') >>> molecule.parameterize('openff_unconstrained-1.2.0.offxml') + Load a molecule usign a PDB file (without connectivity) and assign + the missing connectivity from an RDKit template (e.g. obtained + from qcportal and the Open Force Field Toolkit) + + >>> import qcportal as ptl + >>> from openforcefield.topology import Molecule as OFFMolecule + + >>> ds = client.get_collection('OptimizationDataset', + 'Kinase Inhibitors: WBO Distributions') + >>> entry = ds.get_entry(ds.df.index[0]) + >>> mol_record = OFFMolecule.from_qcschema(entry) + >>> template = mol_record.to_rdkit() + + >>> from offpele.topology import Molecule + + >>> molecule = Molecule('PDB_without_connectivity.pdb', + template=template) + """ self._name = name self._tag = tag self._rotamer_resolution = rotamer_resolution self._exclude_terminal_rotamers = exclude_terminal_rotamers + self._connectivity_template = connectivity_template if isinstance(path, str): from pathlib import Path @@ -539,28 +562,38 @@ def _initialize_from_pdb(self, path): path : str The path to a PDB with the molecule structure """ + print(' - Initializing molecule from PDB') self._initialize() - print(' - Loading molecule from RDKit') + print(' - Loading molecule from RDKit') rdkit_toolkit = RDKitToolkitWrapper() self._rdkit_molecule = rdkit_toolkit.from_pdb(path) + # Use RDKit template, if any, to assign the connectivity to + # the current Molecule object + if self.connectivity_template is not None: + print(' - Assigning connectivity from template') + rdkit_toolkit.assign_connectivity_from_template(self) + # RDKit must generate stereochemistry specifically from 3D coords + print(' - Assigning stereochemistry from 3D coordinates') rdkit_toolkit.assign_stereochemistry_from_3D(self) # Set molecule name according to PDB name if self.name == '': from pathlib import Path name = Path(path).stem + print(' - Setting molecule name to \'{}\''.format(name)) self.set_name(name) # Set molecule tag according to PDB's residue name if self.tag == 'UNK': tag = rdkit_toolkit.get_residue_name(self) + print(' - Setting molecule tag to \'{}\''.format(tag)) self.set_tag(tag) + print(' - Representing molecule with the Open Force Field Toolkit') openforcefield_toolkit = OpenForceFieldToolkitWrapper() - self._off_molecule = openforcefield_toolkit.from_rdkit(self) def _initialize_from_smiles(self, smiles): @@ -572,9 +605,10 @@ def _initialize_from_smiles(self, smiles): smiles : str The SMILES tag to construct the molecule structure with """ + print(' - Initializing molecule from a SMILES tag') self._initialize() - print(' - Constructing molecule from a SMILES tag with RDKit') + print(' - Loading molecule from RDKit') rdkit_toolkit = RDKitToolkitWrapper() self._rdkit_molecule = rdkit_toolkit.from_smiles(smiles) @@ -584,10 +618,11 @@ def _initialize_from_smiles(self, smiles): # Set molecule name according to the SMILES tag if self.name == '': + print(' - Setting molecule name to \'{}\''.format(smiles)) self.set_name(smiles) + print(' - Representing molecule with the Open Force Field Toolkit') openforcefield_toolkit = OpenForceFieldToolkitWrapper() - self._off_molecule = openforcefield_toolkit.from_rdkit(self) def _build_rotamers(self): @@ -1254,6 +1289,20 @@ def exclude_terminal_rotamers(self): """ return self._exclude_terminal_rotamers + @property + def connectivity_template(self): + """ + The template containing the correct connectivity for this Molecule + object. + + Returns + ------- + connectivity_template : an rdkit.Chem.rdchem.Mol object + A molecule represented with RDKit to use when assigning the + connectivity of this Molecule object + """ + return self._connectivity_template + @property def off_molecule(self): """ @@ -1452,5 +1501,8 @@ def _ipython_display_(self): """ from IPython.display import display + # Get 2D molecular representation rdkit_toolkit = RDKitToolkitWrapper() - return display(rdkit_toolkit.get_2D_representation(self)) + representation = rdkit_toolkit.get_2D_representation(self) + + return display(representation) diff --git a/offpele/utils/toolkits.py b/offpele/utils/toolkits.py index 3d81faea..ea835009 100644 --- a/offpele/utils/toolkits.py +++ b/offpele/utils/toolkits.py @@ -10,7 +10,7 @@ import subprocess from collections import defaultdict from pathlib import Path -from copy import copy +from copy import deepcopy import numpy as np from simtk import unit @@ -143,6 +143,29 @@ def from_smiles(self, smiles): return molecule + def assign_connectivity_from_template(self, molecule): + """ + It assigns the connectivity to an RDKit molecule according to the + connectivity from an RDKit connectivity template. + + Parameters + ---------- + molecule : an offpele.topology.Molecule + The offpele's Molecule object + """ + from rdkit.Chem import AllChem + + if molecule.connectivity_template is None: + raise ValueError('A connectivity template must be previously ' + + 'assigned to the molecule') + + rdkit_molecule = molecule.rdkit_molecule + + rdkit_molecule = AllChem.AssignBondOrdersFromTemplate( + molecule.connectivity_template, rdkit_molecule) + + molecule._rdkit_molecule = rdkit_molecule + def assign_stereochemistry_from_3D(self, molecule): """ It assigns the stereochemistry to an RDKit molecule according to the @@ -308,7 +331,7 @@ def get_atom_ids_with_rotatable_bonds(self, molecule): """ from rdkit import Chem - rdkit_molecule = molecule.rdkit_molecule + rdkit_molecule = deepcopy(molecule.rdkit_molecule) rot_bonds_atom_ids = set([ frozenset(atom_pair) for atom_pair in @@ -369,7 +392,8 @@ def get_2D_representation(self, molecule): from rdkit.Chem import AllChem rdkit_molecule = molecule.rdkit_molecule - representation_2D = copy(rdkit_molecule) + representation_2D = deepcopy(rdkit_molecule) + AllChem.Compute2DCoords(representation_2D) return representation_2D