From 97c49f8e8edbf019d0d8dad29711bbeb1906b430 Mon Sep 17 00:00:00 2001 From: Marti Municoy Date: Fri, 4 Sep 2020 13:17:36 +0200 Subject: [PATCH 1/7] Remove highlighting in molecule representation --- offpele/topology/molecule.py | 5 ++++- offpele/utils/toolkits.py | 7 ++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/offpele/topology/molecule.py b/offpele/topology/molecule.py index 1af1d697..90403e96 100644 --- a/offpele/topology/molecule.py +++ b/offpele/topology/molecule.py @@ -1452,5 +1452,8 @@ def _ipython_display_(self): """ from IPython.display import display + # Get 2D molecular representation rdkit_toolkit = RDKitToolkitWrapper() - return display(rdkit_toolkit.get_2D_representation(self)) + representation = rdkit_toolkit.get_2D_representation(self) + + return display(representation) diff --git a/offpele/utils/toolkits.py b/offpele/utils/toolkits.py index 3d81faea..20b8bf84 100644 --- a/offpele/utils/toolkits.py +++ b/offpele/utils/toolkits.py @@ -10,7 +10,7 @@ import subprocess from collections import defaultdict from pathlib import Path -from copy import copy +from copy import deepcopy import numpy as np from simtk import unit @@ -308,7 +308,7 @@ def get_atom_ids_with_rotatable_bonds(self, molecule): """ from rdkit import Chem - rdkit_molecule = molecule.rdkit_molecule + rdkit_molecule = deepcopy(molecule.rdkit_molecule) rot_bonds_atom_ids = set([ frozenset(atom_pair) for atom_pair in @@ -369,7 +369,8 @@ def get_2D_representation(self, molecule): from rdkit.Chem import AllChem rdkit_molecule = molecule.rdkit_molecule - representation_2D = copy(rdkit_molecule) + representation_2D = deepcopy(rdkit_molecule) + AllChem.Compute2DCoords(representation_2D) return representation_2D From c94be6fb5ce98f6798a4e5056f79dd9dce287b97 Mon Sep 17 00:00:00 2001 From: Marti Municoy Date: Tue, 8 Sep 2020 11:48:41 +0200 Subject: [PATCH 2/7] Add connectivity template to Molecule --- offpele/topology/molecule.py | 61 ++++++++++++++++++++++++++++++++---- offpele/utils/toolkits.py | 23 ++++++++++++++ 2 files changed, 78 insertions(+), 6 deletions(-) diff --git a/offpele/topology/molecule.py b/offpele/topology/molecule.py index 90403e96..801e93ba 100644 --- a/offpele/topology/molecule.py +++ b/offpele/topology/molecule.py @@ -447,7 +447,8 @@ class Molecule(object): """ def __init__(self, path=None, smiles=None, rotamer_resolution=30, - exclude_terminal_rotamers=True, name='', tag='UNK'): + exclude_terminal_rotamers=True, name='', tag='UNK', + connectivity_template=None): """ It initializes a Molecule object through a PDB file or a SMILES tag. @@ -468,6 +469,9 @@ def __init__(self, path=None, smiles=None, rotamer_resolution=30, The molecule name tag : str The molecule tag. It must be a 3-character string + connectivity_template : an rdkit.Chem.rdchem.Mol object + A molecule represented with RDKit to use when assigning the + connectivity of this Molecule object Examples -------- @@ -478,7 +482,7 @@ def __init__(self, path=None, smiles=None, rotamer_resolution=30, >>> from offpele.topology import Molecule >>> molecule = Molecule('molecule.pdb') - >>> molecule.parameterize('openff_unconstrained-1.1.1.offxml') + >>> molecule.parameterize('openff_unconstrained-1.2.0.offxml') Load a molecule using a SMILES tag and parameterize it with Open Force Field @@ -488,11 +492,30 @@ def __init__(self, path=None, smiles=None, rotamer_resolution=30, >>> molecule = Molecule(smiles='Cc1ccccc1') >>> molecule.parameterize('openff_unconstrained-1.2.0.offxml') + Load a molecule usign a PDB file (without connectivity) and assign + the missing connectivity from an RDKit template (e.g. obtained + from qcportal and the Open Force Field Toolkit) + + >>> import qcportal as ptl + >>> from openforcefield.topology import Molecule as OFFMolecule + + >>> ds = client.get_collection('OptimizationDataset', + 'Kinase Inhibitors: WBO Distributions') + >>> entry = ds.get_entry(ds.df.index[0]) + >>> mol_record = OFFMolecule.from_qcschema(entry) + >>> template = mol_record.to_rdkit() + + >>> from offpele.topology import Molecule + + >>> molecule = Molecule('PDB_without_connectivity.pdb', + template=template) + """ self._name = name self._tag = tag self._rotamer_resolution = rotamer_resolution self._exclude_terminal_rotamers = exclude_terminal_rotamers + self._connectivity_template = connectivity_template if isinstance(path, str): from pathlib import Path @@ -539,28 +562,38 @@ def _initialize_from_pdb(self, path): path : str The path to a PDB with the molecule structure """ + print(' - Initializing molecule from PDB') self._initialize() - print(' - Loading molecule from RDKit') + print(' - Loading molecule from RDKit') rdkit_toolkit = RDKitToolkitWrapper() self._rdkit_molecule = rdkit_toolkit.from_pdb(path) + # Use RDKit template, if any, to assign the connectivity to + # the current Molecule object + if self.connectivity_template is not None: + print(' - Assigning connectivity from template') + rdkit_toolkit.assign_connectivity_from_template(self) + # RDKit must generate stereochemistry specifically from 3D coords + print(' - Assigning stereochemistry from 3D coordinates') rdkit_toolkit.assign_stereochemistry_from_3D(self) # Set molecule name according to PDB name if self.name == '': from pathlib import Path name = Path(path).stem + print(' - Setting molecule name to \'{}\''.format(name)) self.set_name(name) # Set molecule tag according to PDB's residue name if self.tag == 'UNK': tag = rdkit_toolkit.get_residue_name(self) + print(' - Setting molecule tag to \'{}\''.format(tag)) self.set_tag(tag) + print(' - Representing molecule with the Open Force Field Toolkit') openforcefield_toolkit = OpenForceFieldToolkitWrapper() - self._off_molecule = openforcefield_toolkit.from_rdkit(self) def _initialize_from_smiles(self, smiles): @@ -572,9 +605,10 @@ def _initialize_from_smiles(self, smiles): smiles : str The SMILES tag to construct the molecule structure with """ + print(' - Initializing molecule from a SMILES tag') self._initialize() - print(' - Constructing molecule from a SMILES tag with RDKit') + print(' - Loading molecule from RDKit') rdkit_toolkit = RDKitToolkitWrapper() self._rdkit_molecule = rdkit_toolkit.from_smiles(smiles) @@ -584,10 +618,11 @@ def _initialize_from_smiles(self, smiles): # Set molecule name according to the SMILES tag if self.name == '': + print(' - Setting molecule name to \'{}\''.format(name)) self.set_name(smiles) + print(' - Representing molecule with the Open Force Field Toolkit') openforcefield_toolkit = OpenForceFieldToolkitWrapper() - self._off_molecule = openforcefield_toolkit.from_rdkit(self) def _build_rotamers(self): @@ -1254,6 +1289,20 @@ def exclude_terminal_rotamers(self): """ return self._exclude_terminal_rotamers + @property + def connectivity_template(self): + """ + The template containing the correct connectivity for this Molecule + object. + + Returns + ------- + connectivity_template : an rdkit.Chem.rdchem.Mol object + A molecule represented with RDKit to use when assigning the + connectivity of this Molecule object + """ + return self._connectivity_template + @property def off_molecule(self): """ diff --git a/offpele/utils/toolkits.py b/offpele/utils/toolkits.py index 20b8bf84..ea835009 100644 --- a/offpele/utils/toolkits.py +++ b/offpele/utils/toolkits.py @@ -143,6 +143,29 @@ def from_smiles(self, smiles): return molecule + def assign_connectivity_from_template(self, molecule): + """ + It assigns the connectivity to an RDKit molecule according to the + connectivity from an RDKit connectivity template. + + Parameters + ---------- + molecule : an offpele.topology.Molecule + The offpele's Molecule object + """ + from rdkit.Chem import AllChem + + if molecule.connectivity_template is None: + raise ValueError('A connectivity template must be previously ' + + 'assigned to the molecule') + + rdkit_molecule = molecule.rdkit_molecule + + rdkit_molecule = AllChem.AssignBondOrdersFromTemplate( + molecule.connectivity_template, rdkit_molecule) + + molecule._rdkit_molecule = rdkit_molecule + def assign_stereochemistry_from_3D(self, molecule): """ It assigns the stereochemistry to an RDKit molecule according to the From 63a538dad946f81b1dc5df8b7b48e41d55a01001 Mon Sep 17 00:00:00 2001 From: Marti Municoy Date: Tue, 8 Sep 2020 12:06:08 +0200 Subject: [PATCH 3/7] Add test for connectivity template initialization --- offpele/tests/test_molecule.py | 62 ++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/offpele/tests/test_molecule.py b/offpele/tests/test_molecule.py index c56a57b2..b8a6d029 100644 --- a/offpele/tests/test_molecule.py +++ b/offpele/tests/test_molecule.py @@ -111,3 +111,65 @@ def test_molecule_tag_assignment(self): # a custom tag molecule = Molecule(smiles='c1ccccc1', tag='BNZ') assert molecule.tag == 'BNZ', 'Unexpected atom tag' + + def test_PDB_connectivity_template(self): + """ + It tests the initialization of an offpele's Molecule representation + from a PDB file without connectivity and a connectivity template. + """ + # Initialize an empty Molecule object + molecule = Molecule() + assert molecule.connectivity_template is None, \ + 'Unexpected connectivity template' + + # Initialize a Molecule from a PDB without connectivity + ligand_path = get_data_file_path( + 'ligands/BNZ_without_connectivity.pdb') + molecule = Molecule(ligand_path) + + expected_bond_ids = [(1, 0, False), + (2, 1, False), + (3, 2, False), + (4, 3, False), + (5, 4, False), + (5, 0, False), + (6, 0, False), + (7, 1, False), + (8, 2, False), + (9, 3, False), + (10, 4, False), + (11, 5, False)] + + for bond in molecule.rdkit_molecule.GetBonds(): + bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), + bond.GetIsAromatic()) + assert bond_id in expected_bond_ids, 'Unexpected bond id ' \ + + '{}'.format(bond_id) + + # Initialize a Molecule from a PDB without connectivity + template_path = get_data_file_path( + 'ligands/BNZ.pdb') + template = Molecule(template_path) + ligand_path = get_data_file_path( + 'ligands/BNZ_without_connectivity.pdb') + molecule = Molecule(ligand_path, + connectivity_template=template.rdkit_molecule) + + expected_bond_ids = [(1, 0, True), + (2, 1, True), + (3, 2, True), + (4, 3, True), + (5, 4, True), + (5, 0, True), + (6, 0, False), + (7, 1, False), + (8, 2, False), + (9, 3, False), + (10, 4, False), + (11, 5, False)] + + for bond in molecule.rdkit_molecule.GetBonds(): + bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), + bond.GetIsAromatic()) + assert bond_id in expected_bond_ids, 'Unexpected bond id ' \ + + '{}'.format(bond_id) From 4bc35baf369aa6f509820905c5a286684dcf8e87 Mon Sep 17 00:00:00 2001 From: Marti Municoy Date: Tue, 8 Sep 2020 12:07:25 +0200 Subject: [PATCH 4/7] Minor changes in connectivity template test --- offpele/tests/test_molecule.py | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/offpele/tests/test_molecule.py b/offpele/tests/test_molecule.py index b8a6d029..a7edb726 100644 --- a/offpele/tests/test_molecule.py +++ b/offpele/tests/test_molecule.py @@ -127,18 +127,10 @@ def test_PDB_connectivity_template(self): 'ligands/BNZ_without_connectivity.pdb') molecule = Molecule(ligand_path) - expected_bond_ids = [(1, 0, False), - (2, 1, False), - (3, 2, False), - (4, 3, False), - (5, 4, False), - (5, 0, False), - (6, 0, False), - (7, 1, False), - (8, 2, False), - (9, 3, False), - (10, 4, False), - (11, 5, False)] + expected_bond_ids = [(1, 0, False), (2, 1, False), (3, 2, False), + (4, 3, False), (5, 4, False), (5, 0, False), + (6, 0, False), (7, 1, False), (8, 2, False), + (9, 3, False), (10, 4, False), (11, 5, False)] for bond in molecule.rdkit_molecule.GetBonds(): bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), @@ -155,18 +147,10 @@ def test_PDB_connectivity_template(self): molecule = Molecule(ligand_path, connectivity_template=template.rdkit_molecule) - expected_bond_ids = [(1, 0, True), - (2, 1, True), - (3, 2, True), - (4, 3, True), - (5, 4, True), - (5, 0, True), - (6, 0, False), - (7, 1, False), - (8, 2, False), - (9, 3, False), - (10, 4, False), - (11, 5, False)] + expected_bond_ids = [(1, 0, True), (2, 1, True), (3, 2, True), + (4, 3, True), (5, 4, True), (5, 0, True), + (6, 0, False), (7, 1, False), (8, 2, False), + (9, 3, False), (10, 4, False), (11, 5, False)] for bond in molecule.rdkit_molecule.GetBonds(): bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), From e87127066cda3f134afb9853766997cc06616514 Mon Sep 17 00:00:00 2001 From: Marti Municoy Date: Tue, 8 Sep 2020 12:22:38 +0200 Subject: [PATCH 5/7] Fix minor bug in smiles initialization --- offpele/topology/molecule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offpele/topology/molecule.py b/offpele/topology/molecule.py index 801e93ba..7b30ac02 100644 --- a/offpele/topology/molecule.py +++ b/offpele/topology/molecule.py @@ -618,7 +618,7 @@ def _initialize_from_smiles(self, smiles): # Set molecule name according to the SMILES tag if self.name == '': - print(' - Setting molecule name to \'{}\''.format(name)) + print(' - Setting molecule name to \'{}\''.format(smiles)) self.set_name(smiles) print(' - Representing molecule with the Open Force Field Toolkit') From e4273ec8758d2bc1328b2b4c4415361caf6a9773 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=AD=20Municoy?= Date: Tue, 8 Sep 2020 12:27:30 +0200 Subject: [PATCH 6/7] Update releasehistory.rst --- docs/releasehistory.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/releasehistory.rst b/docs/releasehistory.rst index 1bb330c1..04bdb62a 100644 --- a/docs/releasehistory.rst +++ b/docs/releasehistory.rst @@ -16,11 +16,12 @@ This is a micro release that includes general bug fixes and stability improvemen Bugfixes """""""" - `PR #48 `_: Fixes CLI's default output paths. +- `PR #52 `_: Molecule connectivity can be assigned from an RDKit molecular template when loading it from a PDB file without connectivity. Tests added """"""""""" - `PR #48 `_: Adds tests to validate the assignment of the default output paths. - +- `PR #52 `_: Adds tests to validate the initialization using a connectivity template. 0.3.0 - Rotamers, OPLS2005, SMILES and stability improvements ------------------------------------------------------------- From 151c5a39cedac4172b0957946bab81264621e383 Mon Sep 17 00:00:00 2001 From: Marti Municoy Date: Tue, 8 Sep 2020 12:33:12 +0200 Subject: [PATCH 7/7] Update connectivity template test --- offpele/tests/test_molecule.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/offpele/tests/test_molecule.py b/offpele/tests/test_molecule.py index a7edb726..47a9ec3d 100644 --- a/offpele/tests/test_molecule.py +++ b/offpele/tests/test_molecule.py @@ -122,7 +122,8 @@ def test_PDB_connectivity_template(self): assert molecule.connectivity_template is None, \ 'Unexpected connectivity template' - # Initialize a Molecule from a PDB without connectivity + # Initialize a Molecule from a PDB without connectivity and + # without a connectivity template ligand_path = get_data_file_path( 'ligands/BNZ_without_connectivity.pdb') molecule = Molecule(ligand_path) @@ -138,7 +139,8 @@ def test_PDB_connectivity_template(self): assert bond_id in expected_bond_ids, 'Unexpected bond id ' \ + '{}'.format(bond_id) - # Initialize a Molecule from a PDB without connectivity + # Initialize a Molecule from a PDB without connectivity but with + # a connectivity template template_path = get_data_file_path( 'ligands/BNZ.pdb') template = Molecule(template_path) @@ -157,3 +159,24 @@ def test_PDB_connectivity_template(self): bond.GetIsAromatic()) assert bond_id in expected_bond_ids, 'Unexpected bond id ' \ + '{}'.format(bond_id) + + # Initialize a Molecule from a PDB with connectivity and with + # a connectivity template + template_path = get_data_file_path( + 'ligands/BNZ.pdb') + template = Molecule(template_path) + ligand_path = get_data_file_path( + 'ligands/BNZ.pdb') + molecule = Molecule(ligand_path, + connectivity_template=template.rdkit_molecule) + + expected_bond_ids = [(0, 1, True), (1, 2, True), (2, 3, True), + (3, 4, True), (4, 5, True), (0, 5, True), + (0, 6, False), (1, 7, False), (2, 8, False), + (3, 9, False), (4, 10, False), (5, 11, False)] + + for bond in molecule.rdkit_molecule.GetBonds(): + bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), + bond.GetIsAromatic()) + assert bond_id in expected_bond_ids, 'Unexpected bond id ' \ + + '{}'.format(bond_id)