Skip to content

Commit

Permalink
Merge pull request #52 from martimunicoy/pdb_connectivity
Browse files Browse the repository at this point in the history
Pdb connectivity
  • Loading branch information
martimunicoy committed Sep 8, 2020
2 parents f36c892 + d636c5f commit 1ac9a54
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 11 deletions.
3 changes: 2 additions & 1 deletion docs/releasehistory.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ This is a micro release that includes general bug fixes and stability improvemen
Bugfixes
""""""""
- `PR #48 <https://github.com/martimunicoy/offpele/pull/48>`_: Fixes CLI's default output paths.
- `PR #52 <https://github.com/martimunicoy/offpele/pull/52>`_: Molecule connectivity can be assigned from an RDKit molecular template when loading it from a PDB file without connectivity.

Tests added
"""""""""""
- `PR #48 <https://github.com/martimunicoy/offpele/pull/48>`_: Adds tests to validate the assignment of the default output paths.

- `PR #52 <https://github.com/martimunicoy/offpele/pull/52>`_: Adds tests to validate the initialization using a connectivity template.

0.3.0 - Rotamers, OPLS2005, SMILES and stability improvements
-------------------------------------------------------------
Expand Down
69 changes: 69 additions & 0 deletions offpele/tests/test_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,72 @@ def test_molecule_tag_assignment(self):
# a custom tag
molecule = Molecule(smiles='c1ccccc1', tag='BNZ')
assert molecule.tag == 'BNZ', 'Unexpected atom tag'

def test_PDB_connectivity_template(self):
"""
It tests the initialization of an offpele's Molecule representation
from a PDB file without connectivity and a connectivity template.
"""
# Initialize an empty Molecule object
molecule = Molecule()
assert molecule.connectivity_template is None, \
'Unexpected connectivity template'

# Initialize a Molecule from a PDB without connectivity and
# without a connectivity template
ligand_path = get_data_file_path(
'ligands/BNZ_without_connectivity.pdb')
molecule = Molecule(ligand_path)

expected_bond_ids = [(1, 0, False), (2, 1, False), (3, 2, False),
(4, 3, False), (5, 4, False), (5, 0, False),
(6, 0, False), (7, 1, False), (8, 2, False),
(9, 3, False), (10, 4, False), (11, 5, False)]

for bond in molecule.rdkit_molecule.GetBonds():
bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(),
bond.GetIsAromatic())
assert bond_id in expected_bond_ids, 'Unexpected bond id ' \
+ '{}'.format(bond_id)

# Initialize a Molecule from a PDB without connectivity but with
# a connectivity template
template_path = get_data_file_path(
'ligands/BNZ.pdb')
template = Molecule(template_path)
ligand_path = get_data_file_path(
'ligands/BNZ_without_connectivity.pdb')
molecule = Molecule(ligand_path,
connectivity_template=template.rdkit_molecule)

expected_bond_ids = [(1, 0, True), (2, 1, True), (3, 2, True),
(4, 3, True), (5, 4, True), (5, 0, True),
(6, 0, False), (7, 1, False), (8, 2, False),
(9, 3, False), (10, 4, False), (11, 5, False)]

for bond in molecule.rdkit_molecule.GetBonds():
bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(),
bond.GetIsAromatic())
assert bond_id in expected_bond_ids, 'Unexpected bond id ' \
+ '{}'.format(bond_id)

# Initialize a Molecule from a PDB with connectivity and with
# a connectivity template
template_path = get_data_file_path(
'ligands/BNZ.pdb')
template = Molecule(template_path)
ligand_path = get_data_file_path(
'ligands/BNZ.pdb')
molecule = Molecule(ligand_path,
connectivity_template=template.rdkit_molecule)

expected_bond_ids = [(0, 1, True), (1, 2, True), (2, 3, True),
(3, 4, True), (4, 5, True), (0, 5, True),
(0, 6, False), (1, 7, False), (2, 8, False),
(3, 9, False), (4, 10, False), (5, 11, False)]

for bond in molecule.rdkit_molecule.GetBonds():
bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(),
bond.GetIsAromatic())
assert bond_id in expected_bond_ids, 'Unexpected bond id ' \
+ '{}'.format(bond_id)
66 changes: 59 additions & 7 deletions offpele/topology/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,8 @@ class Molecule(object):
"""

def __init__(self, path=None, smiles=None, rotamer_resolution=30,
exclude_terminal_rotamers=True, name='', tag='UNK'):
exclude_terminal_rotamers=True, name='', tag='UNK',
connectivity_template=None):
"""
It initializes a Molecule object through a PDB file or a SMILES
tag.
Expand All @@ -468,6 +469,9 @@ def __init__(self, path=None, smiles=None, rotamer_resolution=30,
The molecule name
tag : str
The molecule tag. It must be a 3-character string
connectivity_template : an rdkit.Chem.rdchem.Mol object
A molecule represented with RDKit to use when assigning the
connectivity of this Molecule object
Examples
--------
Expand All @@ -478,7 +482,7 @@ def __init__(self, path=None, smiles=None, rotamer_resolution=30,
>>> from offpele.topology import Molecule
>>> molecule = Molecule('molecule.pdb')
>>> molecule.parameterize('openff_unconstrained-1.1.1.offxml')
>>> molecule.parameterize('openff_unconstrained-1.2.0.offxml')
Load a molecule using a SMILES tag and parameterize it with Open
Force Field
Expand All @@ -488,11 +492,30 @@ def __init__(self, path=None, smiles=None, rotamer_resolution=30,
>>> molecule = Molecule(smiles='Cc1ccccc1')
>>> molecule.parameterize('openff_unconstrained-1.2.0.offxml')
Load a molecule usign a PDB file (without connectivity) and assign
the missing connectivity from an RDKit template (e.g. obtained
from qcportal and the Open Force Field Toolkit)
>>> import qcportal as ptl
>>> from openforcefield.topology import Molecule as OFFMolecule
>>> ds = client.get_collection('OptimizationDataset',
'Kinase Inhibitors: WBO Distributions')
>>> entry = ds.get_entry(ds.df.index[0])
>>> mol_record = OFFMolecule.from_qcschema(entry)
>>> template = mol_record.to_rdkit()
>>> from offpele.topology import Molecule
>>> molecule = Molecule('PDB_without_connectivity.pdb',
template=template)
"""
self._name = name
self._tag = tag
self._rotamer_resolution = rotamer_resolution
self._exclude_terminal_rotamers = exclude_terminal_rotamers
self._connectivity_template = connectivity_template

if isinstance(path, str):
from pathlib import Path
Expand Down Expand Up @@ -539,28 +562,38 @@ def _initialize_from_pdb(self, path):
path : str
The path to a PDB with the molecule structure
"""
print(' - Initializing molecule from PDB')
self._initialize()
print(' - Loading molecule from RDKit')

print(' - Loading molecule from RDKit')
rdkit_toolkit = RDKitToolkitWrapper()
self._rdkit_molecule = rdkit_toolkit.from_pdb(path)

# Use RDKit template, if any, to assign the connectivity to
# the current Molecule object
if self.connectivity_template is not None:
print(' - Assigning connectivity from template')
rdkit_toolkit.assign_connectivity_from_template(self)

# RDKit must generate stereochemistry specifically from 3D coords
print(' - Assigning stereochemistry from 3D coordinates')
rdkit_toolkit.assign_stereochemistry_from_3D(self)

# Set molecule name according to PDB name
if self.name == '':
from pathlib import Path
name = Path(path).stem
print(' - Setting molecule name to \'{}\''.format(name))
self.set_name(name)

# Set molecule tag according to PDB's residue name
if self.tag == 'UNK':
tag = rdkit_toolkit.get_residue_name(self)
print(' - Setting molecule tag to \'{}\''.format(tag))
self.set_tag(tag)

print(' - Representing molecule with the Open Force Field Toolkit')
openforcefield_toolkit = OpenForceFieldToolkitWrapper()

self._off_molecule = openforcefield_toolkit.from_rdkit(self)

def _initialize_from_smiles(self, smiles):
Expand All @@ -572,9 +605,10 @@ def _initialize_from_smiles(self, smiles):
smiles : str
The SMILES tag to construct the molecule structure with
"""
print(' - Initializing molecule from a SMILES tag')
self._initialize()
print(' - Constructing molecule from a SMILES tag with RDKit')

print(' - Loading molecule from RDKit')
rdkit_toolkit = RDKitToolkitWrapper()
self._rdkit_molecule = rdkit_toolkit.from_smiles(smiles)

Expand All @@ -584,10 +618,11 @@ def _initialize_from_smiles(self, smiles):

# Set molecule name according to the SMILES tag
if self.name == '':
print(' - Setting molecule name to \'{}\''.format(smiles))
self.set_name(smiles)

print(' - Representing molecule with the Open Force Field Toolkit')
openforcefield_toolkit = OpenForceFieldToolkitWrapper()

self._off_molecule = openforcefield_toolkit.from_rdkit(self)

def _build_rotamers(self):
Expand Down Expand Up @@ -1254,6 +1289,20 @@ def exclude_terminal_rotamers(self):
"""
return self._exclude_terminal_rotamers

@property
def connectivity_template(self):
"""
The template containing the correct connectivity for this Molecule
object.
Returns
-------
connectivity_template : an rdkit.Chem.rdchem.Mol object
A molecule represented with RDKit to use when assigning the
connectivity of this Molecule object
"""
return self._connectivity_template

@property
def off_molecule(self):
"""
Expand Down Expand Up @@ -1452,5 +1501,8 @@ def _ipython_display_(self):
"""
from IPython.display import display

# Get 2D molecular representation
rdkit_toolkit = RDKitToolkitWrapper()
return display(rdkit_toolkit.get_2D_representation(self))
representation = rdkit_toolkit.get_2D_representation(self)

return display(representation)
30 changes: 27 additions & 3 deletions offpele/utils/toolkits.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import subprocess
from collections import defaultdict
from pathlib import Path
from copy import copy
from copy import deepcopy

import numpy as np
from simtk import unit
Expand Down Expand Up @@ -143,6 +143,29 @@ def from_smiles(self, smiles):

return molecule

def assign_connectivity_from_template(self, molecule):
"""
It assigns the connectivity to an RDKit molecule according to the
connectivity from an RDKit connectivity template.
Parameters
----------
molecule : an offpele.topology.Molecule
The offpele's Molecule object
"""
from rdkit.Chem import AllChem

if molecule.connectivity_template is None:
raise ValueError('A connectivity template must be previously '
+ 'assigned to the molecule')

rdkit_molecule = molecule.rdkit_molecule

rdkit_molecule = AllChem.AssignBondOrdersFromTemplate(
molecule.connectivity_template, rdkit_molecule)

molecule._rdkit_molecule = rdkit_molecule

def assign_stereochemistry_from_3D(self, molecule):
"""
It assigns the stereochemistry to an RDKit molecule according to the
Expand Down Expand Up @@ -308,7 +331,7 @@ def get_atom_ids_with_rotatable_bonds(self, molecule):
"""
from rdkit import Chem

rdkit_molecule = molecule.rdkit_molecule
rdkit_molecule = deepcopy(molecule.rdkit_molecule)

rot_bonds_atom_ids = set([
frozenset(atom_pair) for atom_pair in
Expand Down Expand Up @@ -369,7 +392,8 @@ def get_2D_representation(self, molecule):
from rdkit.Chem import AllChem

rdkit_molecule = molecule.rdkit_molecule
representation_2D = copy(rdkit_molecule)
representation_2D = deepcopy(rdkit_molecule)

AllChem.Compute2DCoords(representation_2D)
return representation_2D

Expand Down

0 comments on commit 1ac9a54

Please sign in to comment.