Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pdb connectivity #52

Merged
merged 8 commits into from
Sep 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/releasehistory.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ This is a micro release that includes general bug fixes and stability improvemen
Bugfixes
""""""""
- `PR #48 <https://github.com/martimunicoy/offpele/pull/48>`_: Fixes CLI's default output paths.
- `PR #52 <https://github.com/martimunicoy/offpele/pull/52>`_: Molecule connectivity can be assigned from an RDKit molecular template when loading it from a PDB file without connectivity.

Tests added
"""""""""""
- `PR #48 <https://github.com/martimunicoy/offpele/pull/48>`_: Adds tests to validate the assignment of the default output paths.

- `PR #52 <https://github.com/martimunicoy/offpele/pull/52>`_: Adds tests to validate the initialization using a connectivity template.

0.3.0 - Rotamers, OPLS2005, SMILES and stability improvements
-------------------------------------------------------------
Expand Down
69 changes: 69 additions & 0 deletions offpele/tests/test_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,72 @@ def test_molecule_tag_assignment(self):
# a custom tag
molecule = Molecule(smiles='c1ccccc1', tag='BNZ')
assert molecule.tag == 'BNZ', 'Unexpected atom tag'

def test_PDB_connectivity_template(self):
"""
It tests the initialization of an offpele's Molecule representation
from a PDB file without connectivity and a connectivity template.
"""
# Initialize an empty Molecule object
molecule = Molecule()
assert molecule.connectivity_template is None, \
'Unexpected connectivity template'

# Initialize a Molecule from a PDB without connectivity and
# without a connectivity template
ligand_path = get_data_file_path(
'ligands/BNZ_without_connectivity.pdb')
molecule = Molecule(ligand_path)

expected_bond_ids = [(1, 0, False), (2, 1, False), (3, 2, False),
(4, 3, False), (5, 4, False), (5, 0, False),
(6, 0, False), (7, 1, False), (8, 2, False),
(9, 3, False), (10, 4, False), (11, 5, False)]

for bond in molecule.rdkit_molecule.GetBonds():
bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(),
bond.GetIsAromatic())
assert bond_id in expected_bond_ids, 'Unexpected bond id ' \
+ '{}'.format(bond_id)

# Initialize a Molecule from a PDB without connectivity but with
# a connectivity template
template_path = get_data_file_path(
'ligands/BNZ.pdb')
template = Molecule(template_path)
ligand_path = get_data_file_path(
'ligands/BNZ_without_connectivity.pdb')
molecule = Molecule(ligand_path,
connectivity_template=template.rdkit_molecule)

expected_bond_ids = [(1, 0, True), (2, 1, True), (3, 2, True),
(4, 3, True), (5, 4, True), (5, 0, True),
(6, 0, False), (7, 1, False), (8, 2, False),
(9, 3, False), (10, 4, False), (11, 5, False)]

for bond in molecule.rdkit_molecule.GetBonds():
bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(),
bond.GetIsAromatic())
assert bond_id in expected_bond_ids, 'Unexpected bond id ' \
+ '{}'.format(bond_id)

# Initialize a Molecule from a PDB with connectivity and with
# a connectivity template
template_path = get_data_file_path(
'ligands/BNZ.pdb')
template = Molecule(template_path)
ligand_path = get_data_file_path(
'ligands/BNZ.pdb')
molecule = Molecule(ligand_path,
connectivity_template=template.rdkit_molecule)

expected_bond_ids = [(0, 1, True), (1, 2, True), (2, 3, True),
(3, 4, True), (4, 5, True), (0, 5, True),
(0, 6, False), (1, 7, False), (2, 8, False),
(3, 9, False), (4, 10, False), (5, 11, False)]

for bond in molecule.rdkit_molecule.GetBonds():
bond_id = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(),
bond.GetIsAromatic())
assert bond_id in expected_bond_ids, 'Unexpected bond id ' \
+ '{}'.format(bond_id)
66 changes: 59 additions & 7 deletions offpele/topology/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,8 @@ class Molecule(object):
"""

def __init__(self, path=None, smiles=None, rotamer_resolution=30,
exclude_terminal_rotamers=True, name='', tag='UNK'):
exclude_terminal_rotamers=True, name='', tag='UNK',
connectivity_template=None):
"""
It initializes a Molecule object through a PDB file or a SMILES
tag.
Expand All @@ -468,6 +469,9 @@ def __init__(self, path=None, smiles=None, rotamer_resolution=30,
The molecule name
tag : str
The molecule tag. It must be a 3-character string
connectivity_template : an rdkit.Chem.rdchem.Mol object
A molecule represented with RDKit to use when assigning the
connectivity of this Molecule object
Examples
--------
Expand All @@ -478,7 +482,7 @@ def __init__(self, path=None, smiles=None, rotamer_resolution=30,
>>> from offpele.topology import Molecule
>>> molecule = Molecule('molecule.pdb')
>>> molecule.parameterize('openff_unconstrained-1.1.1.offxml')
>>> molecule.parameterize('openff_unconstrained-1.2.0.offxml')
Load a molecule using a SMILES tag and parameterize it with Open
Force Field
Expand All @@ -488,11 +492,30 @@ def __init__(self, path=None, smiles=None, rotamer_resolution=30,
>>> molecule = Molecule(smiles='Cc1ccccc1')
>>> molecule.parameterize('openff_unconstrained-1.2.0.offxml')
Load a molecule usign a PDB file (without connectivity) and assign
the missing connectivity from an RDKit template (e.g. obtained
from qcportal and the Open Force Field Toolkit)
>>> import qcportal as ptl
>>> from openforcefield.topology import Molecule as OFFMolecule
>>> ds = client.get_collection('OptimizationDataset',
'Kinase Inhibitors: WBO Distributions')
>>> entry = ds.get_entry(ds.df.index[0])
>>> mol_record = OFFMolecule.from_qcschema(entry)
>>> template = mol_record.to_rdkit()
>>> from offpele.topology import Molecule
>>> molecule = Molecule('PDB_without_connectivity.pdb',
template=template)
"""
self._name = name
self._tag = tag
self._rotamer_resolution = rotamer_resolution
self._exclude_terminal_rotamers = exclude_terminal_rotamers
self._connectivity_template = connectivity_template

if isinstance(path, str):
from pathlib import Path
Expand Down Expand Up @@ -539,28 +562,38 @@ def _initialize_from_pdb(self, path):
path : str
The path to a PDB with the molecule structure
"""
print(' - Initializing molecule from PDB')
self._initialize()
print(' - Loading molecule from RDKit')

print(' - Loading molecule from RDKit')
rdkit_toolkit = RDKitToolkitWrapper()
self._rdkit_molecule = rdkit_toolkit.from_pdb(path)

# Use RDKit template, if any, to assign the connectivity to
# the current Molecule object
if self.connectivity_template is not None:
print(' - Assigning connectivity from template')
rdkit_toolkit.assign_connectivity_from_template(self)

# RDKit must generate stereochemistry specifically from 3D coords
print(' - Assigning stereochemistry from 3D coordinates')
rdkit_toolkit.assign_stereochemistry_from_3D(self)

# Set molecule name according to PDB name
if self.name == '':
from pathlib import Path
name = Path(path).stem
print(' - Setting molecule name to \'{}\''.format(name))
self.set_name(name)

# Set molecule tag according to PDB's residue name
if self.tag == 'UNK':
tag = rdkit_toolkit.get_residue_name(self)
print(' - Setting molecule tag to \'{}\''.format(tag))
self.set_tag(tag)

print(' - Representing molecule with the Open Force Field Toolkit')
openforcefield_toolkit = OpenForceFieldToolkitWrapper()

self._off_molecule = openforcefield_toolkit.from_rdkit(self)

def _initialize_from_smiles(self, smiles):
Expand All @@ -572,9 +605,10 @@ def _initialize_from_smiles(self, smiles):
smiles : str
The SMILES tag to construct the molecule structure with
"""
print(' - Initializing molecule from a SMILES tag')
self._initialize()
print(' - Constructing molecule from a SMILES tag with RDKit')

print(' - Loading molecule from RDKit')
rdkit_toolkit = RDKitToolkitWrapper()
self._rdkit_molecule = rdkit_toolkit.from_smiles(smiles)

Expand All @@ -584,10 +618,11 @@ def _initialize_from_smiles(self, smiles):

# Set molecule name according to the SMILES tag
if self.name == '':
print(' - Setting molecule name to \'{}\''.format(smiles))
self.set_name(smiles)

print(' - Representing molecule with the Open Force Field Toolkit')
openforcefield_toolkit = OpenForceFieldToolkitWrapper()

self._off_molecule = openforcefield_toolkit.from_rdkit(self)

def _build_rotamers(self):
Expand Down Expand Up @@ -1254,6 +1289,20 @@ def exclude_terminal_rotamers(self):
"""
return self._exclude_terminal_rotamers

@property
def connectivity_template(self):
"""
The template containing the correct connectivity for this Molecule
object.
Returns
-------
connectivity_template : an rdkit.Chem.rdchem.Mol object
A molecule represented with RDKit to use when assigning the
connectivity of this Molecule object
"""
return self._connectivity_template

@property
def off_molecule(self):
"""
Expand Down Expand Up @@ -1452,5 +1501,8 @@ def _ipython_display_(self):
"""
from IPython.display import display

# Get 2D molecular representation
rdkit_toolkit = RDKitToolkitWrapper()
return display(rdkit_toolkit.get_2D_representation(self))
representation = rdkit_toolkit.get_2D_representation(self)

return display(representation)
30 changes: 27 additions & 3 deletions offpele/utils/toolkits.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import subprocess
from collections import defaultdict
from pathlib import Path
from copy import copy
from copy import deepcopy

import numpy as np
from simtk import unit
Expand Down Expand Up @@ -143,6 +143,29 @@ def from_smiles(self, smiles):

return molecule

def assign_connectivity_from_template(self, molecule):
"""
It assigns the connectivity to an RDKit molecule according to the
connectivity from an RDKit connectivity template.
Parameters
----------
molecule : an offpele.topology.Molecule
The offpele's Molecule object
"""
from rdkit.Chem import AllChem

if molecule.connectivity_template is None:
raise ValueError('A connectivity template must be previously '
+ 'assigned to the molecule')

rdkit_molecule = molecule.rdkit_molecule

rdkit_molecule = AllChem.AssignBondOrdersFromTemplate(
molecule.connectivity_template, rdkit_molecule)

molecule._rdkit_molecule = rdkit_molecule

def assign_stereochemistry_from_3D(self, molecule):
"""
It assigns the stereochemistry to an RDKit molecule according to the
Expand Down Expand Up @@ -308,7 +331,7 @@ def get_atom_ids_with_rotatable_bonds(self, molecule):
"""
from rdkit import Chem

rdkit_molecule = molecule.rdkit_molecule
rdkit_molecule = deepcopy(molecule.rdkit_molecule)

rot_bonds_atom_ids = set([
frozenset(atom_pair) for atom_pair in
Expand Down Expand Up @@ -369,7 +392,8 @@ def get_2D_representation(self, molecule):
from rdkit.Chem import AllChem

rdkit_molecule = molecule.rdkit_molecule
representation_2D = copy(rdkit_molecule)
representation_2D = deepcopy(rdkit_molecule)

AllChem.Compute2DCoords(representation_2D)
return representation_2D

Expand Down