Skip to content

Commit

Permalink
Merge pull request #202 from nlesc-nano/angle
Browse files Browse the repository at this point in the history
ENH: Add the `angle_offset` option for manually rotating ligand vectors
  • Loading branch information
BvB93 committed Nov 19, 2021
2 parents 6b91d35 + 5944a7b commit c586d13
Show file tree
Hide file tree
Showing 11 changed files with 125 additions and 18 deletions.
2 changes: 1 addition & 1 deletion CAT/__version__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""The **CAT** version."""

__version__ = '0.10.0'
__version__ = '0.10.1'
10 changes: 8 additions & 2 deletions CAT/attachment/ligand_anchoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from rdkit import Chem

from ..logger import logger
from ..utils import get_template, AnchorTup
from ..utils import get_template, AnchorTup, KindEnum
from ..mol_utils import separate_mod # noqa: F401
from ..workflows import MOL, FORMULA, HDF5_INDEX, OPT
from ..settings_dataframe import SettingsDataFrame
Expand Down Expand Up @@ -286,7 +286,13 @@ def substructure_split(

# Update ligand properties
lig.properties.dummies = anchor
lig.properties.anchor = "".join(f"{lig[1 + i].symbol}{1 + i}" for i in anchor_tup.anchor_idx)
if anchor_tup.kind == KindEnum.FIRST:
i = 1 + anchor_tup.anchor_idx[0]
lig.properties.anchor = f"{lig[i].symbol}{i}"
else:
lig.properties.anchor = "".join(
f"{lig[1 + i].symbol}{1 + i}" for i in anchor_tup.anchor_idx
)
lig.properties.anchor_tup = anchor_tup
lig.properties.charge = sum(atom.properties.get('charge', 0) for atom in lig)

Expand Down
33 changes: 25 additions & 8 deletions CAT/attachment/ligand_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from rdkit.Chem import AllChem, Mol
from scm.plams.core.basejob import Job
from scm.plams import (Molecule, Atom, Bond, MoleculeError, add_to_class, Units,
Settings, AMSJob, ADFJob, Cp2kJob)
Settings, AMSJob, ADFJob, Cp2kJob, axis_rotation_matrix)

from .mol_split_cm import SplitMol
from .remove_atoms_cm import RemoveAtoms
Expand Down Expand Up @@ -190,11 +190,14 @@ def optimize_ligand(ligand: Molecule) -> None:
allign_axis(ligand)


def _get_allign_args(mol: "Molecule | Mol", anchor_tup: AnchorTup) -> Tuple[np.ndarray, int, int]:
def _get_allign_args(
mol: "Molecule | Mol",
anchor_tup: AnchorTup,
) -> Tuple[np.ndarray, int, int, Tuple[int, int, int], "None | float"]:
if anchor_tup.kind == KindEnum.FIRST:
idx_rot = idx_trans = anchor_tup.anchor_idx[0]
xyz = mol.as_array() if isinstance(mol, Molecule) else rdmol_as_array(mol)
return xyz, idx_rot, idx_trans
return xyz, idx_rot, idx_trans, anchor_tup.anchor_idx[:3], anchor_tup.angle_offset

# Reserve index `-1` for a dummy atom representing the mean position of all anchors
if isinstance(mol, Molecule):
Expand All @@ -212,14 +215,22 @@ def _get_allign_args(mol: "Molecule | Mol", anchor_tup: AnchorTup) -> Tuple[np.n
idx_trans = anchor_tup.anchor_idx[0]
else:
raise ValueError(f"Unknown anchor kind: {anchor_tup.kind!r}")
return xyz, idx_rot, idx_trans
return xyz, idx_rot, idx_trans, anchor_tup.anchor_idx[:3], anchor_tup.angle_offset


def allign_axis(mol: Molecule) -> None:
"""Allign a molecule with the Cartesian X-axis; setting **anchor** as the origin."""
xyz, idx_rot, idx_trans = _get_allign_args(mol, mol.properties.anchor_tup)
rotmat = optimize_rotmat(xyz, idx_rot)
xyz = np.matmul(xyz, rotmat.T, out=xyz)
xyz, idx_rot, idx_trans, idx_angle, angle = _get_allign_args(mol, mol.properties.anchor_tup)
rotmat1 = optimize_rotmat(xyz, idx_rot)
xyz = np.matmul(xyz, rotmat1.T, out=xyz)

# Manually rotate the ligand by a user-specified amount
if angle is not None:
i, j, k = idx_angle
vec_perp = np.cross(xyz[j] - xyz[i], xyz[k] - xyz[j])
rotmat2 = axis_rotation_matrix(vec_perp, angle)
xyz = np.matmul(xyz, rotmat2.T, out=xyz)

xyz -= xyz[idx_trans]
mol.from_array(xyz.round(decimals=3))

Expand Down Expand Up @@ -574,9 +585,15 @@ def modified_minimum_scan_rdkit(ligand: Molecule, bond_tuple: Tuple[int, int]) -
# Find the conformation with the optimal ligand vector
cost_list = []
for rdmol in rdmol_list:
xyz, idx_rot, idx_trans = _get_allign_args(rdmol, ligand.properties.anchor_tup)
xyz, idx_rot, idx_trans, idx_angle, angle = _get_allign_args(rdmol,
ligand.properties.anchor_tup)
rotmat = optimize_rotmat(xyz, idx_rot)
xyz = np.matmul(xyz, rotmat.T, out=xyz)
if angle is not None:
i, j, k = idx_angle
vec_perp = np.cross(xyz[j] - xyz[i], xyz[k] - xyz[j])
rotmat2 = axis_rotation_matrix(vec_perp, angle)
xyz = np.matmul(xyz, rotmat2.T, out=xyz)
xyz -= xyz[idx_trans]
cost = np.exp(xyz[:, 1:]).sum()
cost_list.append(cost)
Expand Down
70 changes: 65 additions & 5 deletions CAT/data_handling/anchor_parsing.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
"""A module for parsing the ``ligand.anchor`` keyword."""

import re
import operator
from typing import Union, Tuple, Collection, Iterable
from typing import Union, Tuple, Collection, Iterable, SupportsFloat

from rdkit.Chem import Mol
from scm.plams import Units, PT
from schema import Schema, Use, Optional
from typing_extensions import TypedDict, SupportsIndex

Expand All @@ -20,6 +22,24 @@ class _UnparsedAnchorDictBase(TypedDict):

class _UnparsedAnchorDict(_UnparsedAnchorDictBase, total=False):
remove: "None | SupportsIndex | Collection[SupportsIndex]"
angle_offset: "None | SupportsFloat | SupportsIndex | bytes | str"


class _AnchorDict(TypedDict):
group: str
group_idx: Tuple[int, ...]
remove: "None | Tuple[int, ...]"
kind: KindEnum
angle_offset: "None | float"


def _pt_sort_func(symbol: str) -> Tuple[int, str]:
return len(symbol), symbol


_SYMBOL = "|".join(sorted(PT.symtonum, key=_pt_sort_func))
_SYMBOL_PATTERN = re.compile(f"({_SYMBOL})")
_UNIT_PATTERN = re.compile(r"([\.\_0-9]+)(\s+)?(\w+)?")


def _parse_group_idx(item: "SupportsIndex | Iterable[SupportsIndex]") -> Tuple[int, ...]:
Expand Down Expand Up @@ -57,11 +77,32 @@ def _parse_kind(typ: "None | str | KindEnum") -> KindEnum:
return KindEnum[typ.upper()]


def _parse_angle_offset(
offset: "None | SupportsFloat | SupportsIndex | bytes | str"
) -> "None | float":
"""Parse the ``angle_offset`` option; convert the offset to radians."""
if offset is None:
return None
elif not isinstance(offset, str):
return Units.convert(float(offset), "deg", "rad")

# match offsets such as `"5.5 degree"`
match = _UNIT_PATTERN.match(offset)
if match is None:
raise ValueError(f"Invalid offset string: {offset!r}")

offset, _, unit = match.groups()
if unit is None:
unit = "deg"
return Units.convert(float(offset), unit, "rad")


anchor_schema = Schema({
"group": str,
"group_idx": Use(lambda i: tuple(_parse_group_idx(i))),
"group_idx": Use(_parse_group_idx),
Optional("remove", default=None): Use(_parse_remove),
Optional("kind", default=KindEnum.FIRST): Use(_parse_kind),
Optional("angle_offset", default=None): Use(_parse_angle_offset),
})


Expand All @@ -83,7 +124,7 @@ def parse_anchors(
patterns = [patterns]

ret = []
for p in patterns: # type: _UnparsedAnchorDict | str | Mol
for p in patterns: # type: _UnparsedAnchorDict | str | Mol | AnchorTup
if isinstance(p, AnchorTup):
ret.append(p)
elif isinstance(p, Mol):
Expand All @@ -96,12 +137,31 @@ def parse_anchors(
remove = None if not split else (list(mol.GetAtoms())[-1].GetIdx(),)
ret.append(AnchorTup(mol=mol, group=group, remove=remove))
else:
kwargs = anchor_schema.validate(p)
kwargs: _AnchorDict = anchor_schema.validate(p)

# Check that `group_idx` and `remove` are disjoint
group_idx = kwargs["group_idx"]
remove = kwargs["remove"]
if remove is not None and not set(group_idx).isdisjoint(remove):
raise ValueError("`group_idx` and `remove` must be disjoint")

mol = _smiles_to_rdmol(kwargs["group"])
# Check that the indices in `group_idx` and `remove` are not out of bounds
group = kwargs["group"]
symbols_list = _SYMBOL_PATTERN.findall(group)
if len(symbols_list) <= max(group_idx):
raise IndexError(f"`group_idx` index {max(group_idx)} is out of bounds "
f"for a `group` with {len(symbols_list)} atoms")
elif remove is not None and len(symbols_list) <= max(remove):
raise IndexError(f"`remove` index {max(remove)} is out of bounds "
f"for a `group` with {len(symbols_list)} atoms")

# Check that at least 3 atoms are available for `angle_offset`
# (so a plane can be defined)
angle_offset = kwargs["angle_offset"]
if angle_offset is not None and len(group_idx) < 3:
raise ValueError("`group_idx` must contain at least 3 atoms when "
"`angle_offset` is specified")

mol = _smiles_to_rdmol(group)
ret.append(AnchorTup(**kwargs, mol=mol))
return tuple(ret)
1 change: 1 addition & 0 deletions CAT/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,3 +550,4 @@ class AnchorTup(NamedTuple):
anchor_idx: Tuple[int, ...] = ()
remove: "None | Tuple[int, ...]" = None
kind: KindEnum = KindEnum.FIRST
angle_offset: "None | float" = None
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ All notable changes to this project will be documented in this file.
This project adheres to `Semantic Versioning <http://semver.org/>`_.


0.10.1
******
* Added the option to manually specify angle offsets for the ligand vector.


0.10.0
******
* Add more advanced anchor-parsing options.
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
:target: https://docs.python.org/3.9/

###############################
Compound Attachment Tool 0.10.0
Compound Attachment Tool 0.10.1
###############################

**CAT** is a collection of tools designed for the construction of various chemical compounds.
Expand Down
17 changes: 17 additions & 0 deletions docs/4_optional.rst
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ Ligand
* :attr:`anchor.group_idx`
* :attr:`anchor.remove`
* :attr:`anchor.kind`
* :attr:`anchor.angle_offset`

.. note::

Expand Down Expand Up @@ -675,9 +676,11 @@ Ligand
.. note::
This argument has no value be default and must thus be provided by the user.


.. attribute:: optional.ligand.anchor.remove

:Parameter: * **Type** - :data:`None`, :class:`int` or :class:`Sequence[int] <collections.abc.Sequence>`
* **Default value** – :data:`None`

The indices of the to-be removed atoms in :attr:`anchor.group <optional.ligand.anchor.group>`.

Expand All @@ -689,6 +692,7 @@ Ligand
.. attribute:: optional.ligand.anchor.kind

:Parameter: * **Type** - :class:`str`
* **Default value** – ``"first"``

How atoms are to-be attached when multiple anchor atoms are specified in :attr:`anchor.group_idx <optional.ligand.anchor.group_idx>`.

Expand All @@ -699,6 +703,19 @@ Ligand
* ``"mean_translate"``: Attach the mean position of all anchoring atoms to the core and then translate back to the first atom.


.. attribute:: optional.ligand.anchor.angle_offset

:Parameter: * **Type** - :data:`None`, :class:`float` or :class:`str`
* **Default value** – :data:`None`

Manually offset the angle of the ligand vector by a given number.

The plane of rotation is defined by the first three indices in :attr:`anchor.group_idx <optional.ligand.anchor.group_idx>`.

By default the angle unit is assumed to be in degrees,
but if so desired one can explicitly pass the unit: ``angle_offset: "0.25 rad"``.


.. attribute:: optional.ligand.split

:Parameter: * **Type** - :class:`bool`
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the built documents.
release = '0.10.0' # The full version, including alpha/beta/rc tags.
release = '0.10.1' # The full version, including alpha/beta/rc tags.
version = release.rsplit('.', maxsplit=1)[0]


Expand Down
Binary file modified tests/test_files/test_ligand_anchoring.hdf5
Binary file not shown.
1 change: 1 addition & 0 deletions tests/test_ligand_anchoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def get_h5py_file(self) -> Generator[h5py.Group, None, None]:
"kind_first": dict(group="OC(=O)C", group_idx=0, kind="FIRST"),
"kind_mean": dict(group="OC(=O)C", group_idx=[0, 2], kind="MEAN"),
"kind_mean_translate": dict(group="OC(=O)C", group_idx=[0, 2], kind="MEAN_TRANSLATE"),
"angle": dict(group="OC(=O)C", group_idx=[0, 1, 2], angle_offset=45),
})

@pytest.mark.parametrize("kwargs_id,kwargs", OPTIONS_DICT.items(), ids=OPTIONS_DICT.keys())
Expand Down

0 comments on commit c586d13

Please sign in to comment.