Skip to content

Commit

Permalink
Merge pull request #204 from nlesc-nano/dihedral
Browse files Browse the repository at this point in the history
ENH: Allow specification of a fixed ligand-vector dihedral angle
  • Loading branch information
BvB93 committed Nov 19, 2021
2 parents c586d13 + 7c17563 commit c5937c4
Show file tree
Hide file tree
Showing 9 changed files with 184 additions and 34 deletions.
60 changes: 53 additions & 7 deletions CAT/attachment/ligand_attach.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@

from .perp_surface import get_surface_vec
from ..mol_utils import get_index, round_coords # noqa: F401
from ..utils import AnchorTup
from ..workflows import WorkFlow, HDF5_INDEX, MOL, OPT
from ..settings_dataframe import SettingsDataFrame
from ..data_handling import mol_to_file, WARN_MAP
Expand Down Expand Up @@ -261,7 +262,19 @@ def get_name() -> str:
else:
raise ValueError(repr(allignment))

lig_array = rot_mol(ligand, vec1, vec2, atoms_other=core.properties.dummies, core=core, idx=idx)
# Rotate the ligands
anchor_tup = ligand.properties.anchor_tup
if anchor_tup.dihedral is None:
lig_array = rot_mol(
ligand, vec1, vec2, atoms_other=core.properties.dummies, core=core, idx=idx
)
else:
lig_array = rot_mol(
ligand, vec1, vec2, atoms_other=core.properties.dummies, core=core,
idx=idx, anchor_tup=anchor_tup,
)

# Combine the ligands and core
qd = core.copy()
array_to_qd(ligand, lig_array, mol_out=qd)
qd.round_coords()
Expand All @@ -281,6 +294,18 @@ def get_name() -> str:
return qd


def _get_dihedrals(mat1: np.ndarray, mat2: np.ndarray, vec3: np.ndarray) -> np.ndarray:
"""Get the dihedral angle between three vectors in radian."""
v1v2 = np.cross(-mat1, mat2)
v2v3 = np.cross(vec3, mat2)
v1v2_v2v3 = np.cross(v1v2, v2v3)
v2_norm_v2 = mat2 / np.linalg.norm(mat2, axis=1)[..., None]
return np.arctan2(
np.einsum("ij,ij->i", v1v2_v2v3, v2_norm_v2),
np.einsum("ij,ij->i", v1v2, v2v3),
)


def _get_rotmat1(vec1: np.ndarray, vec2: np.ndarray) -> np.ndarray:
"""Calculate the rotation matrix for rotating **vec1** to **vec2**.
Expand Down Expand Up @@ -345,7 +370,11 @@ def _get_rotmat1(vec1: np.ndarray, vec2: np.ndarray) -> np.ndarray:
return ret


def _get_rotmat2(vec: np.ndarray, step: float = (1/16)) -> np.ndarray:
def _get_rotmat2(
vec: np.ndarray,
step: float = 1/16,
angle_vec: "None | np.ndarray" = None,
) -> np.ndarray:
r"""Calculate the rotation matrix for rotating m vectors along their axes, each vector yielding :math:`k = (2 / step)` possible rotations.
Paramaters
Expand All @@ -367,10 +396,13 @@ def _get_rotmat2(vec: np.ndarray, step: float = (1/16)) -> np.ndarray:
[v3, zero, -v1],
[-v2, v1, zero]]).T

step_range = np.pi * np.arange(0.0, 2.0, step)
a1 = np.sin(step_range)[:, None, None, None]
a2 = (1 - np.cos(step_range))[:, None, None, None]

if angle_vec is None:
step_range = np.pi * np.arange(0.0, 2.0, step)
a1 = np.sin(step_range)[:, None, None, None]
a2 = 1 - np.cos(step_range)[:, None, None, None]
else:
a1 = np.sin(angle_vec)[:, None, None]
a2 = 1 - np.cos(angle_vec)[:, None, None]
return np.identity(3) + a1 * w + a2 * w@w


Expand All @@ -383,7 +415,8 @@ def rot_mol(xyz_array: np.ndarray,
bond_length: Optional[int] = None,
step: float = 1/16,
dist_to_self: bool = True,
ret_min_dist: bool = False) -> np.ndarray:
ret_min_dist: bool = False,
anchor_tup: "None | AnchorTup" = None) -> np.ndarray:
r"""Rotate **xyz_array**.
Paramaters
Expand Down Expand Up @@ -441,6 +474,19 @@ def rot_mol(xyz_array: np.ndarray,
rotmat1 = _get_rotmat1(vec1, vec2)
xyz = xyz@rotmat1

# Code-path for fixed-angle dihedrals
if anchor_tup is not None:
i, j, *_ = anchor_tup.anchor_idx
vec3 = xyz[:, i] - xyz[:, j]
dihedral_vec = _get_dihedrals(vec3, vec2, vec1)
dihedral_vec -= anchor_tup.dihedral
rotmat2 = _get_rotmat2(vec2, angle_vec=dihedral_vec)
xyz = np.matmul(xyz, rotmat2, out=xyz)

at_other = sanitize_dim_2(atoms_other)
xyz += (at_other - xyz[:, idx])[:, None]
return xyz

# Create all k possible rotations of all m ligands
rotmat2 = _get_rotmat2(vec2, step=step)
xyz = np.swapaxes(xyz@rotmat2, 0, 1)
Expand Down
44 changes: 22 additions & 22 deletions CAT/data_handling/anchor_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Union, Tuple, Collection, Iterable, SupportsFloat

from rdkit.Chem import Mol
from scm.plams import Units, PT
from scm.plams import Units
from schema import Schema, Use, Optional
from typing_extensions import TypedDict, SupportsIndex

Expand Down Expand Up @@ -33,15 +33,6 @@ class _AnchorDict(TypedDict):
angle_offset: "None | float"


def _pt_sort_func(symbol: str) -> Tuple[int, str]:
return len(symbol), symbol


_SYMBOL = "|".join(sorted(PT.symtonum, key=_pt_sort_func))
_SYMBOL_PATTERN = re.compile(f"({_SYMBOL})")
_UNIT_PATTERN = re.compile(r"([\.\_0-9]+)(\s+)?(\w+)?")


def _parse_group_idx(item: "SupportsIndex | Iterable[SupportsIndex]") -> Tuple[int, ...]:
"""Parse the ``group_idx`` option."""
try:
Expand Down Expand Up @@ -77,10 +68,13 @@ def _parse_kind(typ: "None | str | KindEnum") -> KindEnum:
return KindEnum[typ.upper()]


_UNIT_PATTERN = re.compile(r"([\.\_0-9]+)(\s+)?(\w+)?")


def _parse_angle_offset(
offset: "None | SupportsFloat | SupportsIndex | bytes | str"
) -> "None | float":
"""Parse the ``angle_offset`` option; convert the offset to radians."""
"""Parse the ``angle_offset`` and ``dihedral`` options; convert the offset to radians."""
if offset is None:
return None
elif not isinstance(offset, str):
Expand All @@ -103,6 +97,7 @@ def _parse_angle_offset(
Optional("remove", default=None): Use(_parse_remove),
Optional("kind", default=KindEnum.FIRST): Use(_parse_kind),
Optional("angle_offset", default=None): Use(_parse_angle_offset),
Optional("dihedral", default=None): Use(_parse_angle_offset)
})


Expand Down Expand Up @@ -145,23 +140,28 @@ def parse_anchors(
if remove is not None and not set(group_idx).isdisjoint(remove):
raise ValueError("`group_idx` and `remove` must be disjoint")

# Check that the indices in `group_idx` and `remove` are not out of bounds
group = kwargs["group"]
symbols_list = _SYMBOL_PATTERN.findall(group)
if len(symbols_list) <= max(group_idx):
raise IndexError(f"`group_idx` index {max(group_idx)} is out of bounds "
f"for a `group` with {len(symbols_list)} atoms")
elif remove is not None and len(symbols_list) <= max(remove):
raise IndexError(f"`remove` index {max(remove)} is out of bounds "
f"for a `group` with {len(symbols_list)} atoms")

# Check that at least 3 atoms are available for `angle_offset`
# (so a plane can be defined)
angle_offset = kwargs["angle_offset"]
if angle_offset is not None and len(group_idx) < 3:
raise ValueError("`group_idx` must contain at least 3 atoms when "
"`angle_offset` is specified")

mol = _smiles_to_rdmol(group)
# Check that at least 2 atoms are available for `dihedral`
# (so the third dihedral-defining vector can be defined)
dihedral = kwargs["dihedral"]
if dihedral is not None and len(group_idx) < 2:
raise ValueError("`group_idx` must contain at least 3 atoms when "
"`dihedral` is specified")

# Check that the indices in `group_idx` and `remove` are not out of bounds
mol = _smiles_to_rdmol(kwargs["group"])
atom_count = len(mol.GetAtoms())
if atom_count <= max(group_idx):
raise IndexError(f"`group_idx` index {max(group_idx)} is out of bounds "
f"for a `group` with {atom_count} atoms")
elif remove is not None and atom_count <= max(remove):
raise IndexError(f"`remove` index {max(remove)} is out of bounds "
f"for a `group` with {atom_count} atoms")
ret.append(AnchorTup(**kwargs, mol=mol))
return tuple(ret)
1 change: 1 addition & 0 deletions CAT/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,3 +551,4 @@ class AnchorTup(NamedTuple):
remove: "None | Tuple[int, ...]" = None
kind: KindEnum = KindEnum.FIRST
angle_offset: "None | float" = None
dihedral: "None | float" = None
18 changes: 18 additions & 0 deletions docs/4_optional.rst
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,7 @@ Ligand
* :attr:`anchor.remove`
* :attr:`anchor.kind`
* :attr:`anchor.angle_offset`
* :attr:`anchor.dihedral`

.. note::

Expand Down Expand Up @@ -716,6 +717,23 @@ Ligand
but if so desired one can explicitly pass the unit: ``angle_offset: "0.25 rad"``.


.. attribute:: optional.ligand.anchor.dihedral

:Parameter: * **Type** - :data:`None`, :class:`float` or :class:`str`
* **Default value** – :data:`None`

Manually specify the ligands vector dihedral angle, rather than optimizing it w.r.t. the inter-ligand distance.

The dihedral angle is defined by three vectors:

* The first two in dices in :attr:`anchor.group_idx <optional.ligand.anchor.group_idx>`.
* The core vector(s).
* The Cartesian X-axis as defined by the core.

By default the angle unit is assumed to be in degrees,
but if so desired one can explicitly pass the unit: ``dihedral: "0.5 rad"``.


.. attribute:: optional.ligand.split

:Parameter: * **Type** - :class:`bool`
Expand Down
24 changes: 24 additions & 0 deletions tests/test_files/CAT_dihedral.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
path: .

input_cores:
- Cd68Se55.xyz

input_ligands:
- 'CC(O)=O'

optional:
core:
dirname: core
anchor: Cl

ligand:
dirname: ligand
optimize: True
anchor:
group: "[H]OC=O"
group_idx: [1, 2, 3]
remove: 0
dihedral: 45

qd:
dirname: QD
Binary file added tests/test_files/test_dihedral_dihed_180.npy
Binary file not shown.
Binary file added tests/test_files/test_dihedral_dihed_45.npy
Binary file not shown.
Binary file added tests/test_files/test_dihedral_dihed_45_deg.npy
Binary file not shown.
71 changes: 66 additions & 5 deletions tests/test_ligand_attach.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,28 @@
"""Tests for :mod:`CAT.attachment.ligand_attach`."""

from os.path import join
import sys
import shutil
from pathlib import Path
from typing import Generator, NamedTuple, TYPE_CHECKING

import yaml
import pytest
import numpy as np

from assertionlib import assertion
from scm.plams import Settings, Molecule

from CAT.base import prep
from CAT.attachment.ligand_attach import (_get_rotmat1, _get_rotmat2)
from CAT.workflows import MOL

if TYPE_CHECKING:
import _pytest

PATH = join('tests', 'test_files')
PATH = Path('tests') / 'test_files'

LIG_PATH = PATH / 'ligand'
QD_PATH = PATH / 'qd'
DB_PATH = PATH / 'database'


def test_get_rotmat1() -> None:
Expand Down Expand Up @@ -50,11 +64,58 @@ def test_get_rotmat2() -> None:
vec1 = np.array([1, 0, 0], dtype=float)
vec2 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=float)

ref1 = np.load(join(PATH, 'rotmat2_1.npy'))
ref2 = np.load(join(PATH, 'rotmat2_2.npy'))
ref1 = np.load(PATH / 'rotmat2_1.npy')
ref2 = np.load(PATH / 'rotmat2_2.npy')

rotmat1 = _get_rotmat2(vec1)
np.testing.assert_allclose(rotmat1, ref1)

rotmat2 = _get_rotmat2(vec2)
np.testing.assert_allclose(rotmat2, ref2)


class DihedTup(NamedTuple):
mol: Molecule
ref: np.recarray


class TestDihedral:
PARAMS = {
"dihed_45": 45,
"dihed_45_deg": "45 deg",
"dihed_180": 180.0,
}

@pytest.fixture(scope="class", autouse=True, name="output", params=PARAMS.items(), ids=PARAMS)
def run_cat(self, request: "_pytest.fixtures.SubRequest") -> Generator[DihedTup, None, None]:
# Setup
name, dihed = request.param # type: str, str | float
yaml_path = PATH / 'CAT_dihedral.yaml'
with open(yaml_path, 'r') as f:
arg = Settings(yaml.load(f, Loader=yaml.FullLoader))

arg.path = PATH
arg.optional.ligand.anchor.dihedral = dihed
qd_df, _, _ = prep(arg)
qd = qd_df[MOL].iloc[0]

ref = np.load(PATH / f"test_dihedral_{name}.npy").view(np.recarray)
yield DihedTup(qd, ref)

# Teardown
files = [LIG_PATH, QD_PATH, DB_PATH]
for f in files:
shutil.rmtree(f, ignore_errors=True)

def test_atoms(self, output: DihedTup) -> None:
dtype = [("symbols", "U2"), ("coords", "f8", 3)]
atoms = np.fromiter(
[(at.symbol, at.coords) for at in output.mol], dtype=dtype
).view(np.recarray)

assertion.eq(atoms.dtype, output.ref.dtype)
np.testing.assert_array_equal(atoms.symbols, output.ref.symbols)

if sys.version_info >= (3, 9):
pytest.xfail("Geometries must be updated for RDKit >2019.09.2")
np.testing.assert_allclose(atoms.coords, output.ref.coords)

0 comments on commit c5937c4

Please sign in to comment.