diff --git a/CAT/__version__.py b/CAT/__version__.py index e84a08af..25acc07a 100644 --- a/CAT/__version__.py +++ b/CAT/__version__.py @@ -1,3 +1,3 @@ """The **CAT** version.""" -__version__ = '0.10.0' +__version__ = '0.10.1' diff --git a/CAT/attachment/ligand_anchoring.py b/CAT/attachment/ligand_anchoring.py index 988469b2..405b3600 100644 --- a/CAT/attachment/ligand_anchoring.py +++ b/CAT/attachment/ligand_anchoring.py @@ -34,7 +34,7 @@ from rdkit import Chem from ..logger import logger -from ..utils import get_template, AnchorTup +from ..utils import get_template, AnchorTup, KindEnum from ..mol_utils import separate_mod # noqa: F401 from ..workflows import MOL, FORMULA, HDF5_INDEX, OPT from ..settings_dataframe import SettingsDataFrame @@ -286,7 +286,13 @@ def substructure_split( # Update ligand properties lig.properties.dummies = anchor - lig.properties.anchor = "".join(f"{lig[1 + i].symbol}{1 + i}" for i in anchor_tup.anchor_idx) + if anchor_tup.kind == KindEnum.FIRST: + i = 1 + anchor_tup.anchor_idx[0] + lig.properties.anchor = f"{lig[i].symbol}{i}" + else: + lig.properties.anchor = "".join( + f"{lig[1 + i].symbol}{1 + i}" for i in anchor_tup.anchor_idx + ) lig.properties.anchor_tup = anchor_tup lig.properties.charge = sum(atom.properties.get('charge', 0) for atom in lig) diff --git a/CAT/attachment/ligand_opt.py b/CAT/attachment/ligand_opt.py index 589be1b8..496d06a1 100644 --- a/CAT/attachment/ligand_opt.py +++ b/CAT/attachment/ligand_opt.py @@ -42,7 +42,7 @@ from rdkit.Chem import AllChem, Mol from scm.plams.core.basejob import Job from scm.plams import (Molecule, Atom, Bond, MoleculeError, add_to_class, Units, - Settings, AMSJob, ADFJob, Cp2kJob) + Settings, AMSJob, ADFJob, Cp2kJob, axis_rotation_matrix) from .mol_split_cm import SplitMol from .remove_atoms_cm import RemoveAtoms @@ -190,11 +190,14 @@ def optimize_ligand(ligand: Molecule) -> None: allign_axis(ligand) -def _get_allign_args(mol: "Molecule | Mol", anchor_tup: AnchorTup) -> Tuple[np.ndarray, int, int]: +def _get_allign_args( + mol: "Molecule | Mol", + anchor_tup: AnchorTup, +) -> Tuple[np.ndarray, int, int, Tuple[int, int, int], "None | float"]: if anchor_tup.kind == KindEnum.FIRST: idx_rot = idx_trans = anchor_tup.anchor_idx[0] xyz = mol.as_array() if isinstance(mol, Molecule) else rdmol_as_array(mol) - return xyz, idx_rot, idx_trans + return xyz, idx_rot, idx_trans, anchor_tup.anchor_idx[:3], anchor_tup.angle_offset # Reserve index `-1` for a dummy atom representing the mean position of all anchors if isinstance(mol, Molecule): @@ -212,14 +215,22 @@ def _get_allign_args(mol: "Molecule | Mol", anchor_tup: AnchorTup) -> Tuple[np.n idx_trans = anchor_tup.anchor_idx[0] else: raise ValueError(f"Unknown anchor kind: {anchor_tup.kind!r}") - return xyz, idx_rot, idx_trans + return xyz, idx_rot, idx_trans, anchor_tup.anchor_idx[:3], anchor_tup.angle_offset def allign_axis(mol: Molecule) -> None: """Allign a molecule with the Cartesian X-axis; setting **anchor** as the origin.""" - xyz, idx_rot, idx_trans = _get_allign_args(mol, mol.properties.anchor_tup) - rotmat = optimize_rotmat(xyz, idx_rot) - xyz = np.matmul(xyz, rotmat.T, out=xyz) + xyz, idx_rot, idx_trans, idx_angle, angle = _get_allign_args(mol, mol.properties.anchor_tup) + rotmat1 = optimize_rotmat(xyz, idx_rot) + xyz = np.matmul(xyz, rotmat1.T, out=xyz) + + # Manually rotate the ligand by a user-specified amount + if angle is not None: + i, j, k = idx_angle + vec_perp = np.cross(xyz[j] - xyz[i], xyz[k] - xyz[j]) + rotmat2 = axis_rotation_matrix(vec_perp, angle) + xyz = np.matmul(xyz, rotmat2.T, out=xyz) + xyz -= xyz[idx_trans] mol.from_array(xyz.round(decimals=3)) @@ -574,9 +585,15 @@ def modified_minimum_scan_rdkit(ligand: Molecule, bond_tuple: Tuple[int, int]) - # Find the conformation with the optimal ligand vector cost_list = [] for rdmol in rdmol_list: - xyz, idx_rot, idx_trans = _get_allign_args(rdmol, ligand.properties.anchor_tup) + xyz, idx_rot, idx_trans, idx_angle, angle = _get_allign_args(rdmol, + ligand.properties.anchor_tup) rotmat = optimize_rotmat(xyz, idx_rot) xyz = np.matmul(xyz, rotmat.T, out=xyz) + if angle is not None: + i, j, k = idx_angle + vec_perp = np.cross(xyz[j] - xyz[i], xyz[k] - xyz[j]) + rotmat2 = axis_rotation_matrix(vec_perp, angle) + xyz = np.matmul(xyz, rotmat2.T, out=xyz) xyz -= xyz[idx_trans] cost = np.exp(xyz[:, 1:]).sum() cost_list.append(cost) diff --git a/CAT/data_handling/anchor_parsing.py b/CAT/data_handling/anchor_parsing.py index 2ebd0c07..9eec4ba2 100644 --- a/CAT/data_handling/anchor_parsing.py +++ b/CAT/data_handling/anchor_parsing.py @@ -1,9 +1,11 @@ """A module for parsing the ``ligand.anchor`` keyword.""" +import re import operator -from typing import Union, Tuple, Collection, Iterable +from typing import Union, Tuple, Collection, Iterable, SupportsFloat from rdkit.Chem import Mol +from scm.plams import Units, PT from schema import Schema, Use, Optional from typing_extensions import TypedDict, SupportsIndex @@ -20,6 +22,24 @@ class _UnparsedAnchorDictBase(TypedDict): class _UnparsedAnchorDict(_UnparsedAnchorDictBase, total=False): remove: "None | SupportsIndex | Collection[SupportsIndex]" + angle_offset: "None | SupportsFloat | SupportsIndex | bytes | str" + + +class _AnchorDict(TypedDict): + group: str + group_idx: Tuple[int, ...] + remove: "None | Tuple[int, ...]" + kind: KindEnum + angle_offset: "None | float" + + +def _pt_sort_func(symbol: str) -> Tuple[int, str]: + return len(symbol), symbol + + +_SYMBOL = "|".join(sorted(PT.symtonum, key=_pt_sort_func)) +_SYMBOL_PATTERN = re.compile(f"({_SYMBOL})") +_UNIT_PATTERN = re.compile(r"([\.\_0-9]+)(\s+)?(\w+)?") def _parse_group_idx(item: "SupportsIndex | Iterable[SupportsIndex]") -> Tuple[int, ...]: @@ -57,11 +77,32 @@ def _parse_kind(typ: "None | str | KindEnum") -> KindEnum: return KindEnum[typ.upper()] +def _parse_angle_offset( + offset: "None | SupportsFloat | SupportsIndex | bytes | str" +) -> "None | float": + """Parse the ``angle_offset`` option; convert the offset to radians.""" + if offset is None: + return None + elif not isinstance(offset, str): + return Units.convert(float(offset), "deg", "rad") + + # match offsets such as `"5.5 degree"` + match = _UNIT_PATTERN.match(offset) + if match is None: + raise ValueError(f"Invalid offset string: {offset!r}") + + offset, _, unit = match.groups() + if unit is None: + unit = "deg" + return Units.convert(float(offset), unit, "rad") + + anchor_schema = Schema({ "group": str, - "group_idx": Use(lambda i: tuple(_parse_group_idx(i))), + "group_idx": Use(_parse_group_idx), Optional("remove", default=None): Use(_parse_remove), Optional("kind", default=KindEnum.FIRST): Use(_parse_kind), + Optional("angle_offset", default=None): Use(_parse_angle_offset), }) @@ -83,7 +124,7 @@ def parse_anchors( patterns = [patterns] ret = [] - for p in patterns: # type: _UnparsedAnchorDict | str | Mol + for p in patterns: # type: _UnparsedAnchorDict | str | Mol | AnchorTup if isinstance(p, AnchorTup): ret.append(p) elif isinstance(p, Mol): @@ -96,12 +137,31 @@ def parse_anchors( remove = None if not split else (list(mol.GetAtoms())[-1].GetIdx(),) ret.append(AnchorTup(mol=mol, group=group, remove=remove)) else: - kwargs = anchor_schema.validate(p) + kwargs: _AnchorDict = anchor_schema.validate(p) + + # Check that `group_idx` and `remove` are disjoint group_idx = kwargs["group_idx"] remove = kwargs["remove"] if remove is not None and not set(group_idx).isdisjoint(remove): raise ValueError("`group_idx` and `remove` must be disjoint") - mol = _smiles_to_rdmol(kwargs["group"]) + # Check that the indices in `group_idx` and `remove` are not out of bounds + group = kwargs["group"] + symbols_list = _SYMBOL_PATTERN.findall(group) + if len(symbols_list) <= max(group_idx): + raise IndexError(f"`group_idx` index {max(group_idx)} is out of bounds " + f"for a `group` with {len(symbols_list)} atoms") + elif remove is not None and len(symbols_list) <= max(remove): + raise IndexError(f"`remove` index {max(remove)} is out of bounds " + f"for a `group` with {len(symbols_list)} atoms") + + # Check that at least 3 atoms are available for `angle_offset` + # (so a plane can be defined) + angle_offset = kwargs["angle_offset"] + if angle_offset is not None and len(group_idx) < 3: + raise ValueError("`group_idx` must contain at least 3 atoms when " + "`angle_offset` is specified") + + mol = _smiles_to_rdmol(group) ret.append(AnchorTup(**kwargs, mol=mol)) return tuple(ret) diff --git a/CAT/utils.py b/CAT/utils.py index 2cbdd489..765e8005 100644 --- a/CAT/utils.py +++ b/CAT/utils.py @@ -550,3 +550,4 @@ class AnchorTup(NamedTuple): anchor_idx: Tuple[int, ...] = () remove: "None | Tuple[int, ...]" = None kind: KindEnum = KindEnum.FIRST + angle_offset: "None | float" = None diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 09c0f900..2b98b242 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,11 @@ All notable changes to this project will be documented in this file. This project adheres to `Semantic Versioning `_. +0.10.1 +****** +* Added the option to manually specify angle offsets for the ligand vector. + + 0.10.0 ****** * Add more advanced anchor-parsing options. diff --git a/README.rst b/README.rst index 3deecd3a..2461f1cc 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ :target: https://docs.python.org/3.9/ ############################### -Compound Attachment Tool 0.10.0 +Compound Attachment Tool 0.10.1 ############################### **CAT** is a collection of tools designed for the construction of various chemical compounds. diff --git a/docs/4_optional.rst b/docs/4_optional.rst index da4e0a91..111c9369 100644 --- a/docs/4_optional.rst +++ b/docs/4_optional.rst @@ -629,6 +629,7 @@ Ligand * :attr:`anchor.group_idx` * :attr:`anchor.remove` * :attr:`anchor.kind` + * :attr:`anchor.angle_offset` .. note:: @@ -675,9 +676,11 @@ Ligand .. note:: This argument has no value be default and must thus be provided by the user. + .. attribute:: optional.ligand.anchor.remove :Parameter: * **Type** - :data:`None`, :class:`int` or :class:`Sequence[int] ` + * **Default value** – :data:`None` The indices of the to-be removed atoms in :attr:`anchor.group `. @@ -689,6 +692,7 @@ Ligand .. attribute:: optional.ligand.anchor.kind :Parameter: * **Type** - :class:`str` + * **Default value** – ``"first"`` How atoms are to-be attached when multiple anchor atoms are specified in :attr:`anchor.group_idx `. @@ -699,6 +703,19 @@ Ligand * ``"mean_translate"``: Attach the mean position of all anchoring atoms to the core and then translate back to the first atom. + .. attribute:: optional.ligand.anchor.angle_offset + + :Parameter: * **Type** - :data:`None`, :class:`float` or :class:`str` + * **Default value** – :data:`None` + + Manually offset the angle of the ligand vector by a given number. + + The plane of rotation is defined by the first three indices in :attr:`anchor.group_idx `. + + By default the angle unit is assumed to be in degrees, + but if so desired one can explicitly pass the unit: ``angle_offset: "0.25 rad"``. + + .. attribute:: optional.ligand.split :Parameter: * **Type** - :class:`bool` diff --git a/docs/conf.py b/docs/conf.py index 98d551a0..893e9d56 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -77,7 +77,7 @@ # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the built documents. -release = '0.10.0' # The full version, including alpha/beta/rc tags. +release = '0.10.1' # The full version, including alpha/beta/rc tags. version = release.rsplit('.', maxsplit=1)[0] diff --git a/tests/test_files/test_ligand_anchoring.hdf5 b/tests/test_files/test_ligand_anchoring.hdf5 index 9dbbf486..c7c52f24 100644 Binary files a/tests/test_files/test_ligand_anchoring.hdf5 and b/tests/test_files/test_ligand_anchoring.hdf5 differ diff --git a/tests/test_ligand_anchoring.py b/tests/test_ligand_anchoring.py index 2b45a393..4d5a6a3e 100644 --- a/tests/test_ligand_anchoring.py +++ b/tests/test_ligand_anchoring.py @@ -146,6 +146,7 @@ def get_h5py_file(self) -> Generator[h5py.Group, None, None]: "kind_first": dict(group="OC(=O)C", group_idx=0, kind="FIRST"), "kind_mean": dict(group="OC(=O)C", group_idx=[0, 2], kind="MEAN"), "kind_mean_translate": dict(group="OC(=O)C", group_idx=[0, 2], kind="MEAN_TRANSLATE"), + "angle": dict(group="OC(=O)C", group_idx=[0, 1, 2], angle_offset=45), }) @pytest.mark.parametrize("kwargs_id,kwargs", OPTIONS_DICT.items(), ids=OPTIONS_DICT.keys())