Skip to content

Commit

Permalink
Merge pull request #216 from nlesc-nano/rdf
Browse files Browse the repository at this point in the history
ENH: Add initial support for periodic property calculations
  • Loading branch information
BvB93 committed Feb 22, 2021
2 parents 6fc7837 + 62ca563 commit 86c856f
Show file tree
Hide file tree
Showing 27 changed files with 120,316 additions and 71 deletions.
134 changes: 111 additions & 23 deletions FOX/classes/multi_mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
)
from typing import (
Sequence, Optional, Union, List, Hashable, Callable, Iterable, Dict, Tuple, Any, Mapping,
overload, TypeVar, Type, Container, cast, TYPE_CHECKING,
overload, TypeVar, Type, Container, cast, Generator, TYPE_CHECKING,
)

import numpy as np
Expand All @@ -38,7 +38,7 @@
from scm.plams import Molecule, Atom, Bond, PeriodicTable
from nanoutils import group_by_values, Literal

from ..utils import slice_iter
from ..utils import slice_iter, lattice_to_volume
from .multi_mol_magic import _MultiMolecule, AliasTuple
from ..io.read_kf import read_kf
from ..io.read_xyz import read_multi_xyz
Expand Down Expand Up @@ -68,6 +68,8 @@
__all__ = ['MultiMolecule']

MT = TypeVar('MT', bound='MultiMolecule')
_DType = TypeVar("_DType", bound=np.dtype)

MolSubset = Union[None, slice, int]
AtomSubset = Union[
None, slice, range, int, str, Sequence[int], Sequence[str], Sequence[Sequence[int]]
Expand All @@ -79,6 +81,21 @@ def neg_exp(x: np.ndarray) -> np.ndarray:
return np.exp(-x)


def _periodicity_iter(
periodicity: Iterable[Literal["x", "y", "z"]],
ar: np.ndarray[Any, _DType],
) -> Generator[Tuple[int, np.ndarray[Any, _DType]], None, None]:
dct = {"x": 0, "y": 1, "z": 2}
for i in sorted(periodicity):
j = dct[i]
yield j, ar[j]


class _GetNone:
def __getitem__(self, key: object) -> None:
return None


class MultiMolecule(_MultiMolecule):
"""A class designed for handling a and manipulating large numbers of molecules.
Expand All @@ -101,6 +118,9 @@ class MultiMolecule(_MultiMolecule):
A Settings instance for storing miscellaneous user-defined (meta-)data.
Is devoid of keys by default.
Stored in the :attr:`MultiMolecule.properties` attribute.
lattice : :class:`np.ndarray[np.float64] <numpy.ndarray>`, shape :math:`(m, 3, 3)` or :math:`(3, 3)`, optional
Lattice vectors for periodic systems.
For non-periodic systems this value should be :data:`None`.
Attributes
----------
Expand All @@ -112,8 +132,11 @@ class MultiMolecule(_MultiMolecule):
properties : :class:`plams.Settings <scm.plams.core.settings.Settings>`
A Settings instance for storing miscellaneous user-defined (meta-)data.
Is devoid of keys by default.
lattice : :class:`np.ndarray[np.float64] <numpy.ndarray>`, shape :math:`(m, 3, 3)` or :math:`(3, 3)`, optional
Lattice vectors for periodic systems.
For non-periodic systems this value should be :data:`None`.
"""
""" # noqa: E501

@overload
def round(self: MT, decimals: int = ..., *, inplace: Literal[False] = ...) -> MT: ... # type: ignore[misc] # noqa: E501
Expand Down Expand Up @@ -1257,8 +1280,15 @@ def get_at_idx(rmsf: pd.DataFrame,

"""############################# Radial Distribution Functions ##########################"""

def init_rdf(self, mol_subset: MolSubset = None, atom_subset: AtomSubset = None,
dr: float = 0.05, r_max: float = 12.0) -> pd.DataFrame:
def init_rdf(
self,
mol_subset: MolSubset = None,
atom_subset: AtomSubset = None,
*,
dr: float = 0.05,
r_max: float = 12.0,
periodic: None | Iterable[Literal["x", "y", "z"]] = None,
) -> pd.DataFrame:
"""Initialize the calculation of radial distribution functions (RDFs).
RDFs are calculated for all possible atom-pairs in **atom_subset** and returned as a
Expand All @@ -1279,6 +1309,10 @@ def init_rdf(self, mol_subset: MolSubset = None, atom_subset: AtomSubset = None,
concentric spheres.
r_max : :class:`float`
The maximum to be evaluated interatomic distance in Ångström.
periodic : :class:`str`, optional
If specified, correct for the systems periodicity if
:attr:`self.lattice is not None <MultiMolecule.lattice>`.
Accepts ``"x"``, ``"y"`` and/or ``"z"``.
Returns
-------
Expand All @@ -1290,8 +1324,10 @@ def init_rdf(self, mol_subset: MolSubset = None, atom_subset: AtomSubset = None,
"""
# If **atom_subset** is None: extract atomic symbols from they keys of **self.atoms**
at_subset = atom_subset or sorted(self.atoms, key=str)
atom_pairs = self.get_pair_dict(at_subset, r=2)
if atom_subset is not None:
atom_pairs = self.get_pair_dict(atom_subset, r=2)
else:
atom_pairs = self.get_pair_dict(sorted(self.atoms, key=str), r=2)

# Construct an empty dataframe with appropiate dimensions, indices and keys
df = get_rdf_df(atom_pairs, dr, r_max)
Expand All @@ -1300,19 +1336,49 @@ def init_rdf(self, mol_subset: MolSubset = None, atom_subset: AtomSubset = None,
m_subset = self._get_mol_subset(mol_subset)
m_self = self[m_subset]

# Parse the lattice and periodicty settings
if periodic is not None:
periodic_set = {i.lower() for i in periodic}
if not periodic_set.issubset("xyz"):
raise ValueError("periodic expected `x`, `y` and/or `z`; "
f"observed value: {periodic!r}")
elif self.lattice is None:
raise TypeError("cannot perform periodic calculations if the "
"molecules `lattice` is None")
lattice_ar = self.lattice if self.lattice.ndim == 2 else self.lattice[m_subset]
else:
lattice_ar = _GetNone()
periodic_set = "xyz"

# Identify the volume occupied by the system
if self.lattice is None:
volume = None
else:
volume = lattice_to_volume(
self.lattice if self.lattice.ndim == 2 else self.lattice[m_subset]
)

# Fill the dataframe with RDF's, averaged over all conformations in this instance
n_mol = len(m_self)
for key, (idx0, idx1) in atom_pairs.items():
shape = n_mol, len(idx0), len(idx1)
for key, (i, j) in atom_pairs.items():
shape = n_mol, len(i), len(j)
iterator = slice_iter(shape, m_self.dtype.itemsize)
for slc in iterator:
dist_mat = m_self.get_dist_mat(mol_subset=slc, atom_subset=(idx0, idx1))
df[key] += get_rdf(dist_mat, dr=dr, r_max=r_max)
dist_mat = m_self.get_dist_mat(
mol_subset=slc, atom_subset=(i, j),
lattice=lattice_ar[slc], periodicity=periodic_set,
)
df[key] += get_rdf(dist_mat, dr=dr, r_max=r_max, volume=volume)
df /= n_mol
return df

def get_dist_mat(self, mol_subset: MolSubset = None,
atom_subset: Tuple[AtomSubset, AtomSubset] = (None, None)) -> np.ndarray:
def get_dist_mat(
self,
mol_subset: MolSubset = None,
atom_subset: Tuple[AtomSubset, AtomSubset] = (None, None),
lattice: None | np.ndarray[Any, np.dtype[np.float64]] = None,
periodicity: Iterable[Literal["x", "y", "z"]] = "xyz",
) -> np.ndarray[Any, np.dtype[np.float64]]:
"""Create and return a distance matrix for all molecules and atoms in this instance.
Parameters
Expand All @@ -1325,32 +1391,54 @@ def get_dist_mat(self, mol_subset: MolSubset = None,
Perform the calculation on a subset of atoms in this instance, as
determined by their atomic index or atomic symbol.
Include all :math:`n` atoms per molecule in this instance if :data:`None`.
lattice : :class:`np.ndarray[np.float64] <numpy.ndarray>`, shape :math:`(3, 3)` or :math:`(m, 3, 3)`, optional
If not :data:`None`, use the specified lattice vectors for
correcting periodic effects.
periodicty : :class:`str`
The axes along which the system's periodicity extends; accepts
``"x"``, ``"y"`` and/or ``"z"``.
Only relevant if ``lattice is not None``.
Returns
-------
:class:`np.ndarray[np.float64] <numpy.ndarray>`, shape :math:`(m, n, k)`
A 3D distance matrix of :math:`m` molecules, created out of two sets of :math:`n`
and :math:`k` atoms.
"""
""" # noqa: E501
# Define array slices
m_subset = self._get_mol_subset(mol_subset)
i = m_subset, self._get_atom_subset(atom_subset[0])
j = m_subset, self._get_atom_subset(atom_subset[1])

# Slice the XYZ array
A = self[i]
B = self[j]
A = self[m_subset, self._get_atom_subset(atom_subset[0])]
B = self[m_subset, self._get_atom_subset(atom_subset[1])]

# Create, fill and return the distance matrix
if A.ndim == 2:
return cdist(A, B)[None, ...]

shape = A.shape[0], A.shape[1], B.shape[1]
ret = np.empty(shape, dtype=self.dtype)
for k, (a, b) in enumerate(zip(A, B)):
ret[k] = cdist(a, b)
return ret
if lattice is None:
shape = A.shape[0], A.shape[1], B.shape[1]
ret = np.empty(shape, dtype=self.dtype)
for k, (a, b) in enumerate(zip(A, B)):
ret[k] = cdist(a, b)
return ret

ret = np.abs(A[..., None, :] - B[..., None, :, :])
lat_norm = np.linalg.norm(lattice, axis=-1)
if lat_norm.ndim == 1:
iterator = _periodicity_iter(periodicity, lat_norm)
for i, ar1 in iterator:
ret[..., i][ret[..., i] > (ar1 / 2)] -= ar1
elif lat_norm.ndim == 2:
iterator = _periodicity_iter(periodicity, lat_norm.T)
for i, _ar2 in iterator:
ar2 = np.full(ret.shape[:-1], _ar2[..., None, None])
condition = ret[..., i] > (ar2 / 2)
ret[..., i][condition] -= ar2[condition]
else:
raise ValueError
return np.linalg.norm(ret, axis=-1)

def get_pair_dict(self, atom_subset: Union[Sequence[AtomSubset],
Mapping[Hashable, Sequence[AtomSubset]]],
Expand Down
55 changes: 40 additions & 15 deletions FOX/classes/multi_mol_magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,6 @@
from assertionlib.ndrepr import NDRepr
from nanoutils import ArrayLike, Literal


class AliasTuple(NamedTuple):
"""A 2-tuple used for :attr:`FOX.MultiMolecule.atoms` values."""

alias: str
slice: Union[slice, ellipsis, np.ndarray[Any, np.dtype[np.intp]]] # noqa: F821


if TYPE_CHECKING:
import numpy.typing as npt

Expand All @@ -50,10 +42,18 @@ class AliasTuple(NamedTuple):
Iterable[Tuple[str, _ArLikeInt]],
]

AliasDict = MappingProxyType[str, AliasTuple]
AliasDict = MappingProxyType[str, AliasTuple] # noqa: F821
AliasMapping = Mapping[str, Tuple[str, Union[slice, ellipsis, _ArLikeInt]]] # noqa: F821

__all__: List[str] = ["_MultiMolecule"]
__all__: List[str] = ["_MultiMolecule", "AliasTuple"]


class AliasTuple(NamedTuple):
"""A 2-tuple used for :attr:`FOX.MultiMolecule.atoms` values."""

alias: str
slice: Union[slice, ellipsis, np.ndarray[Any, np.dtype[np.intp]]] # noqa: F821


T = TypeVar('T')
MT = TypeVar('MT', bound='_MultiMolecule')
Expand Down Expand Up @@ -84,6 +84,16 @@ def _to_int_array(ar: _ArLikeInt) -> np.ndarray[Any, np.dtype[np.intp]]:
return ret


def _parse_lattice(a: npt.ArrayLike) -> np.ndarray[Any, np.dtype[np.float64]]:
ar = np.asarray(a).astype(np.float64, casting="same_kind", copy=False)
if ar.ndim not in {2, 3}:
raise ValueError("`lattice` expected a 2D or 3D array; "
f"observed dimensionality: {ar.ndim!r}")
elif ar.shape[-2:] != (3, 3):
raise ValueError(f"Invalid `lattice` shape: {ar.shape!r}")
return ar


class _MolLoc(Generic[MT]):
"""A getter and setter for atom-type-based slicing.
Expand Down Expand Up @@ -226,7 +236,8 @@ def __new__(
bonds: Optional[np.ndarray] = None,
properties: Optional[Mapping] = None,
*,
atoms_alias: Optional[Mapping[str, slice]] = None
atoms_alias: Optional[Mapping[str, slice]] = None,
lattice: None | npt.ArrayLike = None,
) -> MT:
"""Create and return a new object."""
obj = np.array(coords, dtype=np.float64, ndmin=3, copy=False).view(cls)
Expand All @@ -236,6 +247,7 @@ def __new__(
obj.bonds = cast(np.ndarray, bonds)
obj.properties = cast("Settings", properties)
obj.atoms_alias = cast("AliasDict", atoms_alias)
obj.lattice = cast("Optional[np.ndarray[Any, np.dtype[np.float64]]]", lattice)
obj._ndrepr = NDRepr()
return obj

Expand All @@ -248,6 +260,7 @@ def __array_finalize__(self, obj: _MultiMolecule) -> None:
self.bonds = getattr(obj, 'bonds', None)
self.properties = getattr(obj, 'properties', None)
self.atoms_alias = getattr(obj, 'atoms_alias', None)
self.lattice = getattr(obj, 'lattice', None)
self._ndrepr = getattr(obj, '_ndrepr', None)

"""##################### Properties for managing instance attributes ######################"""
Expand Down Expand Up @@ -301,15 +314,16 @@ def atoms_alias(self, value: None | AliasMapping) -> None:
return None

@property
def bonds(self) -> np.ndarray:
def bonds(self) -> np.ndarray[Any, np.dtype[np.intp]]:
return self._bonds

@bonds.setter
def bonds(self, value: Optional[np.ndarray]) -> None:
def bonds(self, value: Optional[np.ndarray[Any, np.dtype[np.integer[Any]]]]) -> None:
if value is None:
bonds = np.zeros((0, 3), dtype=np.intp)
else:
bonds = np.array(value, dtype=np.intp, ndmin=2, copy=False)
_bonds = np.array(value, ndmin=2, copy=False)
bonds = _bonds.astype(np.intp, casting="same_kind", copy=False)

# Set bond orders to 1 (i.e. 10 / 10) if the order is not specified
if bonds.shape[1] == 2:
Expand All @@ -323,9 +337,20 @@ def properties(self) -> Settings:
return self._properties

@properties.setter
def properties(self, value: Optional[Mapping]) -> None:
def properties(self, value: Optional[Mapping[Any, Any]]) -> None:
self._properties = Settings() if value is None else Settings(value)

@property
def lattice(self) -> None | np.ndarray[Any, np.dtype[np.float64]]:
return self._lattice

@lattice.setter
def lattice(self, value: None | npt.ArrayLike) -> None:
if value is None:
self._lattice = None
else:
self._lattice = _parse_lattice(value)

"""############################### PLAMS-based properties ################################"""

@property
Expand Down

0 comments on commit 86c856f

Please sign in to comment.