Skip to content

Commit

Permalink
Merge pull request #233 from nlesc-nano/param_mapping
Browse files Browse the repository at this point in the history
ENH: Store metadata and net charges for each individual system in PES-averaged ARMC and ARMCPT
  • Loading branch information
BvB93 committed Apr 12, 2021
2 parents 6ae3fa5 + b17a7ad commit f04dfef
Show file tree
Hide file tree
Showing 8 changed files with 130 additions and 88 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ jobs:
- name: Install dependencies (minimum version)
if: matrix.special == '; minimum version'
run: |
conda create -n test -c conda-forge python=${{ matrix.version }} pyyaml=5.1 rdkit=2018.09 numpy=1.15 h5py=2.10 pandas=0.23 scipy=1.2.0 matplotlib=3.0
conda create -n test -c conda-forge python=${{ matrix.version }} pyyaml=5.1 rdkit=2018.09 numpy=1.15 h5py=2.10 pandas=0.24 scipy=1.2.0 matplotlib=3.0
source $CONDA/bin/activate test
pip install Nano-Utils==1.2.1 schema==0.7.1 AssertionLib==2.2 noodles==0.3.3 sphinx==2.4 sphinx_rtd_theme==0.3.0
pip install -e .[test]
Expand Down
80 changes: 54 additions & 26 deletions FOX/armc/param_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
"""

from __future__ import annotations

from copy import deepcopy
from abc import ABC, abstractmethod
from types import MappingProxyType
Expand Down Expand Up @@ -139,11 +141,11 @@ class ParamMappingABC(AbstractDataClass, ABC):
"""

_net_charge: Optional[float]
_net_charge: Optional[np.ndarray]
_move_range: np.ndarray

#: Fill values for when optional keys are absent.
FILL_VALUE: ClassVar[Mapping[MetadataKeys, np.generic]] = MappingProxyType({
FILL_VALUE: ClassVar[MappingProxyType[MetadataKeys, np.generic]] = MappingProxyType({
'min': np.float64(-np.inf),
'max': np.float64(np.inf),
'count': np.int64(-1),
Expand Down Expand Up @@ -237,6 +239,8 @@ def move_range(self, value: ArrayLike) -> None:
for i in range(1, len(_ar)):
self.param[i] = self.param[0].copy()
self.param_old[i] = self.param_old[0].copy()
for (_, k), v in self.metadata.items():
self.metadata[i, k] = v.copy()
else:
raise ValueError(f"Expected 'move_range' length: {prm_len}; "
f"observed length: {len(_ar)}")
Expand All @@ -256,12 +260,19 @@ def _set_data(self, value: Union[InputMapping, pd.DataFrame]) -> None:
param = _parse_param(dct)

# Fill in the defaults
metadata = pd.DataFrame(index=param.index)
for name, fill_value in self.FILL_VALUE.items():
if name not in dct:
metadata[name] = fill_value
else:
metadata[name] = np.asarray(dct[name], dtype=fill_value.dtype)
metadata = pd.DataFrame(
index=param.index,
columns=pd.MultiIndex(
levels=[pd.Index([], dtype=np.int64), pd.Index([], dtype=np.object_)],
codes=[[], []],
),
)
for i in param.columns:
for name, fill_value in self.FILL_VALUE.items():
if name not in dct:
metadata[i, name] = fill_value
else:
metadata[i, name] = np.asarray(dct[name], dtype=fill_value.dtype)

# Construct a dictionary to contain the old parameter
self.param = param
Expand All @@ -279,7 +290,13 @@ def __eq__(self, value: Any) -> bool:
return False

ret = np.all(self.move_range == value.move_range)
ret &= self._net_charge == value._net_charge
if self._net_charge is not None and value._net_charge is not None:
ret &= (
(self._net_charge.shape == value._net_charge.shape) and
np.all(self._net_charge == value._net_charge)
)
else:
ret &= type(self._net_charge) is type(value._net_charge)
if not ret:
return False

Expand All @@ -298,9 +315,15 @@ def _str_iterator(self):
def _set_net_charge(self) -> None:
"""Set the total charge of the system."""
if 'charge' in self.param.index:
self._net_charge = get_net_charge(
self.param.loc['charge', 0], self.metadata.loc['charge', 'count']
iterator = (
get_net_charge(
self.param.loc['charge', i],
self.metadata.loc['charge', (i, 'count')]
) for i in self.param.columns
)
array = np.fromiter(iterator, dtype=np.float64)
array.setflags(write=False)
self._net_charge = array
else:
self._net_charge = None

Expand All @@ -323,8 +346,9 @@ def add_param(self, idx: Tup3, value: float, **kwargs: Any) -> None:
self.param.loc[idx] = value
self.param_old.loc[idx] = value

metadata: Dict[str, Any] = self.FILL_VALUE.copy() # type: ignore[attr-defined]
metadata.update(kwargs)
idx_range = self.metadata.columns.levels[0]
metadata = {(i, k): v for i in idx_range for k, v in self.FILL_VALUE.items()}
metadata.update(((i, k), v) for i in idx_range for k, v in kwargs.items())
self.metadata.loc[idx] = metadata

def __call__(self, logger: Optional[Logger] = None,
Expand Down Expand Up @@ -430,10 +454,13 @@ def to_struct_array(self) -> np.ndarray:
cls = type(self)
dtype_dict = {k: type(v) for k, v in cls.FILL_VALUE.items()}
dtype_dict['unit'] = h5py.string_dtype('utf-8')
dtype = np.dtype(list(dtype_dict.items()))
dtype: np.dtype[np.void] = np.dtype(list(dtype_dict.items()))

iterator = (v for _, v in self.metadata.items())
return np.rec.fromarrays(iterator, dtype=dtype)
ret = []
for i in self.metadata.columns.levels[0]: # type: int
iterator = (v for _, v in self.metadata[i].items())
ret.append(np.rec.fromarrays(iterator, dtype=dtype))
return np.array(ret)

def constraints_to_str(self) -> pd.Series:
"""Convert the constraints into a human-readable :class:`pandas.Series`."""
Expand Down Expand Up @@ -495,12 +522,13 @@ def to_yaml_dict(self) -> Dict[str, Any]:
idx_dict[key, param] = len(lst) - 1

# Set the extremities
for (key, param, atom), (min_, max_) in self.metadata[['min', 'max']].iterrows():
metadata = self.metadata[[(0, 'min'), (0, 'max')]]
for (key, param, atom), (min_, max_) in metadata.iterrows():
i = idx_dict[key, param]
ret[key][i]['constraints'].append(f'{min_} < {atom} < {max_}')

# Set the parameters
iterator = ((k, self.param.at[k, 0], self.metadata.loc[k, ['frozen', 'unit']]) for k in index) # noqa: E501
iterator = ((k, self.param.at[k, 0], self.metadata.loc[k, [(0, 'frozen'), (0, 'unit')]]) for k in index) # noqa: E501
for (key, param, atom), value, (frozen, unit) in iterator:
i = idx_dict[key, param]
if frozen:
Expand Down Expand Up @@ -548,15 +576,15 @@ def identify_move(self, param_idx: int) -> Tuple[Tup3, float, float]:
""" # noqa
# Define a random parameter
variable = ~self.metadata['frozen']
variable = ~self.metadata[0, 'frozen']
random_prm: pd.Series = self.param.loc[variable, param_idx].sample()
idx, x1 = next(random_prm.items()) # Type: Tup3, float

# Define a random move size
x2: float = np.random.choice(self.move_range[param_idx], 1)[0]
return idx, x1, x2

def clip_move(self, idx: Tup3, value: float) -> float:
def clip_move(self, idx: Tup3, value: float) -> np.float64:
"""Ensure that **value** falls within a user-specified range.
Parameters
Expand All @@ -572,7 +600,7 @@ def clip_move(self, idx: Tup3, value: float) -> float:
The newly clipped value of the moved parameter.
""" # noqa
prm_min, prm_max = self.metadata.loc[idx, ['min', 'max']]
prm_min, prm_max = self.metadata.loc[idx, [(0, 'min'), (0, 'max')]]
return np.clip(value, prm_min, prm_max)

def apply_constraints(self, idx: Tup3, value: float, param_idx: int) -> Optional[ChargeError]:
Expand All @@ -592,18 +620,18 @@ def apply_constraints(self, idx: Tup3, value: float, param_idx: int) -> Optional
""" # noqa
key = idx[:2]
atom = idx[2]
charge = self._net_charge if key[1] in self.CHARGE_LIKE else None
charge = self._net_charge[0] if key[1] in self.CHARGE_LIKE else None

frozen_idx = self.metadata.loc[key, 'frozen']
frozen_idx = self.metadata.loc[key, (0, 'frozen')]
frozen = frozen_idx.index[frozen_idx] if frozen_idx.any() else None

return update_charge(
atom, value,
param=self.param.loc[key, param_idx],
count=self.metadata.loc[key, 'count'],
count=self.metadata.loc[key, (0, 'count')],
atom_coefs=self.constraints[key],
prm_min=self.metadata.loc[key, 'min'],
prm_max=self.metadata.loc[key, 'max'],
prm_min=self.metadata.loc[key, (0, 'min')],
prm_max=self.metadata.loc[key, (0, 'max')],
net_charge=charge,
exclude=frozen,
)
23 changes: 13 additions & 10 deletions FOX/armc/sanitization.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
"""

from __future__ import annotations

import os
import copy
import warnings
Expand Down Expand Up @@ -169,13 +171,13 @@ def validate_atoms(
raise RuntimeError(msg)


def validate_charge(charge: Optional[float], tolerance: float = 0.01) -> None:
def validate_charge(charge: None | np.ndarray, tolerance: float = 0.01) -> None:
"""Check if the net **charge** is integer within a given **tolerance**."""
if charge is None:
return

delta = abs(charge - round(charge))
if delta > tolerance:
delta = np.abs(charge - charge.round())
if (delta > tolerance).any():
raise ValueError(f'Net charge {charge} not integer within a tolerance {tolerance}')


Expand All @@ -185,8 +187,8 @@ def validate_constraints(param: ParamMapping, tolerance: float = 0.01,
msg = ""

# Check minima and maxima
min_ok = param.metadata['min'] <= param.param[0]
max_ok = param.metadata['max'] >= param.param[0]
min_ok = param.metadata[0, 'min'] <= param.param[0]
max_ok = param.metadata[0, 'max'] >= param.param[0]
if not min_ok.all():
msg += f"Parameters smaller then specified minima:\n {param.param.loc[~min_ok, 0]}\n"
if not max_ok.all():
Expand Down Expand Up @@ -730,13 +732,14 @@ def update_count(param, psf=None, mol=None): # noqa: E302
else:
raise TypeError("'psf' and 'mol' cannot be both 'None'")

prm_count = param.metadata['count']
at_sequence = [atoms.split() for *_, atoms in prm_count.index]
index = param.metadata.index
prm_count_list = [param.metadata[i, 'count'] for i in param.metadata.columns.levels[0]]
at_sequence = [atoms.split() for *_, atoms in index]
for count in count_iter:
data = get_atom_count(at_sequence, count)
series = pd.Series({k: v for k, v in zip(prm_count.index, data) if v is not None},
name='unit')
prm_count.update(series)
series = pd.Series({k: v for k, v in zip(index, data) if v is not None}, name='unit')
for prm_count in prm_count_list:
prm_count.update(series)


def _assign_residues(plams_mol: Molecule, res_list: Iterable[Iterable[int]]) -> None:
Expand Down
23 changes: 16 additions & 7 deletions FOX/io/hdf5_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,13 +620,22 @@ def _get_xyz_dset(f: File) -> Tuple[np.ndarray, Dict[str, List[int]]]:

def _metadata_to_df(f: File, key: str) -> pd.DataFrame:
"""Convert the ``param_metadata`` dataset into a :class:`~pandas.DataFrame`."""
_index = f[key].attrs['index']
idx_gen = (_index[k].astype(str) for k in _index.dtype.names)
index = pd.MultiIndex.from_tuples(zip(*idx_gen), names=_index.dtype.names)

df = pd.DataFrame.from_records(f[key][:], index=index)
df['unit'] = df['unit'].values.astype(str)
df.sort_index(axis='columns', inplace=True)
dset = f[key]
index_ar = dset.attrs['index']
index = pd.MultiIndex.from_tuples(
zip(*(index_ar[k].astype(str) for k in index_ar.dtype.names)),
names=index_ar.dtype.names,
)

columns = pd.MultiIndex(
levels=[pd.Index([], dtype=np.int64), pd.Index([], dtype=np.object_)],
codes=[[], []],
)

df = pd.DataFrame(index=index, columns=columns)
for i, ar in enumerate(dset[:]):
for k in dset.dtype.names:
df[i, k] = ar[k] if k != "unit" else ar[k].astype(str)
return df


Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ ignore =
E704
E731
W504
E721
FOX/examples/ ALL
FOX/properties/*.pyi ALL
max-line-length = 100
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
'pytest-cov',
'flake8',
'pydocstyle',
'auto-FOX-data@git+https://github.com/nlesc-nano/auto-FOX-data@1.1.6',
'auto-FOX-data@git+https://github.com/nlesc-nano/auto-FOX-data@1.1.8',
'ase',
'CAT@git+https://github.com/nlesc-nano/CAT@master',
]
Expand Down Expand Up @@ -103,7 +103,7 @@
'pyyaml>=5.1',
'numpy>=1.15',
'scipy>=1.2',
'pandas',
'pandas>=0.24',
'schema>=0.7.1',
'AssertionLib>=2.3',
'noodles>=0.3.3',
Expand Down

0 comments on commit f04dfef

Please sign in to comment.