
Reformat and be more flexible regarding type attribute

mfherbst committed Aug 11, 2018
1 parent 50fe706 commit 0733eaf8193688c19c6e9ef648fc6f3651d05526
Showing with 119 additions and 118 deletions.
  1. +119 −118 src/interface/python/molsturm/_hdf5.py
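The substantive change is in __extract_dataset. A dataset without a "type" attribute no longer raises a ValueError; instead an empty shape is read back as a scalar and anything else as an ndarray (see the new branch in the diff below). A minimal sketch of what this enables, assuming the module is importable as molsturm._hdf5; the file name and dataset names are made up for illustration:

# Sketch only: "plain.hdf5" and the dataset names are hypothetical.
import h5py
import numpy as np
from molsturm._hdf5 import extract_group

# A file written by some external tool that sets no "type" attributes:
with h5py.File("plain.hdf5", "w") as f:
    f.create_dataset("energy", data=-1.17)      # shape ()     -> read back as scalar
    f.create_dataset("coeff", data=np.eye(3))   # shape (3, 3) -> read back as ndarray

with h5py.File("plain.hdf5", "r") as f:
    data = extract_group(f)   # before this commit: ValueError for both datasets
    print(data["energy"], data["coeff"].shape)  # -1.17 (3, 3)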
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-## vi: tabstop=2 shiftwidth=2 softtabstop=2 expandtab
+## vi: tabstop=4 shiftwidth=4 softtabstop=4 expandtab
 ## ---------------------------------------------------------------------
 ##
 ## Copyright (C) 2017 by the molsturm authors
@@ -27,166 +27,167 @@


 def __emplace_ndarray(keyval, group, typ, **kwargs):
-  dset = group.create_dataset(keyval[0], data=keyval[1], **kwargs)
-  dset.attrs["type"] = "ndarray"
+    dset = group.create_dataset(keyval[0], data=keyval[1], **kwargs)
+    dset.attrs["type"] = "ndarray"


 def __extract_ndarray(dataset):
-  # dtype = __dtype_array_from_hdf5(dataset.dtype)
-  arr = np.empty(dataset.shape, dtype=dataset.dtype)
-  dataset.read_direct(arr)
-  return (basename(dataset.name), arr)
+    # dtype = __dtype_array_from_hdf5(dataset.dtype)
+    arr = np.empty(dataset.shape, dtype=dataset.dtype)
+    dataset.read_direct(arr)
+    return (basename(dataset.name), arr)


 def __emplace_listlike(keyval, group, typ, **kwargs):
-  dtype = None
-  # Usually the heuristic for doing the conversion is pretty
-  # good here, but there are some exceptions.
-  if all(isinstance(v, str) for v in keyval[1]):
-    dtype = h5py.special_dtype(vlen=str)
+    dtype = None
+    # Usually the heuristic for doing the conversion is pretty
+    # good here, but there are some exceptions.
+    if all(isinstance(v, str) for v in keyval[1]):
+        dtype = h5py.special_dtype(vlen=str)

-  ary = np.array(keyval[1], dtype=dtype)
-  dset = group.create_dataset(keyval[0], data=ary, **kwargs)
-  dset.attrs["type"] = "list"
+    ary = np.array(keyval[1], dtype=dtype)
+    dset = group.create_dataset(keyval[0], data=ary, **kwargs)
+    dset.attrs["type"] = "list"


 def __extract_listlike(dataset):
-  key, arr = __extract_ndarray(dataset)
-  return (key, arr.tolist())
+    key, arr = __extract_ndarray(dataset)
+    return (key, arr.tolist())


 def __emplace_none(keyval, group, typ, **kwargs):
-  dset = group.create_dataset(keyval[0], data=h5py.Empty("f"), **kwargs)
-  dset.attrs["type"] = "none"
+    dset = group.create_dataset(keyval[0], data=h5py.Empty("f"), **kwargs)
+    dset.attrs["type"] = "none"


 def __extract_none(dataset):
-  return (basename(dataset.name), None)
+    return (basename(dataset.name), None)


 # Type transformations for scalar types
 # If type not found here, we have an error
 # in the direction python -> hdf5, else we ignore it.
 __scalar_transform = [
-  (str, h5py.special_dtype(vlen=str) ),
-  (bool, np.dtype("b1") ),
-  (complex, np.dtype("c16") ),
-  (float, np.dtype("f8") ),
-  (int, np.dtype("int64") ),
+    (str, h5py.special_dtype(vlen=str)),
+    (bool, np.dtype("b1")),
+    (complex, np.dtype("c16")),
+    (float, np.dtype("f8")),
+    (int, np.dtype("int64")),
 ]


 def __emplace_scalar(keyval, group, typ, **kwargs):
-  dtype = None  # Indicate no target type found
-  for t in __scalar_transform:
-    if isinstance(keyval[1], t[0]):
-      dtype = t[1]
-      break
-  if dtype is None:
-    raise TypeError("Encountered unknown data type '" + str(type(keyval[1])) + "'")
+    dtype = None  # Indicate no target type found
+    for t in __scalar_transform:
+        if isinstance(keyval[1], t[0]):
+            dtype = t[1]
+            break
+    if dtype is None:
+        raise TypeError("Encountered unknown data type '" +
+                        str(type(keyval[1])) + "'")

-  dset = group.create_dataset(keyval[0], data=keyval[1], dtype=dtype, **kwargs)
-  dset.attrs["type"] = "scalar"
+    dset = group.create_dataset(keyval[0], data=keyval[1],
+                                dtype=dtype, **kwargs)
+    dset.attrs["type"] = "scalar"


 def __extract_scalar(dataset):
-  dtype = None  # Target type to transform to
-  for t in __scalar_transform:
-    if dataset.dtype == t[1]:
-      dtype = t[0]
-      break
+    dtype = None  # Target type to transform to
+    for t in __scalar_transform:
+        if dataset.dtype == t[1]:
+            dtype = t[0]
+            break

-  if dataset.shape == ():  # i.e. HDF5 scalar
-    ret = dataset.value
-  else:
-    ret = dataset[0]
+    if dataset.shape == ():  # i.e. HDF5 scalar
+        ret = dataset.value
+    else:
+        ret = dataset[0]

-  if dtype is not None:
-    ret = dtype(ret)
-
-  return (basename(dataset.name), ret)
+    if dtype is not None:
+        ret = dtype(ret)
+    return (basename(dataset.name), ret)


 def __extract_dataset(dataset):
-  """Select extractor based on the type attribute and use that
-  to make the proper key-value pair out of the dataset
-  """
-  try:
-    tpe = dataset.attrs["type"]
-  except KeyError:
-    raise ValueError("Right now the parser requires a 'type' attribute for each dataset")
-    # TODO One could fall back to a heuristic where a single element in a dataset
-    # indicates a scalar and else return a np.array or None according.
-    #
-    # That way we cannot get back lists or tuples, but ... fair enough
-
-  attribute_map = {
-    "scalar": __extract_scalar,
-    "none": __extract_none,
-    "ndarray": __extract_ndarray,
-    "list": __extract_listlike,
-    "tuple": __extract_listlike,
-  }
-  return attribute_map[tpe](dataset)
+    """Select extractor based on the type attribute and use that
+    to make the proper key-value pair out of the dataset
+    """
+    if "type" not in dataset.attrs:
+        if dataset.shape == ():
+            return __extract_scalar(dataset)  # Treat as scalar
+        else:
+            return __extract_ndarray(dataset)  # Treat as array
+    else:
+        # Use type attribute to distinguish what should happen
+        tpe = dataset.attrs["type"]
+        return {
+            "scalar": __extract_scalar,
+            "none": __extract_none,
+            "ndarray": __extract_ndarray,
+            "list": __extract_listlike,
+            "tuple": __extract_listlike,
+        }[tpe](dataset)


 def __emplace_key_value(kv, group, **kwargs):
-  """
-  Emplace a single key-value pair in the group.
-  What precisely happends depends on the type of the value
-  to emplace.
-  """
-
-  def __emplace_dict_inner(kv, group, typ, **kwargs):
-    subgroup = group.create_group(kv[0])
-    emplace_dict(kv[1], subgroup)
-
-  emplace_map = [
-    (np.ndarray, __emplace_ndarray ),
-    (type(None), __emplace_none ),
-    (list, __emplace_listlike ),
-    (tuple, __emplace_listlike ),
-    (dict, __emplace_dict_inner ),
-  ]
-
-  for (typ, emplace) in emplace_map:
-    if isinstance(kv[1], typ):
-      try:
-        emplace(kv, group, typ, **kwargs)
-      except TypeError as e:
-        raise TypeError("Error with key '"+kv[0]+"': "+str(e))
-      return
-
-  # Fallback: Assume value is a simple scalar type
-  try:
-    __emplace_scalar( kv, group, typ, **kwargs)
-  except TypeError as e:
-    raise TypeError("Error with key '"+kv[0]+"': "+str(e))
+    """
+    Emplace a single key-value pair in the group.
+    What precisely happends depends on the type of the value
+    to emplace.
+    """
+
+    def __emplace_dict_inner(kv, group, typ, **kwargs):
+        subgroup = group.create_group(kv[0])
+        emplace_dict(kv[1], subgroup)
+
+    emplace_map = [
+        (np.ndarray, __emplace_ndarray),
+        (type(None), __emplace_none),
+        (list, __emplace_listlike),
+        (tuple, __emplace_listlike),
+        (dict, __emplace_dict_inner),
+    ]
+
+    for (typ, emplace) in emplace_map:
+        if isinstance(kv[1], typ):
+            try:
+                emplace(kv, group, typ, **kwargs)
+            except TypeError as e:
+                raise TypeError("Error with key '" + kv[0] + "': " + str(e))
+            return
+
+    # Fallback: Assume value is a simple scalar type
+    try:
+        __emplace_scalar(kv, group, typ, **kwargs)
+    except TypeError as e:
+        raise TypeError("Error with key '" + kv[0] + "': " + str(e))

 #
 # High-level routines
 #


 def emplace_dict(d, group, **kwargs):
-  """
-  Emplace a python dictionary "d" into the HDF5 group "group"
-  using the kwargs to create all neccessary datasets.
-  """
-  for kv in d.items():
-    __emplace_key_value(kv, group, **kwargs)
+    """
+    Emplace a python dictionary "d" into the HDF5 group "group"
+    using the kwargs to create all neccessary datasets.
+    """
+    for kv in d.items():
+        __emplace_key_value(kv, group, **kwargs)


 def extract_group(group):
-  # Recursively extract all groups:
-  ret = { basename(v.name) : extract_group(v) for v in group.values()
-          if isinstance(v,h5py.Group) }
-
-  # Now deal with all datasets
-  ret.update([ __extract_dataset(v) for v in group.values()
-               if isinstance(v,h5py.Dataset) ])
-
-  if not all(isinstance(v,(h5py.Dataset,h5py.Group)) or v is None
-             for v in group.values()):
-    raise ValueError("Encountered object in h5py which is neither a Group nor a Dataset")
-
-  return ret
+    # Recursively extract all groups:
+    ret = {basename(v.name): extract_group(v) for v in group.values()
+           if isinstance(v, h5py.Group)}
+
+    # Now deal with all datasets
+    ret.update([__extract_dataset(v) for v in group.values()
+                if isinstance(v, h5py.Dataset)])
+
+    if not all(isinstance(v, (h5py.Dataset, h5py.Group)) or v is None
+               for v in group.values()):
+        raise ValueError("Encountered object in h5py which is neither "
+                         "a Group nor a Dataset")
+    return ret
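For context, the two public routines touched at the end of the diff, emplace_dict and extract_group, round-trip a nested dictionary through an HDF5 group. A short usage sketch with made-up parameters (the file name and keys are illustrative, not taken from molsturm):

# Sketch only: illustrative data, not part of this commit.
import h5py
from molsturm._hdf5 import emplace_dict, extract_group

params = {
    "restricted": True,               # plain scalars go through __emplace_scalar
    "n_alpha": 5,
    "basis": ["sto-3g", "def2-svp"],  # list of str -> vlen string dataset, type="list"
    "guess": None,                    # stored as an empty dataset, type="none"
    "scf": {"max_iter": 100},         # nested dict -> subgroup
}

with h5py.File("params.hdf5", "w") as f:
    emplace_dict(params, f)

with h5py.File("params.hdf5", "r") as f:
    restored = extract_group(f)

print(restored["scf"]["max_iter"], restored["basis"], restored["guess"])

Note that tuples are emplaced through the same __emplace_listlike path as lists and therefore come back as lists.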
