Navigation Menu

Skip to content
This repository has been archived by the owner on Mar 22, 2023. It is now read-only.

Commit

Permalink
Merge pull request #43 from molsturm/dev-extend-hdf5
Browse files Browse the repository at this point in the history
Refactor _hdf5.py
  • Loading branch information
mfherbst committed Aug 11, 2018
2 parents 9b95a00 + 0733eaf commit 826545a
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 117 deletions.
2 changes: 1 addition & 1 deletion modules/gint
235 changes: 119 additions & 116 deletions src/interface/python/molsturm/_hdf5.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
## vi: tabstop=2 shiftwidth=2 softtabstop=2 expandtab ## vi: tabstop=4 shiftwidth=4 softtabstop=4 expandtab
## --------------------------------------------------------------------- ## ---------------------------------------------------------------------
## ##
## Copyright (C) 2017 by the molsturm authors ## Copyright (C) 2017 by the molsturm authors
Expand Down Expand Up @@ -27,164 +27,167 @@




def __emplace_ndarray(keyval, group, typ, **kwargs):
    """Store a numpy ndarray inside the HDF5 group.

    keyval is a (name, array) pair; typ is accepted only for interface
    uniformity with the other emplace helpers and is not used here.
    Extra kwargs are forwarded to create_dataset.
    """
    name, array = keyval
    dataset = group.create_dataset(name, data=array, **kwargs)
    dataset.attrs["type"] = "ndarray"




def __extract_ndarray(dataset):
    """Read the dataset fully into memory and return a (name, ndarray) pair."""
    buffer = np.empty(dataset.shape, dtype=dataset.dtype)
    dataset.read_direct(buffer)
    return basename(dataset.name), buffer




def __emplace_listlike(keyval, group, typ, **kwargs):
    """Store a list or tuple as an HDF5 dataset inside the group.

    np.array's type deduction heuristic is usually good enough here;
    a sequence of python strings is the exception and needs an explicit
    variable-length string dtype.
    """
    name, values = keyval
    if all(isinstance(item, str) for item in values):
        dtype = h5py.special_dtype(vlen=str)
    else:
        dtype = None

    dataset = group.create_dataset(name, data=np.array(values, dtype=dtype),
                                   **kwargs)
    dataset.attrs["type"] = "list"




def __extract_listlike(dataset):
    """Return a (name, list) pair for a dataset written from a list/tuple."""
    name, array = __extract_ndarray(dataset)
    return name, array.tolist()




def __emplace_none(keyval, group, typ, **kwargs):
    """Store a python None as an empty HDF5 dataset inside the group."""
    name = keyval[0]
    dataset = group.create_dataset(name, data=h5py.Empty("f"), **kwargs)
    dataset.attrs["type"] = "none"




def __extract_none(dataset):
    """Return a (name, None) pair for a dataset written from a python None."""
    name = basename(dataset.name)
    return name, None




# Mapping between python scalar types and the HDF5 dtypes used to store
# them.  Emplacing a value whose type is missing here raises an error;
# on extraction an unknown dtype is silently left untransformed.
# NOTE: order matters — bool must precede int, because in python bool
# is a subclass of int and the isinstance scans stop at the first hit.
__scalar_transform = [
    (str, h5py.special_dtype(vlen=str)),
    (bool, np.dtype("b1")),
    (complex, np.dtype("c16")),
    (float, np.dtype("f8")),
    (int, np.dtype("int64")),
]




def __emplace_scalar(keyval, group, typ, **kwargs):
    """Store a python scalar (str, bool, complex, float, int) in the group.

    Raises TypeError if the value's type has no registered HDF5 dtype
    in __scalar_transform.
    """
    name, value = keyval
    # First registered python type matching the value determines the dtype
    dtype = next((h5_dtype for py_type, h5_dtype in __scalar_transform
                  if isinstance(value, py_type)), None)
    if dtype is None:
        raise TypeError("Encountered unknown data type '" +
                        str(type(value)) + "'")

    dataset = group.create_dataset(name, data=value, dtype=dtype, **kwargs)
    dataset.attrs["type"] = "scalar"




def __extract_scalar(dataset):
    """Return a (name, scalar) pair for a dataset holding a single value.

    Handles both true HDF5 scalars (shape ``()``) and one-element
    array datasets.  If the stored dtype is registered in
    ``__scalar_transform``, the value is converted back to the
    corresponding python type; otherwise it is returned as-is.
    """
    dtype = None  # Target python type to transform to
    for py_type, h5_dtype in __scalar_transform:
        if dataset.dtype == h5_dtype:
            dtype = py_type
            break

    if dataset.shape == ():  # i.e. HDF5 scalar
        # dataset.value was deprecated in h5py 2.x and removed in h5py 3.0;
        # indexing with an empty tuple is the portable scalar read.
        ret = dataset[()]
    else:
        ret = dataset[0]

    if dtype is not None:
        ret = dtype(ret)
    return (basename(dataset.name), ret)




def __extract_dataset(dataset):
    """Select extractor based on the 'type' attribute and use that
    to make the proper key-value pair out of the dataset.

    Datasets lacking a 'type' attribute (e.g. written by other
    programs) are handled heuristically: shape () is treated as a
    scalar, anything else as an ndarray.

    Raises ValueError if the 'type' attribute holds an unknown value.
    """
    extractors = {
        "scalar": __extract_scalar,
        "none": __extract_none,
        "ndarray": __extract_ndarray,
        "list": __extract_listlike,
        "tuple": __extract_listlike,
    }

    if "type" not in dataset.attrs:
        if dataset.shape == ():
            return __extract_scalar(dataset)   # Treat as scalar
        return __extract_ndarray(dataset)      # Treat as array

    # Use type attribute to distinguish what should happen
    tpe = dataset.attrs["type"]
    if tpe not in extractors:
        # Previously this was a bare KeyError with no context at all.
        raise ValueError("Unknown 'type' attribute value '" + str(tpe) +
                         "' encountered in dataset " + dataset.name)
    return extractors[tpe](dataset)


def __emplace_key_value(kv, group, **kwargs):
    """
    Emplace a single key-value pair in the group.
    What precisely happens depends on the type of the value
    to emplace.

    Raises TypeError (prefixed with the offending key) if the value
    cannot be stored.
    """
    def __emplace_dict_inner(kv, group, typ, **kwargs):
        # Nested dicts become nested HDF5 groups
        subgroup = group.create_group(kv[0])
        emplace_dict(kv[1], subgroup)

    # Dispatch table: the first python type matching the value wins
    emplace_map = [
        (np.ndarray, __emplace_ndarray),
        (type(None), __emplace_none),
        (list, __emplace_listlike),
        (tuple, __emplace_listlike),
        (dict, __emplace_dict_inner),
    ]

    for (typ, emplace) in emplace_map:
        if isinstance(kv[1], typ):
            try:
                emplace(kv, group, typ, **kwargs)
            except TypeError as e:
                raise TypeError("Error with key '" + kv[0] + "': " + str(e))
            return

    # Fallback: Assume value is a simple scalar type.  Pass the value's
    # actual type explicitly instead of relying on the leaked loop
    # variable 'typ' (which held the last entry of emplace_map).
    try:
        __emplace_scalar(kv, group, type(kv[1]), **kwargs)
    except TypeError as e:
        raise TypeError("Error with key '" + kv[0] + "': " + str(e))


# #
# High-level routines # High-level routines
# #



def emplace_dict(d, group, **kwargs):
    """
    Emplace a python dictionary "d" into the HDF5 group "group",
    using the kwargs to create all necessary datasets.
    """
    for item in d.items():
        __emplace_key_value(item, group, **kwargs)




def extract_group(group):
    """Recursively convert an HDF5 group into a nested python dictionary."""
    members = list(group.values())

    # Subgroups become nested dictionaries:
    ret = {}
    for member in members:
        if isinstance(member, h5py.Group):
            ret[basename(member.name)] = extract_group(member)

    # Datasets become plain key-value pairs:
    ret.update(__extract_dataset(member) for member in members
               if isinstance(member, h5py.Dataset))

    # Sanity check: anything else in the file is unexpected
    for member in members:
        if member is None:
            continue
        if not isinstance(member, (h5py.Dataset, h5py.Group)):
            raise ValueError("Encountered object in h5py which is neither "
                             "a Group nor a Dataset")
    return ret

0 comments on commit 826545a

Please sign in to comment.