Navigation Menu

Skip to content
This repository has been archived by the owner on Mar 22, 2023. It is now read-only.

Commit

Permalink
Merge pull request #43 from molsturm/dev-extend-hdf5
Browse files Browse the repository at this point in the history
Refactor _hdf5.py
  • Loading branch information
mfherbst committed Aug 11, 2018
2 parents 9b95a00 + 0733eaf commit 826545a
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 117 deletions.
2 changes: 1 addition & 1 deletion modules/gint
235 changes: 119 additions & 116 deletions src/interface/python/molsturm/_hdf5.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
## vi: tabstop=2 shiftwidth=2 softtabstop=2 expandtab ## vi: tabstop=4 shiftwidth=4 softtabstop=4 expandtab
## --------------------------------------------------------------------- ## ---------------------------------------------------------------------
## ##
## Copyright (C) 2017 by the molsturm authors ## Copyright (C) 2017 by the molsturm authors
Expand Down Expand Up @@ -27,164 +27,167 @@




def __emplace_ndarray(keyval, group, typ, **kwargs):
    """Store a numpy ndarray inside the HDF5 group.

    keyval is a (name, array) pair; typ is accepted only for interface
    uniformity with the other emplace helpers and is not used here.
    Extra kwargs are forwarded to create_dataset.
    """
    name, array = keyval
    dataset = group.create_dataset(name, data=array, **kwargs)
    dataset.attrs["type"] = "ndarray"




def __extract_ndarray(dataset):
    """Read the dataset fully into memory and return a (name, ndarray) pair."""
    buffer = np.empty(dataset.shape, dtype=dataset.dtype)
    dataset.read_direct(buffer)
    return basename(dataset.name), buffer




def __emplace_listlike(keyval, group, typ, **kwargs):
    """Store a list or tuple as an HDF5 dataset inside the group.

    np.array's type deduction heuristic is usually good enough here;
    a sequence of python strings is the exception and needs an explicit
    variable-length string dtype.
    """
    name, values = keyval
    if all(isinstance(item, str) for item in values):
        dtype = h5py.special_dtype(vlen=str)
    else:
        dtype = None

    dataset = group.create_dataset(name, data=np.array(values, dtype=dtype),
                                   **kwargs)
    dataset.attrs["type"] = "list"




def __extract_listlike(dataset):
    """Return a (name, list) pair for a dataset written from a list/tuple."""
    name, array = __extract_ndarray(dataset)
    return name, array.tolist()




def __emplace_none(keyval, group, typ, **kwargs):
    """Store a python None as an empty HDF5 dataset inside the group."""
    name = keyval[0]
    dataset = group.create_dataset(name, data=h5py.Empty("f"), **kwargs)
    dataset.attrs["type"] = "none"




def __extract_none(dataset):
    """Return a (name, None) pair for a dataset written from a python None."""
    name = basename(dataset.name)
    return name, None




# Mapping between python scalar types and the HDF5 dtypes used to store
# them.  Emplacing a value whose type is missing here raises an error;
# on extraction an unknown dtype is silently left untransformed.
# NOTE: order matters — bool must precede int, because in python bool
# is a subclass of int and the isinstance scans stop at the first hit.
__scalar_transform = [
    (str, h5py.special_dtype(vlen=str)),
    (bool, np.dtype("b1")),
    (complex, np.dtype("c16")),
    (float, np.dtype("f8")),
    (int, np.dtype("int64")),
]




def __emplace_scalar(keyval, group, typ, **kwargs):
    """Store a python scalar (str, bool, complex, float, int) in the group.

    Raises TypeError if the value's type has no registered HDF5 dtype
    in __scalar_transform.
    """
    name, value = keyval
    # First registered python type matching the value determines the dtype
    dtype = next((h5_dtype for py_type, h5_dtype in __scalar_transform
                  if isinstance(value, py_type)), None)
    if dtype is None:
        raise TypeError("Encountered unknown data type '" +
                        str(type(value)) + "'")

    dataset = group.create_dataset(name, data=value, dtype=dtype, **kwargs)
    dataset.attrs["type"] = "scalar"




def __extract_scalar(dataset):
    """Return a (name, scalar) pair for a dataset holding a single value.

    Handles both true HDF5 scalars (shape ``()``) and one-element
    array datasets.  If the stored dtype is registered in
    ``__scalar_transform``, the value is converted back to the
    corresponding python type; otherwise it is returned as-is.
    """
    dtype = None  # Target python type to transform to
    for py_type, h5_dtype in __scalar_transform:
        if dataset.dtype == h5_dtype:
            dtype = py_type
            break

    if dataset.shape == ():  # i.e. HDF5 scalar
        # dataset.value was deprecated in h5py 2.x and removed in h5py 3.0;
        # indexing with an empty tuple is the portable scalar read.
        ret = dataset[()]
    else:
        ret = dataset[0]

    if dtype is not None:
        ret = dtype(ret)
    return (basename(dataset.name), ret)




def __extract_dataset(dataset):
    """Select extractor based on the 'type' attribute and use that
    to make the proper key-value pair out of the dataset.

    Datasets lacking a 'type' attribute (e.g. written by other
    programs) are handled heuristically: shape () is treated as a
    scalar, anything else as an ndarray.

    Raises ValueError if the 'type' attribute holds an unknown value.
    """
    extractors = {
        "scalar": __extract_scalar,
        "none": __extract_none,
        "ndarray": __extract_ndarray,
        "list": __extract_listlike,
        "tuple": __extract_listlike,
    }

    if "type" not in dataset.attrs:
        if dataset.shape == ():
            return __extract_scalar(dataset)   # Treat as scalar
        return __extract_ndarray(dataset)      # Treat as array

    # Use type attribute to distinguish what should happen
    tpe = dataset.attrs["type"]
    if tpe not in extractors:
        # Previously this was a bare KeyError with no context at all.
        raise ValueError("Unknown 'type' attribute value '" + str(tpe) +
                         "' encountered in dataset " + dataset.name)
    return extractors[tpe](dataset)


def __emplace_key_value(kv, group, **kwargs):
    """
    Emplace a single key-value pair in the group.
    What precisely happens depends on the type of the value
    to emplace.

    Raises TypeError (prefixed with the offending key) if the value
    cannot be stored.
    """
    def __emplace_dict_inner(kv, group, typ, **kwargs):
        # Nested dicts become nested HDF5 groups
        subgroup = group.create_group(kv[0])
        emplace_dict(kv[1], subgroup)

    # Dispatch table: the first python type matching the value wins
    emplace_map = [
        (np.ndarray, __emplace_ndarray),
        (type(None), __emplace_none),
        (list, __emplace_listlike),
        (tuple, __emplace_listlike),
        (dict, __emplace_dict_inner),
    ]

    for (typ, emplace) in emplace_map:
        if isinstance(kv[1], typ):
            try:
                emplace(kv, group, typ, **kwargs)
            except TypeError as e:
                raise TypeError("Error with key '" + kv[0] + "': " + str(e))
            return

    # Fallback: Assume value is a simple scalar type.  Pass the value's
    # actual type explicitly instead of relying on the leaked loop
    # variable 'typ' (which held the last entry of emplace_map).
    try:
        __emplace_scalar(kv, group, type(kv[1]), **kwargs)
    except TypeError as e:
        raise TypeError("Error with key '" + kv[0] + "': " + str(e))


# #
# High-level routines # High-level routines
# #



def emplace_dict(d, group, **kwargs):
    """
    Emplace a python dictionary "d" into the HDF5 group "group",
    using the kwargs to create all necessary datasets.
    """
    for item in d.items():
        __emplace_key_value(item, group, **kwargs)




def extract_group(group):
    """Recursively convert an HDF5 group into a nested python dictionary."""
    members = list(group.values())

    # Subgroups become nested dictionaries:
    ret = {}
    for member in members:
        if isinstance(member, h5py.Group):
            ret[basename(member.name)] = extract_group(member)

    # Datasets become plain key-value pairs:
    ret.update(__extract_dataset(member) for member in members
               if isinstance(member, h5py.Dataset))

    # Sanity check: anything else in the file is unexpected
    for member in members:
        if member is None:
            continue
        if not isinstance(member, (h5py.Dataset, h5py.Group)):
            raise ValueError("Encountered object in h5py which is neither "
                             "a Group nor a Dataset")
    return ret

0 comments on commit 826545a

Please sign in to comment.