Enable reading n5 or zarr data in HDF5VolumeLoader

inferno-pytorch · Dec 26, 2018 · d8287f5
1 parent f0a979a
commit d8287f5
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 20 deletions.
diff --git a/inferno/io/volumetric/volume.py b/inferno/io/volumetric/volume.py
@@ -125,6 +125,17 @@ def __repr__(self):
 
 
 class HDF5VolumeLoader(VolumeLoader):
+
+    @staticmethod
+    def is_h5(file_path):
+        ext = os.path.splitext(file_path)[1].lower()
+        if ext in ('.h5', '.hdf', '.hdf5'):
+            return True
+        elif ext in ('.zarr', '.zr', '.n5'):
+            return False
+        else:
+            raise RuntimeError("Could not infer volume type for file extension %s" % ext)
+
     def __init__(self, path, path_in_h5_dataset=None, data_slice=None, transforms=None,
                  name=None, **slicing_config):
 
@@ -163,11 +174,14 @@ def __init__(self, path, path_in_h5_dataset=None, data_slice=None, transforms=No
         assert 'window_size' in slicing_config_for_name
         assert 'stride' in slicing_config_for_name
 
-        # Read in volume from file
-        volume = iou.fromh5(self.path, self.path_in_h5_dataset,
-                            dataslice=(tuple(self.data_slice)
-                                       if self.data_slice is not None
-                                       else None))
+        # Read in volume from file (can be hdf5, n5 or zarr)
+        dataslice_ = None if self.data_slice is None else tuple(self.data_slice)
+        if self.is_h5(self.path):
+            volume = iou.fromh5(self.path, self.path_in_h5_dataset,
+                                dataslice=dataslice_)
+        else:
+            volume = iou.fromz5(self.path, self.path_in_h5_dataset,
+                                dataslice=dataslice_)
         # Initialize superclass with the volume
         super(HDF5VolumeLoader, self).__init__(volume=volume, name=name, transforms=transforms,
                                                **slicing_config_for_name)

diff --git a/inferno/io/volumetric/volumetric_utils.py b/inferno/io/volumetric/volumetric_utils.py
@@ -49,7 +49,8 @@ def dimension_window(start, stop, wsize, stride, dimsize, ds_dim):
         stops  = [dimsize - wsize if wsize != dimsize else dimsize
                   for dimsize, wsize in zip(shape, window_size)]
 
-    assert all(stp > strt for strt, stp in zip(starts, stops)), "%s, %s" % (str(starts), str(stops))
+    assert all(stp > strt for strt, stp in zip(starts, stops)),\
+        "%s, %s" % (str(starts), str(stops))
     nslices = [dimension_window(start, stop, wsize, stride, dimsize, ds_dim)
                for start, stop, wsize, stride, dimsize, ds_dim
                in zip(starts, stops, window_size, strides, shape, ds)]

diff --git a/inferno/utils/io_utils.py b/inferno/utils/io_utils.py
@@ -13,29 +13,36 @@ def fromh5(path, datapath=None, dataslice=None, asnumpy=True, preptrain=None):
     """
     # Check if path exists (thanks Lukas!)
     assert os.path.exists(path), "Path {} does not exist.".format(path)
-    # Init file
-    h5file = h5.File(path)
-    # Init dataset
-    h5dataset = h5file[datapath] if datapath is not None else h5file.values()[0]
-    # Slice dataset
-    h5dataset = h5dataset[dataslice] if dataslice is not None else h5dataset
-    # Convert to numpy if required
-    h5dataset = np.asarray(h5dataset) if asnumpy else h5dataset
-    # Apply preptrain
-    h5dataset = preptrain(h5dataset) if preptrain is not None else h5dataset
-    # Close file
-    h5file.close()
-    # Return
+    with h5.File(path, 'r') as f:
+        # Init dataset
+        h5dataset = f[datapath] if datapath is not None else f.values()[0]
+        # Slice dataset
+        h5dataset = h5dataset[dataslice] if dataslice is not None else h5dataset
+        # Convert to numpy if required
+        h5dataset = np.asarray(h5dataset) if asnumpy else h5dataset
+        # Apply preptrain
+        h5dataset = preptrain(h5dataset) if preptrain is not None else h5dataset
     return h5dataset
 
 
 # TODO we could also do **h5_kwargs instead
 def toh5(data, path, datapath='data', compression=None, chunks=None):
     """Write `data` to a HDF5 volume."""
-    with h5.File(path, 'w') as f:
+    with h5.File(path) as f:
         f.create_dataset(datapath, data=data, compression=compression, chunks=chunks)
 
 
+def fromz5(path, datapath, dataslice=None, n_threads=8):
+    # we import z5py only here because we don't want to assume that it's in the env
+    import z5py
+    assert os.path.exists(path), "Path {} does not exist.".format(path)
+    with z5py.File(path) as f:
+        ds = f[datapath]
+        ds.n_threads = n_threads
+        data = ds[:] if dataslice is None else ds[dataslice]
+    return data
+
+
 # Yaml to dict reader
 def yaml2dict(path):
     if isinstance(path, dict):