# Pack as Zarr

In [1]:
import zarr
import numpy as np
from pathlib import Path
from funlib.persistence import prepare_ds
from funlib.geometry import Coordinate
import json
from tifffile import imread


In [3]:
# List all folders
folders = [p for p in Path('.').glob('*/') if p.is_dir() and not "." in p.name] # avoid .ipynb folder and .zarr
print(folders)

[PosixPath('crop_03'), PosixPath('crop_04'), PosixPath('crop_05'), PosixPath('crop_02'), PosixPath('crop_07'), PosixPath('crop_00'), PosixPath('crop_01'), PosixPath('crop_06')]


In [4]:
# load metada.json
metadata = json.load(open('metadata.json'))
print(metadata)

{'pixel_nm': [4, 4, 4], 'crops': {'crop_00': {'z': 0, 'y': 0, 'x': 0}, 'crop_01': {'z': 256, 'y': 0, 'x': 0}, 'crop_02': {'z': 0, 'y': 256, 'x': 0}, 'crop_03': {'z': 256, 'y': 256, 'x': 0}, 'crop_04': {'z': 0, 'y': 0, 'x': 256}, 'crop_05': {'z': 256, 'y': 0, 'x': 256}, 'crop_06': {'z': 0, 'y': 256, 'x': 256}, 'crop_07': {'z': 256, 'y': 256, 'x': 256}}}


In [5]:
# split train and test
train_folders = folders[:-2]
test_folders = folders[-2:]

In [10]:
def add_to_zarr(zarr_file, folders, file_metadata):
    axis_names = ["z", "y", "x"]
    axis_names_labels = ["c^", "z", "y", "x"]
    units = ["nm", "nm", "nm"]
    voxel_size = Coordinate(*file_metadata['pixel_nm'])

    # save train
    for folder in folders:
        name = folder.name

        # load crop and labels
        crop = imread(folder / 'crop.tif')
        labels = imread(folder / 'labels.tif')

        # metadata
        offset = Coordinate(
            file_metadata['crops'][name]["z"],
            file_metadata['crops'][name]["y"],
            file_metadata['crops'][name]["x"]
        )

        # create crop group
        store = zarr.DirectoryStore(zarr_file)
        root = zarr.group(store=store)
        root.create_group(f"{folder.name}")

        # save crop and labels
        zarr_array = prepare_ds(
            zarr_file + f"/{folder.name}/raw",
            crop.shape,
            offset=offset,
            voxel_size=voxel_size,
            axis_names=axis_names,
            units=units,
            mode="w",
            dtype=np.uint8,
        )
        zarr_array[zarr_array.roi] = crop

        zarr_array = prepare_ds(
            zarr_file + f"/{folder.name}/labels",
            labels.shape,
            offset=offset,
            voxel_size=voxel_size,
            axis_names=axis_names_labels,
            units=units,
            mode="w",
            dtype=np.uint8,
        )
        zarr_array[zarr_array.roi] = labels

In [11]:
# create zarr file
data = "image.zarr"
store = zarr.DirectoryStore(data)
root = zarr.group(store=store)

# Create groups for 'test' and 'train'
test_group = root.create_group("test", overwrite=True)
train_group = root.create_group("train", overwrite=True)

# add folders
add_to_zarr(f"{data}/train", train_folders, metadata)
add_to_zarr(f"{data}/test", test_folders, metadata)

In [12]:
# show zarr
print(zarr.open(Path(".") / data).tree())

/
 ├── test
 │   ├── crop_01
 │   │   ├── labels (2, 256, 256, 256) uint8
 │   │   └── raw (256, 256, 256) uint8
 │   └── crop_06
 │       ├── labels (2, 256, 256, 256) uint8
 │       └── raw (256, 256, 256) uint8
 └── train
     ├── crop_00
     │   ├── labels (2, 256, 256, 256) uint8
     │   └── raw (256, 256, 256) uint8
     ├── crop_02
     │   ├── labels (2, 256, 256, 256) uint8
     │   └── raw (256, 256, 256) uint8
     ├── crop_03
     │   ├── labels (2, 256, 256, 256) uint8
     │   └── raw (256, 256, 256) uint8
     ├── crop_04
     │   ├── labels (2, 256, 256, 256) uint8
     │   └── raw (256, 256, 256) uint8
     ├── crop_05
     │   ├── labels (2, 256, 256, 256) uint8
     │   └── raw (256, 256, 256) uint8
     └── crop_07
         ├── labels (2, 256, 256, 256) uint8
         └── raw (256, 256, 256) uint8
