In [1]:
########################################################################
# File       : CZI-ZARR Save Dask Array.ipynb
# Version    : 0.1
# Author     : czsrh
# Date       : 12.11.2019
# Institution: Carl Zeiss Microscopy GmbH
#
# Disclaimer: Just for testing - Use at your own risk.
# Feedback or Improvements are welcome.
########################################################################

This notebook was mainly inspired by the following blogposts:

[Load Large Image Data with Dask Array](https://blog.dask.org/2019/06/20/load-image-data)

[Introducing napari: a fast n-dimensional image viewer in Python](https://ilovesymposia.com/2019/10/24/introducing-napari-a-fast-n-dimensional-image-viewer-in-python)

In [2]:
# this can be used to switch on/off warnings
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# import the libraries mentioned above
from apeer_ometiff_library import io, processing, omexmlClass
import czifile as zis
import xmltodict
import os
import time
import numpy as np
import ipywidgets as widgets
import napari
import imgfileutils as imf
import xml.etree.ElementTree as ET
import zarr
import dask
import dask.array as da
import glob

In [3]:
# The directory contains 96 scenes of a wellplate as individual CZI files,
# which were created by SplitScenesWriteFiles.

# Get the list of all filenames.
# glob.glob() returns matches in arbitrary (filesystem-dependent) order, so the
# list is sorted to make the order of the files - and therefore the order in
# which they are later combined - reproducible between runs and machines.
# Example of a Windows location:
#filenames = glob.glob(r'c:\Users\m1srh\Documents\Testdata_Zeiss\Castor\EMBL\96well\testwell96_Single_CZI\*.czi')
filenames = sorted(glob.glob(r'/datadisk1/tuxedo/testpictures/Testdata_Zeiss/wellplate/single_czi/*.czi'))

# show number of files
len(filenames)

96

In [4]:
def get_czi_array(filename):
    """Return only the pixel array of a CZI file.

    Thin wrapper around ``imf.get_array_czi`` that discards the metadata
    part of its two-element result, so the function yields a single object.

    :param filename: path of the CZI file to read
    :return: the array part of the result of ``imf.get_array_czi``
    """
    # the second element (metadata) is intentionally not used here
    czi_array, _unused_metadata = imf.get_array_czi(filename)

    return czi_array

# read the metadata of the first file only - it is used as the template for all
# files (assumption here is that all scenes have the same shape and pixel type)
metadata, add_metadata = imf.get_metadata_czi(filenames[0])

# required shape of a single array: the reported shape without its last entry
# NOTE(review): presumably the dropped entry is an axis that is not part of the
# array returned by get_czi_array - TODO confirm against imgfileutils
array_shape = metadata['Shape'][:-1]

# required pixel type for such an array
array_dtype = metadata['NumPy.dtype']

for template_property in (array_shape, array_dtype):
    print(template_property)

# find the index for the Scenes dimension from the dimension string
dims_dict, dimindex_list, numvalid_dims = imf.get_dimorder(metadata['Axes'])
# bare lookup: not displayed, since it is not the last expression of the cell
dims_dict['S']

# lazy reading: wrap the reader once, then declare one dask array per file
# with the shape and dtype taken from the template metadata
delayed_reader = dask.delayed(get_czi_array)
lazy_arrays = [
    da.from_delayed(delayed_reader(fn), shape=array_shape, dtype=array_dtype)
    for fn in filenames
]

Key not found: 0
No Scence or Well Information detected:
(1, 1, 1, 1920, 1920)
uint8


In [5]:
# look at a single array (the first entry of the list of lazy arrays)
lazy_arrays[0]

Unnamed: 0,Array,Chunk
Bytes,11.10 MB,11.10 MB
Shape,"(1, 1, 2, 1416, 1960)","(1, 1, 2, 1416, 1960)"
Count,2 Tasks,1 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 11.10 MB 11.10 MB Shape (1, 1, 2, 1416, 1960) (1, 1, 2, 1416, 1960) Count 2 Tasks 1 Chunks Type uint16 numpy.ndarray",1  1  1960  1416  2,

Unnamed: 0,Array,Chunk
Bytes,11.10 MB,11.10 MB
Shape,"(1, 1, 2, 1416, 1960)","(1, 1, 2, 1416, 1960)"
Count,2 Tasks,1 Chunks
Type,uint16,numpy.ndarray


In [6]:
# concatenate the arrays along the scenes dimension
# (all of them here - slice with [:n] instead to use only the first n arrays)
arrays_to_join = lazy_arrays[:]
full_array = da.concatenate(arrays_to_join, axis=dims_dict['S'])

In [7]:
# show the full (concatenated) dask array
full_array

Unnamed: 0,Array,Chunk
Bytes,1.07 GB,11.10 MB
Shape,"(1, 96, 2, 1416, 1960)","(1, 1, 2, 1416, 1960)"
Count,288 Tasks,96 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 1.07 GB 11.10 MB Shape (1, 96, 2, 1416, 1960) (1, 1, 2, 1416, 1960) Count 288 Tasks 96 Chunks Type uint16 numpy.ndarray",96  1  1960  1416  2,

Unnamed: 0,Array,Chunk
Bytes,1.07 GB,11.10 MB
Shape,"(1, 96, 2, 1416, 1960)","(1, 1, 2, 1416, 1960)"
Count,288 Tasks,96 Chunks
Type,uint16,numpy.ndarray


In [9]:
# set to True to write the ZARR array with Blosc compression
use_compression = False

# construct new filename for dask array - it is placed in the folder of the first CZI file
zarr_arrayname = os.path.join(os.path.dirname(filenames[0]), 'testwell96.zarr')
print('Try to save to : ', zarr_arrayname)

# save to ZARR array if not already existing - an existing one is never overwritten
if os.path.exists(zarr_arrayname):
    print('Dask Array already exits. Do not overwrite.')
else:
    print('Saving ZARR Array to : ', zarr_arrayname)

    # write data to disk using dask array
    if use_compression:
        # imported here because it is only needed for the compressed variant
        from numcodecs import Blosc
        zstd_compressor = Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE)
        full_array.to_zarr(zarr_arrayname, compressor=zstd_compressor)
    else:
        # just use the "simple save" method
        full_array.to_zarr(zarr_arrayname)

Try to save to :  c:\Users\m1srh\Documents\Testdata_Zeiss\Castor\EMBL\96well\testwell96_Single_CZI\testwell96.zarr
Dask Array already exits. Do not overwrite.


In [10]:
# open the ZARR array from disk again as a dask array
zarr_image = da.from_zarr(zarr_arrayname)

# report type and shape of the array that was read back
for label, value in (('Array Type  : ', type(zarr_image)),
                     ('Array Shape : ', zarr_image.shape)):
    print(label, value)

Array Type  :  <class 'dask.array.core.Array'>
Array Shape :  (1, 96, 2, 1416, 1960)


In [11]:
# switch IPython to the qt5 GUI event loop for the napari viewer and wait
# 5 seconds before continuing
# NOTE(review): the pause presumably gives the event loop time to start - confirm it is still needed

%gui qt5
time.sleep(5)

In [13]:
# initialize Napari Viewer and add the two channels as layers
# (index 2 of the 5 indexed dimensions selects the channel)
viewer = napari.Viewer()

first_channel = zarr_image[:, :, 0, :, :]
second_channel = zarr_image[:, :, 1, :, :]

# both layers use additive blending
viewer.add_image(first_channel, name='A568', colormap='red', blending='additive')
viewer.add_image(second_channel, name='A488', colormap='green', blending='additive')

<Image layer 'A488' at 0x1168b79cd88>

jupyter nbconvert "CZI-ZARR Save Dask Array.ipynb" --to slides --post serve