# Image viewer

This notebook is for inspecting timelapse microscopy data, with associated sinhgle-cell labels and tracks, showing the infection of human macrophages with Mycobacterium Tuberculosis (Mtb), acquired on an Opera Phenix confocal microscope. 

In [5]:
import napari
from macrohet import dataio, tile, visualise
import btrack
print(btrack.__version__)
import os
import json 
from tqdm.auto import tqdm
import numpy as np
from natsort import natsorted
import glob
from skimage import io
import zarr
import re
import dask
scale_factor = 2 #5.04
ndim = 2

0.6.1.dev116


In [2]:
os.getpid()

84907

### Load experiment of choice

The Opera Phenix is a high-throughput confocal microscope that acquires very large 5-dimensional (TCZXY) images over several fields of view in any one experiment. Therefore, a lazy-loading approach is chosen to mosaic, view and annotate these images. This approach depends upon Dask and DaskFusion. The first step is to load the main metadata file (typically called `Index.idx.xml` and located in the main `Images` directory) that contains the image filenames and associated TCXZY information used to organise the images.

In [4]:
expt_ID = 'PS0000'
location = 'DATA' # 'NEMO' # 'SYNO'
base_dir = f'/mnt/{location}/macrohet_{location.lower()}/{expt_ID}/'
metadata_fn = os.path.join(base_dir, 'acquisition/Images/Index.idx.xml')
metadata = dataio.read_harmony_metadata(metadata_fn)  
metadata_fn = glob.glob(os.path.join(base_dir, 'acquisition/Assaylayout/*.xml'))[0]
assay_layout = dataio.read_harmony_metadata(metadata_fn, assay_layout=True,)
metadata

Reading metadata XML file...


0it [00:00, ?it/s]

Extracting metadata complete!
Reading metadata XML file...
Extracting metadata complete!


Unnamed: 0,id,State,URL,Row,Col,FieldID,PlaneID,TimepointID,ChannelID,FlimID,...,PositionZ,AbsPositionZ,MeasurementTimeOffset,AbsTime,MainExcitationWavelength,MainEmissionWavelength,ObjectiveMagnification,ObjectiveNA,ExposureTime,OrientationMatrix
0,0303K1F1P1R1,Ok,r03c03f01p01-ch1sk1fk1fl1.tiff,3,3,1,1,0,1,1,...,0,0.135583505,0,2021-04-16T19:09:33.84+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
1,0303K1F1P1R2,Ok,r03c03f01p01-ch2sk1fk1fl1.tiff,3,3,1,1,0,2,1,...,0,0.135583505,0,2021-04-16T19:09:33.84+01:00,640,706,40,1.1,0.2,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
2,0303K1F1P2R1,Ok,r03c03f01p02-ch1sk1fk1fl1.tiff,3,3,1,2,0,1,1,...,2E-06,0.135585502,0,2021-04-16T19:09:34.12+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
3,0303K1F1P2R2,Ok,r03c03f01p02-ch2sk1fk1fl1.tiff,3,3,1,2,0,2,1,...,2E-06,0.135585502,0,2021-04-16T19:09:34.12+01:00,640,706,40,1.1,0.2,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
4,0303K1F1P3R1,Ok,r03c03f01p03-ch1sk1fk1fl1.tiff,3,3,1,3,0,1,1,...,4E-06,0.135587499,0,2021-04-16T19:09:34.4+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113395,0609K75F9P1R2,Ok,r06c09f09p01-ch2sk75fk1fl1.tiff,6,9,9,1,74,2,1,...,0,0.135533601,266399.61,2021-04-19T21:14:19.477+01:00,640,706,40,1.1,0.2,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
113396,0609K75F9P2R1,Ok,r06c09f09p02-ch1sk75fk1fl1.tiff,6,9,9,2,74,1,1,...,2E-06,0.135535598,266399.61,2021-04-19T21:14:19.757+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
113397,0609K75F9P2R2,Ok,r06c09f09p02-ch2sk75fk1fl1.tiff,6,9,9,2,74,2,1,...,2E-06,0.135535598,266399.61,2021-04-19T21:14:19.757+01:00,640,706,40,1.1,0.2,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
113398,0609K75F9P3R1,Ok,r06c09f09p03-ch1sk75fk1fl1.tiff,6,9,9,3,74,1,1,...,4E-06,0.135537595,266399.61,2021-04-19T21:14:20.037+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."


### Now to lazily mosaic the images using Dask prior to saving them out as zarr

In [23]:
rechunked_images

Unnamed: 0,Array,Chunk
Bytes,30.66 GiB,69.77 MiB
Shape,"(75, 2, 3, 6048, 6048)","(1, 1, 1, 6048, 6048)"
Dask graph,450 chunks in 1804 graph layers,450 chunks in 1804 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 30.66 GiB 69.77 MiB Shape (75, 2, 3, 6048, 6048) (1, 1, 1, 6048, 6048) Dask graph 450 chunks in 1804 graph layers Data type uint16 numpy.ndarray",2  75  6048  6048  3,

Unnamed: 0,Array,Chunk
Bytes,30.66 GiB,69.77 MiB
Shape,"(75, 2, 3, 6048, 6048)","(1, 1, 1, 6048, 6048)"
Dask graph,450 chunks in 1804 graph layers,450 chunks in 1804 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray


In [None]:
zarr_fns_to_do =[]
pattern = r'\((\d+), (\d+)\)\.zarr$'
for acq_ID, data in tqdm(assay_layout.iterrows(), total = len(assay_layout)):
    # break
    zarr_fn = f'/mnt/SYNO/macrohet_syno/{expt_ID}/acquisition/zarr/{acq_ID}.zarr'
    if not os.path.exists(zarr_fn):
        zarr_fns_to_do.append(zarr_fn)

for zarr_fn in tqdm(zarr_fns_to_do, total = len(zarr_fns_to_do), desc = 'Iterating over zarr fns to do'):
    # Use re.search to find the match in the zarr_fn
    match = re.search(pattern, zarr_fn)
    # Get the matched tuple values as integers
    row, column = acq_ID = (int(match.group(1)), int(match.group(2)))

    image_dir = os.path.join(base_dir, 'acquisition/Images')
    try:
        dask_images = tile.compile_mosaic(image_dir, 
                                     metadata, 
                                     row, column, 
                                     # set_channel=1,set_plane=1,set_time=1
                                    ).compute()
    
        data = assay_layout.loc[acq_ID]
        
        zarr_group = zarr.open(zarr_fn, mode='w')
        acq_metadata = data.to_dict()
        acq_metadata['Acquisition ID'] = acq_ID
        acq_metadata['Experiment ID'] = expt_ID
        acq_metadata['Dimensionality'] = 'TCZYX'
        rechunked_images = dask_images.rechunk((1, 1, 1, 6048, 6048))
        print('Loading image')
        # rechunked_images = rechunked_images
        # Save Dask array to Zarr
        print('Saving zarr')
        dask.array.to_zarr(rechunked_images, zarr_fn, component='images')
        
        # Store metadata in the Zarr group's attributes
        zarr_group.attrs['metadata'] = acq_metadata
    except Exception as e:
        print(f"An error occurred: {e}")
        
        print(acq_ID, e)
        continue

  0%|          | 0/24 [00:00<?, ?it/s]

Iterating over zarr fns to do:   0%|          | 0/24 [00:00<?, ?it/s]

Loading image
Saving zarr
Loading image
Saving zarr
Loading image
Saving zarr


In [60]:
dask_images

Unnamed: 0,Array,Chunk
Bytes,69.77 MiB,8.90 MiB
Shape,"(1, 1, 1, 6048, 6048)","(1, 1, 1, 2160, 2160)"
Dask graph,9 chunks in 6 graph layers,9 chunks in 6 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 69.77 MiB 8.90 MiB Shape (1, 1, 1, 6048, 6048) (1, 1, 1, 2160, 2160) Dask graph 9 chunks in 6 graph layers Data type uint16 numpy.ndarray",1  1  6048  6048  1,

Unnamed: 0,Array,Chunk
Bytes,69.77 MiB,8.90 MiB
Shape,"(1, 1, 1, 6048, 6048)","(1, 1, 1, 2160, 2160)"
Dask graph,9 chunks in 6 graph layers,9 chunks in 6 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray


In [77]:
dask_images = tile.compile_mosaic(image_dir, 
                                     metadata, 
                                     row, column, 
                                     set_channel=1,set_plane=1,set_time=1
                                    ).compute()
    
data = assay_layout.loc[acq_ID]

zarr_group = zarr.open(zarr_fn, mode='w')
acq_metadata = data.to_dict()
acq_metadata['Acquisition ID'] = acq_ID
acq_metadata['Experiment ID'] = expt_ID
acq_metadata['Dimensionality'] = 'TCZYX'
rechunked_images = dask_images.rechunk((1, 1, 1, 6048, 6048))
print('Loading image')
# rechunked_images = rechunked_images
# Save Dask array to Zarr
print('Saving zarr')
dask.array.to_zarr(rechunked_images, zarr_fn, component='images')

# Store metadata in the Zarr group's attributes
zarr_group.attrs['metadata'] = acq_metadata

Loading image
Saving zarr


In [68]:
zarr_group.attrs['metadata']

{'Strain': 'WT',
 'Compound': 'PZA',
 'Concentration': '60',
 'ConcentrationEC': 'EC50',
 'Acquisition ID': (3, 6),
 'Experiment ID': 'PS0000',
 'Dimensionality': 'TCZYX'}

In [78]:
zarr_group_2 = zarr.open(zarr_fn, mode='r')

In [79]:
zarr_group_2.images

<zarr.core.Array '/images' (1, 1, 1, 6048, 6048) uint16 read-only>

In [76]:
zarr_group_2.attrs['metadata']

{'Acquisition ID': [3, 6],
 'Compound': 'PZA',
 'Concentration': '60',
 'ConcentrationEC': 'EC50',
 'Dimensionality': 'TCZYX',
 'Experiment ID': 'PS0000',
 'Strain': 'WT'}

In [123]:
missing_files = []
for fn in tqdm(subset_df['URL']):
    if not os.path.exists(os.path.join(image_dir, fn)):
        missing_files.append(fn)

  0%|          | 0/7950 [00:00<?, ?it/s]

### testing with compression

In [82]:
# Loop through assay_layout
for (row, column), data in tqdm(assay_layout.iterrows(), total=len(assay_layout)):

    if (row, column) == (3,4):
        continue

    # First set of images without compression
    output_fn = f'/mnt/NEMO/home/users/dayn/macrohet_nemo/{expt_ID}/acquisition/zarr/{row, column}.zarr'
    os.makedirs(os.path.dirname(output_fn), exist_ok=True)    
    # if not os.path.exists(output_fn):

    images = tile.compile_mosaic(image_dir, metadata, row, column).compute()

    store = zarr.DirectoryStore(output_fn)

    # rechunk for saving 
    # images = images.rechunk((150, 2, 3, 2016, 2016)).
    images = images.rechunk((150, 2, 3, 1000, 1000))  # Adjust the chunk sizes as needed

    # Save Dask array to Zarr without compression
    images.to_zarr(store, overwrite=True, group='images', compute=True, codec=zarr.Blosc(cname='lz4', clevel=5))
    
    # images.to_zarr(store, overwrite=True, group='images', compute=True)

    zarr_group = zarr.open(store)
    zarr_group.attrs['Row'] = row
    zarr_group.attrs['Column'] = column
    for key, i in zip(data.keys(), data):
        zarr_group.attrs[key] = i

        # # Second set of images with Blosc-LZ4-Bitshuffle-8 compression
        # output_fn_compressed = f'/mnt/NEMO/home/users/dayn/macrohet_nemo/{expt_ID}/acquisition/zarr/compressed/{row, column}_compressed.zarr'
        # # make dirs
        # os.makedirs(os.path.dirname(output_fn_compressed), exist_ok=True)    
        
        # store_compressed = zarr.DirectoryStore(output_fn_compressed)
    
        # # Specify Blosc-LZ4-Bitshuffle-8 compression options
        # compressor = zarr.Blosc(cname='blosclz', clevel=5, shuffle=zarr.Blosc.BITSHUFFLE)
        
        # # Save Dask array to Zarr with Blosc-LZ4-Bitshuffle-8 compression
        # images.to_zarr(store_compressed, overwrite=True, group='images', compressor=compressor,)# compute=True,)
    
        # zarr_group_compressed = zarr.open(store_compressed)
        # zarr_group_compressed.attrs['Row'] = row
        # zarr_group_compressed.attrs['Column'] = column
        # for key, i in zip(data.keys(), data):
        #     zarr_group_compressed.attrs[key] = i

  0%|          | 0/42 [00:00<?, ?it/s]

  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ign

ValueError: total size of new array must be unchanged