# Image viewer

This notebook is for inspecting timelapse microscopy data, with associated sinhgle-cell labels and tracks, showing the infection of human macrophages with Mycobacterium Tuberculosis (Mtb), acquired on an Opera Phenix confocal microscope. 

In [83]:
import os
os.getpid()

17418

In [41]:
import napari
from macrohet import dataio, tile, visualise
import btrack
print(btrack.__version__)
import os
import json 
from tqdm.auto import tqdm
import numpy as np
from natsort import natsorted
import glob
from skimage import io
import zarr
scale_factor = 2 #5.04
ndim = 2

0.6.1.dev116


### Load experiment of choice

The Opera Phenix is a high-throughput confocal microscope that acquires very large 5-dimensional (TCZXY) images over several fields of view in any one experiment. Therefore, a lazy-loading approach is chosen to mosaic, view and annotate these images. This approach depends upon Dask and DaskFusion. The first step is to load the main metadata file (typically called `Index.idx.xml` and located in the main `Images` directory) that contains the image filenames and associated TCXZY information used to organise the images.

In [43]:
expt_ID = 'ND0002'
base_dir = f'/mnt/NEMO/home/users/dayn/macrohet_nemo/{expt_ID}/'
metadata_fn = os.path.join(base_dir, 'acquisition/Images/Index.idx.xml')
metadata = dataio.read_harmony_metadata(metadata_fn)  
metadata

Reading metadata XML file...


Extracting HarmonyV5 metadata:   0%|          | 0/388620 [00:00<?, ?it/s]

Extracting metadata complete!


Unnamed: 0,id,State,URL,Row,Col,FieldID,PlaneID,TimepointID,ChannelID,FlimID,...,PositionZ,AbsPositionZ,MeasurementTimeOffset,AbsTime,MainExcitationWavelength,MainEmissionWavelength,ObjectiveMagnification,ObjectiveNA,ExposureTime,OrientationMatrix
0,0103K1F1P1R1,Ok,r01c03f01p01-ch1sk1fk1fl1.tiff,1,3,1,1,0,1,1,...,-2E-06,0.135466397,0,2023-11-30T17:22:09.49+00:00,640,706,40,1.1,0.2,"[[1.000989,0,0,10.0],[0,-1.000989,0,-6.8],[0,0..."
1,0103K1F1P1R2,Ok,r01c03f01p01-ch2sk1fk1fl1.tiff,1,3,1,1,0,2,1,...,-2E-06,0.135466397,0,2023-11-30T17:22:09.723+00:00,488,522,40,1.1,0.1,"[[1.000989,0,0,10.0],[0,-1.000989,0,-6.8],[0,0..."
2,0103K1F1P2R1,Ok,r01c03f01p02-ch1sk1fk1fl1.tiff,1,3,1,2,0,1,1,...,0,0.135468394,0,2023-11-30T17:22:10.067+00:00,640,706,40,1.1,0.2,"[[1.000989,0,0,10.0],[0,-1.000989,0,-6.8],[0,0..."
3,0103K1F1P2R2,Ok,r01c03f01p02-ch2sk1fk1fl1.tiff,1,3,1,2,0,2,1,...,0,0.135468394,0,2023-11-30T17:22:10.287+00:00,488,522,40,1.1,0.1,"[[1.000989,0,0,10.0],[0,-1.000989,0,-6.8],[0,0..."
4,0103K1F1P3R1,Ok,r01c03f01p03-ch1sk1fk1fl1.tiff,1,3,1,3,0,1,1,...,2E-06,0.135470405,0,2023-11-30T17:22:10.627+00:00,640,706,40,1.1,0.2,"[[1.000989,0,0,10.0],[0,-1.000989,0,-6.8],[0,0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
388615,0612K150F9P1R2,Ok,r06c12f09p01-ch2sk150fk1fl1.tiff,6,12,9,1,149,2,1,...,-2E-06,0.1351538,268191.66,2023-12-03T20:06:16.08+00:00,488,522,40,1.1,0.1,"[[1.000989,0,0,10.0],[0,-1.000989,0,-6.8],[0,0..."
388616,0612K150F9P2R1,Ok,r06c12f09p02-ch1sk150fk1fl1.tiff,6,12,9,2,149,1,1,...,0,0.135155797,268191.66,2023-12-03T20:06:16.423+00:00,640,706,40,1.1,0.2,"[[1.000989,0,0,10.0],[0,-1.000989,0,-6.8],[0,0..."
388617,0612K150F9P2R2,Ok,r06c12f09p02-ch2sk150fk1fl1.tiff,6,12,9,2,149,2,1,...,0,0.135155797,268191.66,2023-12-03T20:06:16.657+00:00,488,522,40,1.1,0.1,"[[1.000989,0,0,10.0],[0,-1.000989,0,-6.8],[0,0..."
388618,0612K150F9P3R1,Ok,r06c12f09p03-ch1sk150fk1fl1.tiff,6,12,9,3,149,1,1,...,2E-06,0.135157794,268191.66,2023-12-03T20:06:17+00:00,640,706,40,1.1,0.2,"[[1.000989,0,0,10.0],[0,-1.000989,0,-6.8],[0,0..."


### View assay layout and mask information (optional)

The Opera Phenix acquires many time lapse series from a range of positions. The first step is to inspect the image metadata, presented in the form of an `Assaylayout/experiment_ID.xml` file, to show which positions correspond to which experimental assays.

In [45]:
metadata_path = glob.glob(os.path.join(base_dir, 'acquisition/Assaylayout/*AssayLayout.xml'))[0]
assay_layout = dataio.read_harmony_metadata(metadata_path, assay_layout=True,)# mask_exist=True,  image_dir = image_dir, image_metadata = metadata)
assay_layout

Reading metadata XML file...
Extracting metadata complete!


Unnamed: 0_level_0,Unnamed: 1_level_0,Strain,Compound,Concentration,ConcentrationEC
Row,Column,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,1,UNI,CTRL,0.0,EC0
3,2,UNI,CTRL,0.0,EC0
3,3,WT,CTRL,0.0,EC0
3,4,WT,CTRL,0.0,EC0
3,5,WT,PZA,60.0,EC50
3,6,WT,PZA,60.0,EC50
3,7,WT,RIF,0.1,EC50
3,8,WT,RIF,0.1,EC50
3,9,WT,INH,0.04,EC50
3,10,WT,INH,0.04,EC50


### Define row and column of choice

In [71]:
acq_ID = row, column = (3, 4)

### Now to lazily mosaic the images using Dask prior to viewing them.

1x (75,2,3) [TCZ] image stack takes approximately 1 minute to stitch together, so only load the one field of view I want.

In [47]:
image_dir = os.path.join(base_dir, 'acquisition/Images')
images = tile.compile_mosaic(image_dir, 
                             metadata, 
                             row, column, 
                             # set_plane=1,
                             # set_channel=1,
#                              set_time = 66,
#                             input_transforms = [input_transforms]
                            )#.compute().compute()

In [48]:
images

Unnamed: 0,Array,Chunk
Bytes,61.32 GiB,53.39 MiB
Shape,"(150, 2, 3, 6048, 6048)","(1, 2, 3, 2160, 2160)"
Dask graph,1350 chunks in 3603 graph layers,1350 chunks in 3603 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 61.32 GiB 53.39 MiB Shape (150, 2, 3, 6048, 6048) (1, 2, 3, 2160, 2160) Dask graph 1350 chunks in 3603 graph layers Data type uint16 numpy.ndarray",2  150  6048  6048  3,

Unnamed: 0,Array,Chunk
Bytes,61.32 GiB,53.39 MiB
Shape,"(150, 2, 3, 6048, 6048)","(1, 2, 3, 2160, 2160)"
Dask graph,1350 chunks in 3603 graph layers,1350 chunks in 3603 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray


In [33]:
target_chunk_size = (1, 2, 1, 2160, 2160) 

In [34]:
images = images.rechunk(target_chunk_size)

In [76]:
images

Unnamed: 0,Array,Chunk
Bytes,61.32 GiB,23.26 MiB
Shape,"(150, 2, 3, 6048, 6048)","(1, 1, 3, 2016, 2016)"
Dask graph,7500 chunks in 24451 graph layers,7500 chunks in 24451 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 61.32 GiB 23.26 MiB Shape (150, 2, 3, 6048, 6048) (1, 1, 3, 2016, 2016) Dask graph 7500 chunks in 24451 graph layers Data type uint16 numpy.ndarray",2  150  6048  6048  3,

Unnamed: 0,Array,Chunk
Bytes,61.32 GiB,23.26 MiB
Shape,"(150, 2, 3, 6048, 6048)","(1, 1, 3, 2016, 2016)"
Dask graph,7500 chunks in 24451 graph layers,7500 chunks in 24451 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray


### testing with compression

In [82]:
# Loop through assay_layout
for (row, column), data in tqdm(assay_layout.iterrows(), total=len(assay_layout)):

    if (row, column) == (3,4):
        continue

    # First set of images without compression
    output_fn = f'/mnt/NEMO/home/users/dayn/macrohet_nemo/{expt_ID}/acquisition/zarr/{row, column}.zarr'
    os.makedirs(os.path.dirname(output_fn), exist_ok=True)    
    # if not os.path.exists(output_fn):

    images = tile.compile_mosaic(image_dir, metadata, row, column).compute()

    store = zarr.DirectoryStore(output_fn)

    # rechunk for saving 
    # images = images.rechunk((150, 2, 3, 2016, 2016)).
    images = images.rechunk((150, 2, 3, 1000, 1000))  # Adjust the chunk sizes as needed

    # Save Dask array to Zarr without compression
    images.to_zarr(store, overwrite=True, group='images', compute=True, codec=zarr.Blosc(cname='lz4', clevel=5))
    
    # images.to_zarr(store, overwrite=True, group='images', compute=True)

    zarr_group = zarr.open(store)
    zarr_group.attrs['Row'] = row
    zarr_group.attrs['Column'] = column
    for key, i in zip(data.keys(), data):
        zarr_group.attrs[key] = i

        # # Second set of images with Blosc-LZ4-Bitshuffle-8 compression
        # output_fn_compressed = f'/mnt/NEMO/home/users/dayn/macrohet_nemo/{expt_ID}/acquisition/zarr/compressed/{row, column}_compressed.zarr'
        # # make dirs
        # os.makedirs(os.path.dirname(output_fn_compressed), exist_ok=True)    
        
        # store_compressed = zarr.DirectoryStore(output_fn_compressed)
    
        # # Specify Blosc-LZ4-Bitshuffle-8 compression options
        # compressor = zarr.Blosc(cname='blosclz', clevel=5, shuffle=zarr.Blosc.BITSHUFFLE)
        
        # # Save Dask array to Zarr with Blosc-LZ4-Bitshuffle-8 compression
        # images.to_zarr(store_compressed, overwrite=True, group='images', compressor=compressor,)# compute=True,)
    
        # zarr_group_compressed = zarr.open(store_compressed)
        # zarr_group_compressed.attrs['Row'] = row
        # zarr_group_compressed.attrs['Column'] = column
        # for key, i in zip(data.keys(), data):
        #     zarr_group_compressed.attrs[key] = i

  0%|          | 0/42 [00:00<?, ?it/s]

  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ign

ValueError: total size of new array must be unchanged