# Image viewer

This notebook is for inspecting timelapse microscopy data, with associated sinhgle-cell labels and tracks, showing the infection of human macrophages with Mycobacterium Tuberculosis (Mtb), acquired on an Opera Phenix confocal microscope. 

In [1]:
import os
import json
import re
import glob
import logging

import numpy as np
from natsort import natsorted
from tqdm.auto import tqdm
from skimage import io
import dask
import zarr

import napari
from macrohet import dataio, tile, visualise
import btrack


scale_factor = 2 #5.04
ndim = 2

In [2]:
os.getpid()

491393

### Load experiment of choice

The Opera Phenix is a high-throughput confocal microscope that acquires very large 5-dimensional (TCZXY) images over several fields of view in any one experiment. Therefore, a lazy-loading approach is chosen to mosaic, view and annotate these images. This approach depends upon Dask and DaskFusion. The first step is to load the main metadata file (typically called `Index.idx.xml` and located in the main `Images` directory) that contains the image filenames and associated TCXZY information used to organise the images.

In [3]:
expt_ID = 'ND0004'
location = 'SYNO' # 'NEMO' # 'SYNO'
base_dir = f'/mnt/{location}/macrohet_{location.lower()}/data/{expt_ID}/'
metadata_fn = os.path.join(base_dir, 'acquisition/Images/Index.idx.xml')
metadata = dataio.read_harmony_metadata(metadata_fn)  
metadata_fn = glob.glob(os.path.join(base_dir, 'acquisition/Assaylayout/*.xml'))[0]
assay_layout = dataio.read_harmony_metadata(metadata_fn, assay_layout=True,)
metadata

Reading metadata XML file...


0it [00:00, ?it/s]

Extracting metadata complete!
Reading metadata XML file...
Extracting metadata complete!


Unnamed: 0,id,State,URL,Row,Col,FieldID,PlaneID,TimepointID,ChannelID,FlimID,...,PositionZ,AbsPositionZ,MeasurementTimeOffset,AbsTime,MainExcitationWavelength,MainEmissionWavelength,ObjectiveMagnification,ObjectiveNA,ExposureTime,OrientationMatrix
0,0301K1F1P1R1,Ok,r03c01f01p01-ch1sk1fk1fl1.tiff,3,1,1,1,0,1,1,...,0,0.135243401,0,2024-04-17T15:13:28.903+01:00,640,706,40,1.1,0.2,"[[0.999619,0,0,-5.1],[0,-0.999619,0,-2.5],[0,0..."
1,0301K1F1P1R2,Ok,r03c01f01p01-ch2sk1fk1fl1.tiff,3,1,1,1,0,2,1,...,0,0.135243401,0,2024-04-17T15:13:29.123+01:00,488,522,40,1.1,0.1,"[[0.999619,0,0,-5.1],[0,-0.999619,0,-2.5],[0,0..."
2,0301K1F1P2R1,Ok,r03c01f01p02-ch1sk1fk1fl1.tiff,3,1,1,2,0,1,1,...,2E-06,0.135245398,0,2024-04-17T15:13:29.467+01:00,640,706,40,1.1,0.2,"[[0.999619,0,0,-5.1],[0,-0.999619,0,-2.5],[0,0..."
3,0301K1F1P2R2,Ok,r03c01f01p02-ch2sk1fk1fl1.tiff,3,1,1,2,0,2,1,...,2E-06,0.135245398,0,2024-04-17T15:13:29.7+01:00,488,522,40,1.1,0.1,"[[0.999619,0,0,-5.1],[0,-0.999619,0,-2.5],[0,0..."
4,0301K1F1P3R1,Ok,r03c01f01p03-ch1sk1fk1fl1.tiff,3,1,1,3,0,1,1,...,4E-06,0.135247394,0,2024-04-17T15:13:30.043+01:00,640,706,40,1.1,0.2,"[[0.999619,0,0,-5.1],[0,-0.999619,0,-2.5],[0,0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
349267,0612K154F9P1R2,Ok,r06c12f09p01-ch2sk154fk1fl1.tiff,6,12,9,1,153,2,1,...,0,0.1350099,275404.583,2024-04-20T19:57:46.98+01:00,488,522,40,1.1,0.1,"[[0.999619,0,0,-5.1],[0,-0.999619,0,-2.5],[0,0..."
349268,0612K154F9P2R1,Ok,r06c12f09p02-ch1sk154fk1fl1.tiff,6,12,9,2,153,1,1,...,2E-06,0.135011896,275404.583,2024-04-20T19:57:47.323+01:00,640,706,40,1.1,0.2,"[[0.999619,0,0,-5.1],[0,-0.999619,0,-2.5],[0,0..."
349269,0612K154F9P2R2,Ok,r06c12f09p02-ch2sk154fk1fl1.tiff,6,12,9,2,153,2,1,...,2E-06,0.135011896,275404.583,2024-04-20T19:57:47.557+01:00,488,522,40,1.1,0.1,"[[0.999619,0,0,-5.1],[0,-0.999619,0,-2.5],[0,0..."
349270,0612K154F9P3R1,Ok,r06c12f09p03-ch1sk154fk1fl1.tiff,6,12,9,3,153,1,1,...,4E-06,0.135013893,275404.583,2024-04-20T19:57:47.9+01:00,640,706,40,1.1,0.2,"[[0.999619,0,0,-5.1],[0,-0.999619,0,-2.5],[0,0..."


### Now to lazily mosaic the images using Dask prior to saving them out as zarr

In [10]:
zarr_fns_to_do =[]
pattern = r'\((\d+), (\d+)\)\.zarr$'
for acq_ID, data in tqdm(assay_layout.iterrows(), total = len(assay_layout)):
    # break
    zarr_fn = f'/mnt/SYNO/macrohet_syno/data/{expt_ID}/acquisition/zarr/{acq_ID}.zarr'
    if not os.path.exists(zarr_fn):
        zarr_dir = (os.path.dirname(zarr_fn))
        os.makedirs(zarr_dir, exist_ok = True)
        zarr_fns_to_do.append(zarr_fn)

  0%|          | 0/42 [00:00<?, ?it/s]

In [11]:
zarr_fns_to_do

['/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(4, 4).zarr',
 '/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(4, 5).zarr',
 '/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(4, 6).zarr',
 '/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(4, 7).zarr',
 '/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(4, 8).zarr',
 '/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(4, 9).zarr',
 '/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(4, 10).zarr',
 '/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(4, 11).zarr',
 '/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(4, 12).zarr',
 '/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(5, 3).zarr',
 '/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(5, 4).zarr',
 '/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(5, 5).zarr',
 '/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(5, 6).zarr',
 '/mnt/SYNO/macrohet_syno/data/ND0004/acquisition/zarr/(5, 7).zarr',
 '/mnt/SYNO/macrohet_syno/data/

In [12]:
len(zarr_fns_to_do)

29

In [None]:
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger()

for zarr_fn in tqdm(zarr_fns_to_do, total=len(zarr_fns_to_do), desc='Iterating over zarr fns to do'):
    match = re.search(pattern, zarr_fn)
    row, column = acq_ID = (int(match.group(1)), int(match.group(2)))

    image_dir = os.path.join(base_dir, 'acquisition/Images')
    try:
        logger.info(f"Processing acquisition ID: {acq_ID}")
        
        logger.info(f"Compiling mosaic for row {row}, column {column} from {image_dir}")
        dask_images = tile.compile_mosaic(image_dir, metadata, row, column).compute()
        logger.info(f"Mosaic compiled successfully for acquisition ID: {acq_ID}")
        
        data = assay_layout.loc[acq_ID]
        logger.info(f"Retrieved assay layout data for acquisition ID: {acq_ID}")
        
        zarr_group = zarr.open(zarr_fn, mode='w')
        logger.info(f"Opened Zarr file: {zarr_fn}")
        
        acq_metadata = data.to_dict()
        acq_metadata['Acquisition ID'] = acq_ID
        acq_metadata['Experiment ID'] = expt_ID
        acq_metadata['Dimensionality'] = 'TCZYX'
        
        logger.info(f"Rechunking images for acquisition ID: {acq_ID}")
        rechunked_images = dask_images.rechunk((1, 1, 1, 6048, 6048))
        
        logger.info(f"Loading image into Zarr for acquisition ID: {acq_ID}")
        dask.array.to_zarr(rechunked_images, zarr_fn, component='images')
        logger.info(f"Image loaded into Zarr for acquisition ID: {acq_ID}")
        
        zarr_group.attrs['metadata'] = acq_metadata
        logger.info(f"Metadata saved for acquisition ID: {acq_ID}")
        
        logger.info(f"Successfully processed acquisition ID: {acq_ID}")
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        logger.error(f"Acquisition ID: {acq_ID}, Error: {e}")
        continue

Iterating over zarr fns to do:   0%|          | 0/29 [00:00<?, ?it/s]

INFO:root:Processing acquisition ID: (4, 4)
INFO:root:Compiling mosaic for row 4, column 4 from /mnt/SYNO/macrohet_syno/data/ND0004/acquisition/Images


## Quick check that it worked

In [None]:
test

In [83]:
zarr_group = zarr.open(zarr_fn)

<zarr.attrs.Attributes at 0x7fad6e499190>

In [68]:
zarr_group.attrs['metadata']

{'Strain': 'WT',
 'Compound': 'PZA',
 'Concentration': '60',
 'ConcentrationEC': 'EC50',
 'Acquisition ID': (3, 6),
 'Experiment ID': 'PS0000',
 'Dimensionality': 'TCZYX'}

### testing with compression

In [82]:
# Loop through assay_layout
for (row, column), data in tqdm(assay_layout.iterrows(), total=len(assay_layout)):

    if (row, column) == (3,4):
        continue

    # First set of images without compression
    output_fn = f'/mnt/NEMO/home/users/dayn/macrohet_nemo/{expt_ID}/acquisition/zarr/{row, column}.zarr'
    os.makedirs(os.path.dirname(output_fn), exist_ok=True)    
    # if not os.path.exists(output_fn):

    images = tile.compile_mosaic(image_dir, metadata, row, column).compute()

    store = zarr.DirectoryStore(output_fn)

    # rechunk for saving 
    # images = images.rechunk((150, 2, 3, 2016, 2016)).
    images = images.rechunk((150, 2, 3, 1000, 1000))  # Adjust the chunk sizes as needed

    # Save Dask array to Zarr without compression
    images.to_zarr(store, overwrite=True, group='images', compute=True, codec=zarr.Blosc(cname='lz4', clevel=5))
    
    # images.to_zarr(store, overwrite=True, group='images', compute=True)

    zarr_group = zarr.open(store)
    zarr_group.attrs['Row'] = row
    zarr_group.attrs['Column'] = column
    for key, i in zip(data.keys(), data):
        zarr_group.attrs[key] = i

        # # Second set of images with Blosc-LZ4-Bitshuffle-8 compression
        # output_fn_compressed = f'/mnt/NEMO/home/users/dayn/macrohet_nemo/{expt_ID}/acquisition/zarr/compressed/{row, column}_compressed.zarr'
        # # make dirs
        # os.makedirs(os.path.dirname(output_fn_compressed), exist_ok=True)    
        
        # store_compressed = zarr.DirectoryStore(output_fn_compressed)
    
        # # Specify Blosc-LZ4-Bitshuffle-8 compression options
        # compressor = zarr.Blosc(cname='blosclz', clevel=5, shuffle=zarr.Blosc.BITSHUFFLE)
        
        # # Save Dask array to Zarr with Blosc-LZ4-Bitshuffle-8 compression
        # images.to_zarr(store_compressed, overwrite=True, group='images', compressor=compressor,)# compute=True,)
    
        # zarr_group_compressed = zarr.open(store_compressed)
        # zarr_group_compressed.attrs['Row'] = row
        # zarr_group_compressed.attrs['Column'] = column
        # for key, i in zip(data.keys(), data):
        #     zarr_group_compressed.attrs[key] = i

  0%|          | 0/42 [00:00<?, ?it/s]

  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ignoring keyword argument %r" % k)
  warn("ign

ValueError: total size of new array must be unchanged