# Manual segmentation and tracking

#### Load modules, define functions and parameters

In [41]:
import os
from glob import glob
from homuncu_loc.dataio import try_load_nemo_h5
from skimage import io
import napari
import btrack
import shutil

### Pre-defined images to pay attention to

In [2]:
# Create a list called image_IDs
image_IDs = [
1556,
1557,
1558,
1568, 
1566]

# Print the value of image_IDs
image_IDs

[1556, 1557, 1558, 1568, 1566]

### Set address for NEMO

In [3]:
# Set the base directory of the NEMO home folder, is different depending on your operating system
base_dir = '/run/user/30046150/gvfs/smb-share:server=data2.thecrick.org,share=lab-gutierrezm'

### Find corresponding image filenames

In [4]:
# Set the base directory of the images
image_basedir = os.path.join(base_dir, 'home/shared/Lung on Chip/homuncu_loc_image_analysis/iAT1_iAT2_iVEC_macrophage_experiments/DAPI_ZO1_CD16_MTB/images')

# Create a list of image filenames using list comprehension
# Each filename is obtained by using glob function to find the image file that matches the image_ID in the given directory
image_fns = [glob(os.path.join(image_basedir, f'*{image_ID}*.tif'))[0] for image_ID in image_IDs]

# Print the list of image filenames
[f'{i} : {os.path.basename(fn)}' for i, fn in enumerate(image_fns)]

['0 : 20231128_20X_23-03-072A3_DAPI_ZO-1_CD16_Mtb_WT_2hpi_Multichannel Z-Stack_20231128_1556.tif',
 '1 : 20231128_20X_23-03-072A3_DAPI_ZO-1_CD16_Mtb_WT_2hpi_Multichannel Z-Stack_20231128_1557.tif',
 '2 : 20231128_20X_23-03-072A3_DAPI_ZO-1_CD16_Mtb_WT_2hpi_Multichannel Z-Stack_20231128_1558.tif',
 '3 : 20231128_20X_23-03-072A4_DAPI_ZO-1_CD16_Mtb_WT_48hpi_Multichannel Z-Stack_20231128_1568.tif',
 '4 : 20231128_20X_23-03-072A4_DAPI_ZO-1_CD16_Mtb_WT_48hpi_Multichannel Z-Stack_20231128_1566.tif']

In [19]:
# Set the base directory of the single-cell analyses
sc_basedir = os.path.join(base_dir, 'home/shared/Lung on Chip/homuncu_loc_image_analysis/iAT1_iAT2_iVEC_macrophage_experiments/DAPI_ZO1_CD16_MTB/sc_analyses')

# Create a list of single-cell analysis filenames using list comprehension
# Each filename is obtained by using glob function to find the file that matches the image_ID in the given directory
sc_fns = [glob(os.path.join(sc_basedir, f'*{image_ID}*gt_mphi.h5'))[0] for image_ID in image_IDs]

# Print the list of single-cell analysis filenames
sc_fns

IndexError: list index out of range

### Select one image to work with

And load it into the memory

In [5]:
%%time # tells you how long it took to load an image

# define image filename
image_fn = image_fns[0]

# Read image file into memory
images = io.imread(image_fn)

### Find single-cell labels
If they exist

In [11]:
# Set the base directory of the single-cell analyses
sc_basedir = os.path.join(base_dir, 'home/shared/Lung on Chip/homuncu_loc_image_analysis/iAT1_iAT2_iVEC_macrophage_experiments/DAPI_ZO1_CD16_MTB/sc_analyses')

# Strip image filename to match sc_fn
sc_fn_pattern = os.path.basename(image_fn).replace('.tif', '')

# Find all options for sc data
sc_fns = glob(os.path.join(sc_basedir, f'*{sc_fn_pattern}*.h5'))

# Print available sc analyses
if sc_fns:
    print([f'{i} : {os.path.basename(fn)}' for i, fn in enumerate(sc_fns)])
else:
    print('No matching files found.')

['0 : 20231128_20X_23-03-072A3_DAPI_ZO-1_CD16_Mtb_WT_2hpi_Multichannel Z-Stack_20231128_1556_gt_mphi.h5', '1 : 20231128_20X_23-03-072A3_DAPI_ZO-1_CD16_Mtb_WT_2hpi_Multichannel Z-Stack_20231128_1556_mphi_gt_masks.h5']


### Select one set of single-cell labels to work with

In [12]:
# select sc_fn based on above enumeration 
sc_fn = sc_fns[0]

# Use the try and load function to load the single-cell data
masks = try_load_nemo_h5(sc_fn, return_options = ['masks'])[0]

[INFO][2023/12/15 01:52:20 PM] Opening HDF file: /run/user/30046150/gvfs/smb-share:server=data2.thecrick.org,share=lab-gutierrezm/home/shared/Lung on Chip/homuncu_loc_image_analysis/iAT1_iAT2_iVEC_macrophage_experiments/DAPI_ZO1_CD16_MTB/sc_analyses/20231128_20X_23-03-072A3_DAPI_ZO-1_CD16_Mtb_WT_2hpi_Multichannel Z-Stack_20231128_1556_gt_mphi.h5...
[INFO][2023/12/15 01:52:25 PM] Loading segmentation (155, 2304, 2304)
[INFO][2023/12/15 01:52:25 PM] Closing HDF file: /run/user/30046150/gvfs/smb-share:server=data2.thecrick.org,share=lab-gutierrezm/home/shared/Lung on Chip/homuncu_loc_image_analysis/iAT1_iAT2_iVEC_macrophage_experiments/DAPI_ZO1_CD16_MTB/sc_analyses/20231128_20X_23-03-072A3_DAPI_ZO-1_CD16_Mtb_WT_2hpi_Multichannel Z-Stack_20231128_1556_gt_mphi.h5


### Create temporary output directory to save masks too whilst working on them

In [30]:
# define cell type for GT masks 
cell_type = 'mphi' #iat1, iat2 etc
# define temporary local directory 
output_dir = './temp_mask_output/'
# create directory if it doesnt exist
os.makedirs(output_dir, exist_ok=True)
# define output fn
output_masks_fn = os.path.basename(image_fn).replace('.tif', f'_gt_{cell_type}_masks.h5')
# join together to full output path
output_path = os.path.join(output_dir, output_masks_fn)

##### Optionally, load tracks to inspect the quality of them

In [None]:
# Use the try and load function to load the single-cell data
tracks = try_load_nemo_h5(sc_fn, return_options = ['tracks'])

# Convert the tracks to a format compatible with Napari
napari_tracks, _, _ = btrack.utils.tracks_to_napari(tracks, ndim=2)

### Launch napari to manually label cells

If cells are fixed in position then disparate masking can be used, where you only label the first and last instance of the cell across Z. If the shape changes then add more masks to unite a morphological change over Z. 

In [21]:
# Initialize a Napari viewer with the title as the base name of the image file
viewer = napari.Viewer(title=os.path.basename(image_fn))
# viewer.add_image(images, channel_axis=-1)  # Add the image to the viewer, assuming it has multiple channels
viewer.add_labels(masks)  # Add the segmentation masks as labels
# if you want to add tracks, uncomment the line below
# viewer.add_tracks(napari_tracks)  # Add the tracks to the viewer
# function to save out masks as you go 
@viewer.bind_key('s', overwrite=True)
def save_out_masks(viewer):
    # save out latest version of masks
    with btrack.io.HDF5FileHandler(output_path, 'w', obj_type='obj_type_1') as writer:
            writer.write_segmentation(masks)

viewer.show(block=True)

# save out final version of masks only after you have closed the window
with btrack.io.HDF5FileHandler(output_path, 'w', obj_type='obj_type_1') as writer:
        writer.write_segmentation(masks)

[INFO][2023/12/15 02:01:04 pm] Opening HDF file: ./temp_mask_output/20231128_20X_23-03-072A3_DAPI_ZO-1_CD16_Mtb_WT_2hpi_Multichannel Z-Stack_20231128_1556_gt_masks.h5...
[INFO][2023/12/15 02:01:12 pm] Closing HDF file: ./temp_mask_output/20231128_20X_23-03-072A3_DAPI_ZO-1_CD16_Mtb_WT_2hpi_Multichannel Z-Stack_20231128_1556_gt_masks.h5


### Unite the disparate masks over Z

In [None]:
# Initialize a new 3D array
linked_masks = np.zeros_like(masks)

# Identify all unique cell IDs (excluding 0 as it usually represents background)
unique_cells = np.unique(masks[masks > 0])

# Iterate over each cell ID
for cell_id in tqdm(unique_cells, total = len(unique_cells)):
    
    # Find all z-indices where this cell ID appears
    z_indices = np.unique(np.where(masks == cell_id)[0])

    # Check if there are at least two instances
    if len(z_indices) > 1:
        
        # iterate over each step between successive manually created masks
        for step in tqdm(range(len(z_indices)-1), desc = f'Iterating over cell ID {cell_id} GT masks', total = (len(z_indices)-1), leave = False):

            # find the first and next z indices that this mask appears in 
            first_z, next_z = z_indices[step], z_indices[step+1]

            # iterate over the frames in this range 
            for z in tqdm(range(first_z, next_z), desc = 'Filling in the gaps', total = len(range(first_z, next_z)), leave = False):
                
                # change the next frame to feature the mask of cell ID from the current frame
                linked_masks[z+1][masks[first_z] == cell_id] = cell_id

##### Optionally check these linked masks

In [None]:
# Initialize a Napari viewer with the title as the base name of the image file
viewer = napari.Viewer(title=os.path.basename(image_fn) + ' Linked masks')
viewer.add_image(images, channel_axis=-1)  # Add the image to the viewer, assuming it has multiple channels
viewer.add_labels(linked_masks) # Add linked masks

### Track over Z

In [None]:
# Convert the segmentation masks to objects using btrack
objects = btrack.utils.segmentation_to_objects(
                                                segmentation=linked_masks,
                                                intensity_image=image,
                                                # properties=props,
                                                use_weighted_centroid=False,
                                                assign_class_ID=True,
                                                )

# Check if mtb infected above threshold and measure mtb properties for each cell
threshold = 230
mtb_ch = 3

for o in tqdm(objects, desc='Measuring Mtb properties of each cell'):
    coordinates = np.argwhere(masks[o.t] == o.properties['class_id'])
    pixel_values = image[o.t, coordinates[:, 0], coordinates[:, 1]]
    mtb_status = np.any(pixel_values[:,mtb_ch] > threshold)
    mtb_area = np.sum(pixel_values[:,mtb_ch] > threshold)
    o.properties['mtb_status'] = mtb_status
    o.properties['mtb_area'] = mtb_area
    # o.properties['pixel_values'] = pixel_values

# Initialize BayesianTracker
with btrack.BayesianTracker() as tracker:
    
    # Configure the tracker using a config file
    tracker.configure('/home/dayn/analysis/models/btrack/loc.json')
    
    # Set max search radius to a very limited radius
    tracker.max_search_radius = 10
    
    # Define tracking method
    tracker.tracking_updates = ["MOTION"]#, "VISUAL"]
    
    # Append the objects to be tracked
    tracker.append(objects)
    
    # Set the volume
    tracker.volume = ((0, linked_masks.shape[1]), (0, linked_masks.shape[2]), (-1e5, 1e5))
    
    # Track them (in interactive mode)
    tracker.track(step_size=10)
    
    # Generate hypotheses and run the global optimizer
    tracker.optimize()
    
    # Get the tracks as a Python list
    tracks = tracker.tracks

if len(tracks) != np.max(linked_masks):
    print(f'Error with tracks and masks mismatch: {(len(tracks), np.max(masks))}')

### Write out locally first

In [None]:
# define output fn
output_sc_fn = os.path.basename(image_fn).replace('.tif', f'_gt_{cell_type}_sc_data.h5')
# join together to full output path
output_path = os.path.join(output_dir, output_masks_fn)
# # Write the tracks and segmentation masks to an HDF5 file using btrack.io.HDF5FileHandler
with btrack.io.HDF5FileHandler(output_path, 'w', obj_type='obj_type_1') as writer:
    writer.write_tracks(tracks)
    writer.write_segmentation(linked_masks)

### Optionally move the ground truth sc data to the main image analysis folder on NEMO

In [43]:
# Define source and destination paths
source = output_path # Path of the file to be copied
nemo_output_path = os.path.join(sc_basedir, os.path.basename(output_path)) # Destination path to copy the file to

# Copy the file from the source to the destination
shutil.copy(source, nemo_output_path)