# Segment, localise and track


In [25]:
import btrack
import numpy as np
from cellpose import models
from tqdm.auto import tqdm

from macrohet import dataio, tile

### 1. Lazily load the images as a Dask array. 
This enables rapid initialisation of the segmentation workflow by avoiding loading the full image volume into memory.

In [2]:
# Parse the Harmony export index file (Index.idx.xml) into a metadata table
# that is later passed to the mosaic compiler.
metadata = dataio.read_harmony_metadata('../data/untiled_images/Index.idx.xml')

Reading metadata XML file...


0it [00:00, ?it/s]

Extracting metadata complete!


In [3]:
# Display the parsed metadata table for a quick visual check.
metadata

Unnamed: 0,id,State,URL,Row,Col,FieldID,PlaneID,TimepointID,ChannelID,FlimID,...,PositionZ,AbsPositionZ,MeasurementTimeOffset,AbsTime,MainExcitationWavelength,MainEmissionWavelength,ObjectiveMagnification,ObjectiveNA,ExposureTime,OrientationMatrix
0,0305K1F1P1R1,Ok,r03c05f01p01-ch1sk1fk1fl1.tiff,3,5,1,1,0,1,1,...,0,0.135621503,0,2021-04-16T19:11:18.61+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
1,0305K1F1P1R2,Ok,r03c05f01p01-ch2sk1fk1fl1.tiff,3,5,1,1,0,2,1,...,0,0.135621503,0,2021-04-16T19:11:18.61+01:00,640,706,40,1.1,0.2,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
2,0305K1F1P2R1,Ok,r03c05f01p02-ch1sk1fk1fl1.tiff,3,5,1,2,0,1,1,...,2E-06,0.1356235,0,2021-04-16T19:11:18.89+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
3,0305K1F1P2R2,Ok,r03c05f01p02-ch2sk1fk1fl1.tiff,3,5,1,2,0,2,1,...,2E-06,0.1356235,0,2021-04-16T19:11:18.89+01:00,640,706,40,1.1,0.2,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
4,0305K1F1P3R1,Ok,r03c05f01p03-ch1sk1fk1fl1.tiff,3,5,1,3,0,1,1,...,4E-06,0.135625601,0,2021-04-16T19:11:19.17+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4044,0305K75F9P1R2,Ok,r03c05f09p01-ch2sk75fk1fl1.tiff,3,5,9,1,74,2,1,...,0,0.135624304,266400.56,2021-04-19T21:11:30.447+01:00,640,706,40,1.1,0.2,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
4045,0305K75F9P2R1,Ok,r03c05f09p02-ch1sk75fk1fl1.tiff,3,5,9,2,74,1,1,...,2E-06,0.135626301,266400.56,2021-04-19T21:11:30.713+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
4046,0305K75F9P2R2,Ok,r03c05f09p02-ch2sk75fk1fl1.tiff,3,5,9,2,74,2,1,...,2E-06,0.135626301,266400.56,2021-04-19T21:11:30.713+01:00,640,706,40,1.1,0.2,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
4047,0305K75F9P3R1,Ok,r03c05f09p03-ch1sk75fk1fl1.tiff,3,5,9,3,74,1,1,...,4E-06,0.135628298,266400.56,2021-04-19T21:11:30.993+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."


In [4]:
# Directory holding the individual (untiled) image files listed in `metadata`
image_dir = '../data/untiled_images/'

# Compile the tiles of well row 3 / column 5 into one mosaic, arranged as a
# 3 x 3 grid of tiles.
# NOTE(review): set_plane='max_proj' presumably collapses the z-planes with a
# maximum projection - confirm against macrohet.tile.compile_mosaic.
images = tile.compile_mosaic(
    image_dir,
    metadata,
    row=3,
    col=5,
    n_tile_rows=3,
    n_tile_cols=3,
    set_plane='max_proj',
)

In [5]:
# Display the array summary (shape, chunking, dtype) without computing it.
images


Unnamed: 0,Array,Chunk
Bytes,410.62 MiB,357.78 kiB
Shape,"(75, 2, 1198, 1198)","(1, 1, 428, 428)"
Dask graph,1350 chunks in 1804 graph layers,1350 chunks in 1804 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 410.62 MiB 357.78 kiB Shape (75, 2, 1198, 1198) (1, 1, 428, 428) Dask graph 1350 chunks in 1804 graph layers Data type uint16 numpy.ndarray",75  1  1198  1198  2,

Unnamed: 0,Array,Chunk
Bytes,410.62 MiB,357.78 kiB
Shape,"(75, 2, 1198, 1198)","(1, 1, 428, 428)"
Dask graph,1350 chunks in 1804 graph layers,1350 chunks in 1804 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray


### 2. Load segmentation model
Note: At the time this segmentation pipeline was developed, the default pretrained Cellpose models did not yield satisfactory performance on our images. Accordingly, we trained a custom model on full-scale image data to improve segmentation accuracy. However, recent advances (particularly the release of Cellpose 2.0 and the CellposeSAM model) have significantly improved the general performance of the default models. For many applications, these newer models may now perform on par with or better than custom-trained models, and should be considered before undertaking bespoke training.

In [20]:
# Load the custom-trained Cellpose weights shipped with this project
model_path = '../models/segmentation_model'
segmentation_model = models.CellposeModel(pretrained_model=model_path, gpu=True)

# Alternative: use a default Cellpose model instead of the custom weights
# segmentation_model = models.CellposeModel(gpu=True)

INFO:cellpose.core:** TORCH CUDA version installed and working. **
INFO:cellpose.core:>>>> using GPU (CUDA)
INFO:cellpose.models:>>>> loading model /home/dayn/.cellpose/models/cpsam


### 3. Segment entire array
The cytoplasmic GFP channel (channel index 0) is used as the input to the segmentation.

In [22]:
# Segment the GFP channel (index 0) one timepoint at a time
gfp_timelapse = images[:, 0, ...]

masks_list = []
for gfp_frame in tqdm(gfp_timelapse, desc="Segmenting GFP channel"):
    # eval returns three values; only the first (the label mask) is kept
    masks_frame, _, _ = segmentation_model.eval(gfp_frame, diameter=None, min_size=0)
    masks_list.append(masks_frame)

# Stack the per-frame masks into one array with time as the leading axis
masks = np.stack(masks_list)

Segmenting GFP channel:   0%|          | 0/75 [00:00<?, ?it/s]

In [23]:
# Sanity check: expect one 2D label mask per timepoint, i.e. (T, Y, X).
masks.shape

(75, 1198, 1198)

### 4. Localise objects and quantify Mtb per cell
A few parameters are defined up front to speed up the calculation: a minimum segment area, an Mtb intensity threshold, and the region properties to measure.

In [31]:
# Parameters for localisation and Mtb quantification

# Segments whose area is not above this value are discarded as too small to be a macrophage
segment_size_thresh = 1_000

# Intensity cut-off for the Mtb channel (determined via blind thresholding)
mtb_load_thresh = 480

# Region properties measured for every segmented object
properties = ("area", "mean_intensity")

# scale_factor = 1 # optional scale factor if images are large

In [32]:
# Binary map of the pixels in channel 1 that reach the Mtb intensity threshold
manual_mtb_thresh = images[:, 1, ...] >= mtb_load_thresh

# Composite intensity image handed to regionprops inside btrack.
# Shape: (T, Y, X, 3) - last axis: GFP, RFP, thresholded Mtb mask
gfp_channel = images[:, 0, ...]
rfp_channel = images[:, 1, ...]
mtb_mask_channel = manual_mtb_thresh.astype(bool)
intensity_image = np.stack([gfp_channel, rfp_channel, mtb_mask_channel], axis=-1)

# Localise every labelled segment and measure the requested properties
# (pass scale=(scale_factor, scale_factor) here if a scale factor is in use)
objects = btrack.utils.segmentation_to_objects(
    segmentation=masks,
    intensity_image=intensity_image,
    properties=properties,
    use_weighted_centroid=False,
)

# Keep only the objects large enough to be cells
objects = [
    candidate
    for candidate in objects
    if candidate.properties['area'] > segment_size_thresh
]

# Annotate each object with its infection status and Mtb-positive area.
# Index 2 of mean_intensity is the binary Mtb mask channel, so its mean is the
# fraction of the segment above threshold and fraction * area is the pixel count.
for obj in objects:
    mtb_fraction = obj.properties['mean_intensity'][2]
    # Assigning to .properties adds these keys to the existing measurements
    obj.properties = {
        "Infected": bool(mtb_fraction > 0),
        "Mtb area px": mtb_fraction * obj.properties['area'],
    }

[INFO][2025/06/19 03:51:12 pm] Localizing objects from segmentation...
INFO:btrack.io._localization:Localizing objects from segmentation...
progress: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 75/75 [08:14<00:00,  6.59s/it]
[INFO][2025/06/19 03:59:26 pm] Objects are of type: <class 'dict'>
INFO:btrack.io.utils:Objects are of type: <class 'dict'>
[INFO][2025/06/19 03:59:27 pm] ...Found 47218 objects in 75 frames.
INFO:btrack.io._localization:...Found 47218 objects in 75 frames.


In [34]:
# Inspect a single object prior to linking across time with btrack; it should
# carry the measured properties plus the "Infected" and "Mtb area px" labels
# added in the previous cell.
objects[0]

Unnamed: 0,ID,x,y,z,t,dummy,states,label,area,mean_intensity,Infected,Mtb area px
0,6,952.848564,21.314186,0.0,0,False,7,5,1149.0,"(3,) array",False,0.0


### 5. Track across time
Supply btrack with the list of objects, which it links across time using a Bayesian tracking approach.

In [None]:
# Path to the btrack configuration file (JSON) passed to tracker.configure()
tracking_model = '../models/tracking_model.json'


In [None]:
# The feature keys below are read from the first object, so fail early with a
# clear message rather than an IndexError when nothing was localised.
if not objects:
    raise ValueError("No objects to track: `objects` is empty.")

with btrack.BayesianTracker() as tracker:

    # configure the tracker using a config file
    tracker.configure(tracking_model)

    # set tracking parameters
    tracker.max_search_radius = 4
    tracker.tracking_updates = ["MOTION", "VISUAL"]

    # use every property stored on the objects as a tracking feature
    tracker.features = list(objects[0].properties.keys())

    # append segmented object list
    tracker.append(objects)

    # Define the imaging volume in pixel units. btrack orders the volume as
    # (x, y[, z]), whereas the mask array is (T, Y, X), so the last axis comes first.
    # Multiply both extents by the scale factor if objects were localised with one.
    y_max, x_max = masks.shape[-2], masks.shape[-1]
    tracker.volume = (
        (0, x_max),
        (0, y_max),
    )

    # track them (in interactive mode)
    tracker.track(step_size=100)
    # generate hypotheses and run the global optimizer
    tracker.optimize()
    # store the tracks
    tracks = tracker.tracks


### 6. Save out tracks

#### Option 1: using btrack and h5 compression

In [None]:
# Destination file for the tracks (HDF5)
tracks_output_fn = '../data/tracks.h5'

# Open the file for writing and store the tracks under object type 'obj_type_1'
with btrack.io.HDF5FileHandler(tracks_output_fn, 'w', obj_type='obj_type_1') as writer:
    writer.write_tracks(tracks)


#### Option 2: using Zarr (NGFF-style layout)

In [None]:
from pathlib import Path

import zarr

In [None]:
zarr_path = '../data/example_data.zarr/'

if not tracks:
    raise ValueError("No tracks to export: `tracks` is empty.")

# Per-object properties to export alongside the track coordinates. Only keys
# that were actually measured during localisation can be written, so any
# requested key missing from the tracks is reported and skipped instead of
# raising a KeyError.
requested_features = (
    "area",
    "orientation",
    "major_axis_length",
    "minor_axis_length",
    "mean_intensity",
)
measured_keys = set(tracks[0].properties)
feature_keys = [key for key in requested_features if key in measured_keys]
skipped_keys = [key for key in requested_features if key not in measured_keys]
if skipped_keys:
    print(f"Skipping features that were not measured: {skipped_keys}")

# Flatten all timepoints from all tracks, one row per point: [track_id, t, y, x].
# The ID assigned by btrack is kept (not the list position) so the rows stay
# consistent with the other exports of the same tracks.
track_rows = []
feature_chunks = {key: [] for key in feature_keys}
for track in tracks:
    n_points = len(track)
    track_rows.append(
        np.column_stack([np.full(n_points, track.ID), track.t, track.y, track.x])
    )
    # Track-level properties hold one entry per timepoint for each key
    track_properties = track.properties
    for key in feature_keys:
        feature_chunks[key].append(
            np.asarray(track_properties[key], dtype=np.float32)
        )

# Main track array: [track_id, t, y, x]
track_array = np.concatenate(track_rows).astype(np.float32)

# Feature arrays, row-aligned with track_array
features = {
    key: np.concatenate(chunks)
    for key, chunks in feature_chunks.items()
}

# Write to Zarr
store = zarr.open(zarr_path, mode="a")

# Main Napari-compatible tracks array
store.create_dataset("tracks", data=track_array, compressor=zarr.Blosc(), overwrite=True)

# Features group
feat_grp = store.require_group("features")
for key, arr in features.items():
    feat_grp.create_dataset(key, data=arr, compressor=zarr.Blosc(), overwrite=True)

# Add Napari track metadata
store.attrs["tracks_metadata"] = {
    "format_version": "0.1",
    "type": "napari_tracks",
    "columns": ["track_id", "time", "y", "x"]
}


### 7. Save segmentation out 

#### Option 1: using btrack and h5 compression

In [None]:
# Destination file for the segmentation masks (HDF5)
segmentation_output_fn = '../data/segmentation.h5'

# Open the file for writing and store the stacked label masks
with btrack.io.HDF5FileHandler(
    segmentation_output_fn,
    'w',
    obj_type='obj_type_1',
) as writer:
    writer.write_segmentation(masks)

#### Option 2: using Zarr (NGFF-style layout)

In [None]:
# The labels are written inside the Zarr store at labels/0
segmentation_output_fn = '../data/example_data.zarr'
label_group_path = Path(segmentation_output_fn, 'labels', '0')

# Make sure the target directory exists before writing into it
label_group_path.mkdir(parents=True, exist_ok=True)

# Write the stacked segmentation array, compressed with Blosc/zstd (level 5)
zarr.save_array(
    store=label_group_path,
    arr=masks,
    compressor=zarr.Blosc(cname='zstd', clevel=5),
    overwrite=True,
)

# Register the label array in the attributes of the Zarr root group
zarr_root = zarr.open_group(segmentation_output_fn, mode='a')
zarr_root.attrs['labels'] = [{"path": "labels/0", "type": "label"}]

### 8. Save out single-cell quantifications prior to tracking across time
This is useful in case you would like to re-track with different parameters at a later date.

In [None]:
# Destination file for the localised (untracked) objects (HDF5)
objects_output_fn = '../data/objects.h5'

# Open the file for writing and store the objects under object type 'obj_type_1'
with btrack.io.HDF5FileHandler(objects_output_fn, 'w', obj_type='obj_type_1') as writer:
    writer.write_objects(objects)
