# Creating training and testing data for segmentation


In [27]:
import napari
from macrohet import dataio, tile, visualise, tools
import btrack
import os
from tqdm.auto import tqdm
from skimage.io import imread, imsave
from skimage.morphology import label
import numpy as np
import glob

### Use cellpose to bootstrap segmentation maps

In [165]:
from cellpose import models, core
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

#Additional Info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')
# defining cellpose model to use
model = models.Cellpose(
                        gpu=True, 
                        model_type='cyto', 
                        net_avg=True, 
                        device=torch.device('cuda')
                        )

# This checks to see if you have set up your GPU properly.
# CPU performance is a lot slower, but not a problem if you 
# are only processing a few images.
use_GPU = core.use_gpu()
print('>>> GPU activated? %d'%use_GPU)

def segment(frame, model = model, channels = [0,0], diameter = 198, min_size = 0):
    
    masks, flows, styles, diams = model.eval(frame, 
                                             channels=channels, 
                                             diameter=diameter, 
                                             min_size=min_size, 
                                             progress=True
                                             )
    return masks

@v.bind_key("s", overwrite = True)
def save_masks(v):  
    v.layers['ds_gt_masks'].save(gt_mask_fn.replace('semantic', 'backup'))
    print('Saved')
@v.bind_key("b", overwrite = True)
def show_bright_gt_image(v):  
    v.layers['ds_images_bright'].visible = 1 if v.layers['ds_images_bright'].visible == 0 else 0

Using device: cuda

NVIDIA RTX A6000
Memory Usage:
Allocated: 0.0 GB
Cached:    9.3 GB
>>> GPU activated? 1


### Load experiment of choice

The Opera Phenix is a high-throughput confocal microscope that acquires very large 5-dimensional (TCZXY) images over several fields of view in any one experiment. Therefore, a lazy-loading approach is chosen to mosaic, view and annotate these images. This approach depends upon Dask and DaskFusion. The first step is to load the main metadata file (typically called `Index.idx.xml` and located in the main `Images` directory) that contains the image filenames and associated TCXZY information used to organise the images.

In [3]:
base_dir = '/mnt/DATA/macrohet/'
# base_dir = '/Volumes/lab-gutierrezm/home/users/dayn/macrohet_nemo/'
metadata_fn = os.path.join(base_dir, 'macrohet_images/Index.idx.xml')
metadata = dataio.read_harmony_metadata(metadata_fn)  
metadata

Reading metadata XML file...


Extracting HarmonyV5 metadata:   0%|          | 0/113400 [00:00<?, ?it/s]

Extracting metadata complete!


Unnamed: 0,id,State,URL,Row,Col,FieldID,PlaneID,TimepointID,ChannelID,FlimID,...,PositionZ,AbsPositionZ,MeasurementTimeOffset,AbsTime,MainExcitationWavelength,MainEmissionWavelength,ObjectiveMagnification,ObjectiveNA,ExposureTime,OrientationMatrix
0,0303K1F1P1R1,Ok,r03c03f01p01-ch1sk1fk1fl1.tiff,3,3,1,1,0,1,1,...,0,0.135583505,0,2021-04-16T19:09:33.84+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
1,0303K1F1P1R2,Ok,r03c03f01p01-ch2sk1fk1fl1.tiff,3,3,1,1,0,2,1,...,0,0.135583505,0,2021-04-16T19:09:33.84+01:00,640,706,40,1.1,0.2,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
2,0303K1F1P2R1,Ok,r03c03f01p02-ch1sk1fk1fl1.tiff,3,3,1,2,0,1,1,...,2E-06,0.135585502,0,2021-04-16T19:09:34.12+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
3,0303K1F1P2R2,Ok,r03c03f01p02-ch2sk1fk1fl1.tiff,3,3,1,2,0,2,1,...,2E-06,0.135585502,0,2021-04-16T19:09:34.12+01:00,640,706,40,1.1,0.2,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
4,0303K1F1P3R1,Ok,r03c03f01p03-ch1sk1fk1fl1.tiff,3,3,1,3,0,1,1,...,4E-06,0.135587499,0,2021-04-16T19:09:34.4+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113395,0609K75F9P1R2,Ok,r06c09f09p01-ch2sk75fk1fl1.tiff,6,9,9,1,74,2,1,...,0,0.135533601,266399.61,2021-04-19T21:14:19.477+01:00,640,706,40,1.1,0.2,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
113396,0609K75F9P2R1,Ok,r06c09f09p02-ch1sk75fk1fl1.tiff,6,9,9,2,74,1,1,...,2E-06,0.135535598,266399.61,2021-04-19T21:14:19.757+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
113397,0609K75F9P2R2,Ok,r06c09f09p02-ch2sk75fk1fl1.tiff,6,9,9,2,74,2,1,...,2E-06,0.135535598,266399.61,2021-04-19T21:14:19.757+01:00,640,706,40,1.1,0.2,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."
113398,0609K75F9P3R1,Ok,r06c09f09p03-ch1sk75fk1fl1.tiff,6,9,9,3,74,1,1,...,4E-06,0.135537595,266399.61,2021-04-19T21:14:20.037+01:00,488,522,40,1.1,0.1,"[[0.990860,0,0,-15.9],[0,-0.990860,0,-44.8],[0..."


### View assay layout and mask information (optional)

The Opera Phenix acquires many time lapse series from a range of positions. The first step is to inspect the image metadata, presented in the form of an `Assaylayout/experiment_ID.xml` file, to show which positions correspond to which experimental assays.

In [4]:
metadata_path = os.path.join(base_dir, 'macrohet_images/Assaylayout/20210602_Live_cell_IPSDMGFP_ATB.xml')
assay_layout = dataio.read_harmony_metadata(metadata_path, assay_layout=True,)# mask_exist=True,  image_dir = image_dir, image_metadata = metadata)
assay_layout

Reading metadata XML file...
Extracting metadata complete!


Unnamed: 0,Unnamed: 1,Strain,Compound,Concentration,ConcentrationEC
3,4,RD1,CTRL,0.0,EC0
3,5,WT,CTRL,0.0,EC0
3,6,WT,PZA,60.0,EC50
3,7,WT,RIF,0.1,EC50
3,8,WT,INH,0.04,EC50
3,9,WT,BDQ,0.02,EC50
4,4,RD1,CTRL,0.0,EC0
4,5,WT,CTRL,0.0,EC0
4,6,WT,PZA,60.0,EC50
4,7,WT,RIF,0.1,EC50


### Define row and column of choice

In [153]:
row = 6
column = 8
frame = 60

### Now to lazily mosaic the images using Dask prior to viewing them.

1x (75,2,3) [TCZ] image stack takes approximately 1 minute to stitch together, so only load the one field of view I want.

In [154]:
image_dir = os.path.join(base_dir, 'macrohet_images/Images')
gt_image = tile.compile_mosaic(image_dir, 
                             metadata, 
                             row, column, 
                             set_plane='sum_proj',
                             set_channel=1,
                             set_time = frame,
#                             input_transforms = [input_transforms]
                            ).compute().compute().astype(np.uint16)[0,0,...]

In [155]:
gt_image.shape

(6048, 6048)

In [156]:
# optional rfp channel to aid segmentation
other_ch_image = tile.compile_mosaic(image_dir, 
                             metadata, 
                             row, column, 
                             set_plane='sum_proj',
                             set_channel=2,
                             set_time = frame,
#                             input_transforms = [input_transforms]
                            ).compute().compute().astype(np.uint16)[0,0,...]

### Save out GFP image

In [157]:
gt_image_fn = f'/mnt/DATA/macrohet/upstream_development/segmentation/ground_truth/raw_images/r0{row}c0{column}f0-p0-ch1sk{frame+1}fk1fl1.tiff'
imsave(gt_image_fn, gt_image, check_contrast=False)

### Define gt mask fn

In [158]:
gt_mask_fn = f'/mnt/DATA/macrohet/upstream_development/segmentation/ground_truth/semantic/r0{row}c0{column}f0-p0-ch1sk{frame+1}fk1fl1_gt_sem_masks.tiff'

###  Bootstrap the manual labelling by quickly segmenting the gt image 

In [159]:
gt_masks = segment(gt_image)

In [160]:
# need to convert from integer ID (instance segmentation) to semantic binary segmentation
gt_masks = tools.instance_to_semantic(gt_masks)
# this makes manual annotation much easier and saves data

  0%|          | 0/607 [00:00<?, ?it/s]

###### Or load previous mask for checking

In [19]:
# gt_masks = imread(gt_mask_fn)

### Solve lag time by downscaling images

In [161]:
ds_images, ds_gt_masks = visualise.downscale_images_for_manual_annotation(gt_image, gt_masks, scale_factor=4)

# Manually annotate

In [162]:
v = napari.Viewer()
v.add_image(ds_images, colormap='green', contrast_limits=[0,8000], blending = 'additive')
v.add_image(ds_images, colormap='green', contrast_limits=[0,1500], blending = 'additive', name = 'ds_images_bright', visible = False)
visualise.add_napari_grid_overlay(viewer = v, N_rows_cols=5)
v.add_labels(ds_gt_masks, color={1:'yellow'}, opacity=0.5, blending = 'additive')
v.add_image(other_ch_image, colormap='magenta', contrast_limits = [0,4000], blending = 'additive', scale=[1/4, 1/4])

v0.5.0. It is considered an "implementation detail" of the napari
application, not part of the napari viewer model. If your use case
requires access to qt_viewer, please open an issue to discuss.
  self.tools_menu = ToolsMenu(self, self.qt_viewer.viewer)


<Image layer 'other_ch_image' at 0x7f3b86e209d0>

In [163]:
print()




### Upscale images again and check in napari prior to saving out

In [170]:
gt_mask_updated = visualise.upscale_labels_post_manual_annotation(ds_gt_masks, 4).astype(np.uint8)

In [171]:
v = napari.Viewer()
v.add_image(gt_image, colormap='green', contrast_limits=[0,8000], blending = 'additive'
#             channel_axis=1,
#             name=["macrophage", "mtb"],
#             colormap=["green",  "magenta"],
#             contrast_limits=[[0,6000], [0,1000]], 
#             visible = True
           )
v.add_labels(gt_mask_updated)
v.add_labels(label(gt_mask_updated))

v0.5.0. It is considered an "implementation detail" of the napari
application, not part of the napari viewer model. If your use case
requires access to qt_viewer, please open an issue to discuss.
  self.tools_menu = ToolsMenu(self, self.qt_viewer.viewer)


<Labels layer 'Labels' at 0x7f3b89d3cd90>

### Post editing save out

In [172]:
# semantic 
imsave(gt_mask_fn, gt_mask_updated)

  imsave(gt_mask_fn, gt_mask_updated)


In [173]:
# numpy (not really necessary)
np.save(f'/mnt/DATA/macrohet/upstream_development/segmentation/ground_truth/npy/r0{row}c0{column}f0-p0-ch1sk{frame+1}fk1fl1_gt_sem_masks', gt_mask_updated)

In [174]:
# instance
imsave(gt_mask_fn.replace('semantic', 'instance').replace('sem', 'inst'), 
       label(gt_mask_updated).astype(np.uint16))

  imsave(gt_mask_fn.replace('semantic', 'instance').replace('sem', 'inst'),


### How many cells have I manually counted?

In [3]:
import glob, os
import numpy as np
from skimage.io import imread

In [4]:
N_cells = 0
for fn in glob.glob('/mnt/DATA/macrohet/upstream_development/segmentation/ground_truth/instance/*.tiff'):
    print(os.path.basename(fn))
    N_cells+=np.max(imread(fn))

r03c07f0-p0-ch1sk16fk1fl1_gt_inst_masks.tiff
r03c05f0-p0-ch1sk75fk1fl1_gt_inst_masks.tiff
r03c08f0-p0-ch1sk61fk1fl1_gt_inst_masks.tiff
r06c08f0-p0-ch1sk61fk1fl1_gt_inst_masks.tiff
r06c09f0-p0-ch1sk1fk1fl1_gt_inst_masks.tiff
r03c06f0-p0-ch1sk35fk1fl1_gt_inst_masks.tiff
r03c04f0-p0-ch1sk2fk1fl1_gt_inst_masks.tiff
r03c09f0-p0-ch1sk71fk1fl1_gt_inst_masks.tiff


In [5]:
N_cells

4934