# Scoring Segmentation

Segment an image with corresponding ground truth and score different network performances

In [1]:
import napari
import cellpose
import octopuslite
from octopuslite import tile
from tqdm.auto import tqdm
import numpy as np
import datetime 
from skimage.io import imsave, imshow, imread
from skimage.measure import label, regionprops
import skimage as ski
from skimage.morphology import remove_small_objects, remove_small_holes, binary_erosion
import os
import itertools, os
from tqdm.auto import tqdm
import scipy.ndimage as ndi
import sys
sys.path.append('../../unet_segmentation_metrics/')
import umetrics
import matplotlib.pyplot as plt

!nvcc --version
!nvidia-smi

from cellpose import core, utils, io, models, metrics

use_GPU = core.use_gpu()
yn = ['NO', 'YES']
print(f'>>> GPU activated? {yn[use_GPU]}')

model = models.Cellpose(gpu=True, model_type='cyto')


nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Sun_Jul_28_19:07:16_PDT_2019
Cuda compilation tools, release 10.1, V10.1.243
Thu Jan 12 11:48:06 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.86.01    Driver Version: 515.86.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA RTX A6000    On   | 00000000:65:00.0  On |                  Off |
| 30%   40C    P8    34W / 300W |   1216MiB / 49140MiB |     14%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
 

### Load experiment of choice

The Opera Phenix is a high-throughput confocal microscope that acquires very large 5-dimensional (TCZXY) images over several fields of view in any one experiment. Therefore, a lazy-loading approach is chosen to mosaic, view and annotate these images. This approach depends upon Dask and DaskFusion. The first step is to load the main metadata file (typically called `Index.idx.xml` and located in the main `Images` directory) that contains the image filenames and associated TCXZY information used to organise the images.

In [2]:
image_dir = '/mnt/DATA/sandbox/pierre_live_cell_data/outputs/Replication_IPSDM_GFP/Images/'
metadata_fn = '/mnt/DATA/sandbox/pierre_live_cell_data/outputs/Replication_IPSDM_GFP/Index.idx.xml'
metadata = octopuslite.utils.read_harmony_metadata(metadata_fn)

Reading metadata XML file...


Extracting HarmonyV5 metadata:   0%|          | 0/113400 [00:00<?, ?it/s]

Extracting metadata complete!


### View assay layout and mask information (optional)

The Opera Phenix acquires many time lapse series from a range of positions. The first step is to inspect the image metadata, presented in the form of an `Assaylayout/experiment_ID.xml` file, to show which positions correspond to which experimental assays.

In [3]:
metadata_path = '/mnt/DATA/sandbox/pierre_live_cell_data/outputs/Replication_IPSDM_GFP/Assaylayout/20210602_Live_cell_IPSDMGFP_ATB.xml'
assay_layout_df = octopuslite.utils.read_harmony_metadata(metadata_path, assay_layout=True)
octopuslite.utils.read_harmony_metadata(metadata_path, assay_layout=True)

Reading metadata XML file...
Extracting metadata complete!
Reading metadata XML file...
Extracting metadata complete!


Unnamed: 0,Unnamed: 1,Strain,Compound,Concentration,ConcentrationEC
3,4,RD1,CTRL,0.0,EC0
3,5,WT,CTRL,0.0,EC0
3,6,WT,PZA,60.0,EC50
3,7,WT,RIF,0.1,EC50
3,8,WT,INH,0.04,EC50
3,9,WT,BDQ,0.02,EC50
4,4,RD1,CTRL,0.0,EC0
4,5,WT,CTRL,0.0,EC0
4,6,WT,PZA,60.0,EC50
4,7,WT,RIF,0.1,EC50


# Load ground truth-image pairs

And tidy up image so that it is in binary format

In [4]:
gt = imread('/mnt/DATA/macrohet/segmentation/training/ground_truth/r03c04f*p01-ch99sk38fk1fl1.tiff_2023_01_10-05:48:17_pm.tiff')#.astype(np.uint16)
img = imread('/mnt/DATA/macrohet/segmentation/training/ground_truth/r03c04f*p01-ch1sk38fk1fl1.tiff')#.astype(np.uint16)

In [5]:
true = gt
### unique labels
true = label(true)
true = remove_small_objects(true, min_size=10000)
### need to iterate over each individual segment and erode
new_mask = np.zeros(true.shape, dtype = np.uint8)
for segment_ID in tqdm(range(1, np.max(true))):
    segment = true == segment_ID 
    eroded_segment = binary_erosion(segment)
    new_mask += eroded_segment.astype(np.uint8)
true = remove_small_holes(new_mask).astype(np.uint8)

  0%|          | 0/1072 [00:00<?, ?it/s]

  return func(*args, **kwargs)


# Load a series of model outputs

With varying cellpose parameters

In [6]:
### average cell diameter
diameters = np.linspace(200, 300, 3)
### flow threshold, larger value means more ROIs (maybe ill fitting), lower means fewer ROIs 
flow_thresholds = np.linspace(0,2,3)
### cellprob_threshold, larger is is fewer ROIs, lower means more...? 
cellprobs_thresholds = np.linspace(-3,3, 7)

In [7]:
params = list(itertools.product(diameters, flow_thresholds, cellprobs_thresholds))
len(params)

63

In [8]:
model_output_dict = dict()

In [None]:
for diameter, flow_threshold, cellprob_threshold in tqdm(params, total = len(params)):
    mask_stack = []
    masks, flows, styles, diams = model.eval(img, diameter=diameter, channels=[0,0],
                                             flow_threshold=flow_threshold, cellprob_threshold=cellprob_threshold)        
    model_output_dict[(diameter, flow_threshold, cellprob_threshold)] = masks
    np.save('model_output_dict_final_2.npy', model_output_dict)

  0%|          | 0/63 [00:00<?, ?it/s]

### Or skip and load already segmented options

In [8]:
model_output_dict = np.load('model_output_dict_final_2.npy', allow_pickle=w_pickle = True)

In [13]:
model_output_dict = model_output_dict.item()

# Prepare model output for scoring

Binarise and remove holes etc

In [16]:
for param in model_output_dict:
    if 250.0 in param:
        print(param)

(250.0, 0.0, -3.0)
(250.0, 0.0, -2.0)
(250.0, 0.0, -1.0)
(250.0, 0.0, 0.0)
(250.0, 0.0, 1.0)
(250.0, 0.0, 2.0)
(250.0, 0.0, 3.0)
(250.0, 1.0, -3.0)
(250.0, 1.0, -2.0)
(250.0, 1.0, -1.0)
(250.0, 1.0, 0.0)


In [17]:
for n, param in tqdm(enumerate(reversed(model_output_dict)), total = len(model_output_dict)):
    
    if 250.0 in param:
    
        pred = model_output_dict[param]
        pred = remove_small_objects(pred, min_size=10000)
        ### need to iterate over each individual segment and erode
        new_mask = np.zeros(pred.shape, dtype = np.uint8)
        for segment_ID in tqdm(range(1, np.max(pred))):
            segment = pred == segment_ID 
            eroded_segment = binary_erosion(segment)
            new_mask += eroded_segment.astype(np.uint8)
        pred = remove_small_holes(new_mask).astype(np.uint8)
        model_output_dict[param] = pred

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/725 [00:00<?, ?it/s]

  return func(*args, **kwargs)


  0%|          | 0/754 [00:00<?, ?it/s]

  0%|          | 0/818 [00:00<?, ?it/s]

  0%|          | 0/921 [00:00<?, ?it/s]

  0%|          | 0/1466 [00:00<?, ?it/s]

  0%|          | 0/1126 [00:00<?, ?it/s]

  0%|          | 0/1023 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/892 [00:00<?, ?it/s]

  0%|          | 0/948 [00:00<?, ?it/s]

  0%|          | 0/1064 [00:00<?, ?it/s]

# Conduct segmentation scoring

In [None]:
score_dict = dict()
for param in tqdm(model_output_dict, total = len(model_output_dict)):
    result = umetrics.calculate(true, pred)
    score_dict[param] = result.results
    np.save('scores/score_dict_final.npy', score_dict)
    with open('scores/score_param_{param}.txt', 'w') as f:
        f.write(result.results)
    result.plot()
    plt.savefig(f'scores/{param}_overlay.png', dpi = 314, bbox_inches = 'tight')
    plt.clf()