In [2]:
#Inputs: Minimum mask count used in the model name (`min_masks`, currently hard coded to 18), pretrained model, desired image type folder, path to evaluation pipeline, DRG4 prediction path, manual DRG4 annotation path
#Outputs: Folder of metrics csvs for every flow-prob threshold combination and three cumulative csvs: the `cell_f1_50` scores, the `cell_f1_50-95` scores, and all values recorded in the csvs
import os, shutil
import numpy as np
import matplotlib.pyplot as plt
from cellpose import core, utils, io, models, metrics, plot
from glob import glob
import pandas as pd
import re
import cv2
from PIL import Image
from read_roi import read_roi_file
from read_roi import read_roi_zip
from cellpose import models
from cellpose import train
from cellpose import io
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd
from tqdm import tqdm
import random

# Set seed for reproducibility
# The same seed is applied to NumPy's and Python's global random generators.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# Optional: for PyTorch-based models (Cellpose may use this internally)
# NOTE(review): seeding alone may not make GPU inference bit-for-bit
# deterministic -- confirm if exact reproducibility matters.
import torch
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# The result of core.use_gpu() is used as an index into `yn`
# (falsy -> 'NO', truthy -> 'YES') purely for the status message below.
use_GPU = core.use_gpu()
yn = ['NO', 'YES']
print(f'>>> GPU activated? {yn[use_GPU]}')
# Run the evaluation-pipeline notebook from this cell.
# NOTE(review): `optimized_model_eval_files`, which is called later in this
# file, is not defined here and presumably comes from this notebook -- confirm.
# The path is absolute and machine-specific.
%run /media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/Finalized\ Code/Training/evaluationPipeline.ipynb

def save_rois_with_conversion(masks, files, output_dir):
    """Hand each label mask to ``io.save_rois``, writing into ``output_dir``.

    Masks are narrowed to ``uint8`` only when every value fits in that type.
    A mask holding values outside 0..255 (e.g. more than 255 labelled
    objects) keeps its original dtype: casting it would wrap the values
    modulo 256, turning label 256 into 0 and merging labels 257+ with 1+.

    Args:
        masks: Iterable of label arrays, one per image.
        files: Image paths paired positionally with ``masks``. Only the final
            path component is used, with any ``.jpg`` substring removed.
        output_dir: Destination directory; created if it does not exist.

    Returns:
        None. Output is produced by ``io.save_rois``.
        NOTE(review): ``zip_to_csv`` expects ``*_rois.zip`` archives here,
        so ``io.save_rois`` presumably writes them -- confirm.
    """
    os.makedirs(output_dir, exist_ok=True)

    uint8_max = np.iinfo(np.uint8).max
    for file, mask in zip(files, masks):
        mask = np.asarray(mask)
        if mask.dtype != np.uint8:
            fits_uint8 = mask.size == 0 or (mask.min() >= 0 and mask.max() <= uint8_max)
            if fits_uint8:
                mask = mask.astype(np.uint8)
            # else: keep the wider dtype so no label id is lost to wrap-around.

        stem = os.path.basename(file).replace('.jpg', '')
        io.save_rois(mask, os.path.join(output_dir, stem))

def zip_to_csv(directory):
    """Convert every ROI ``.zip`` archive in ``directory`` into a vertex CSV.

    Each archive is read with ``read_roi_zip``. One row is written per ROI
    vertex with the columns ``Name``, ``X`` and ``Y``. CSVs are written to
    ``<directory>/csv``. The CSV file name is the archive name with ``.zip``
    replaced by ``.csv``, ``_rois`` removed and ``Nonseg`` replaced by ``Seg``.
    These renames are applied to the file name only, so a directory whose path
    happens to contain one of those substrings no longer redirects the output.

    Args:
        directory: Folder containing the ROI archives.

    Returns:
        None. Nothing is created when the folder holds no ``.zip`` files.
    """
    archives = [name for name in os.listdir(directory) if name.endswith('.zip')]
    if not archives:
        return

    out_dir = os.path.join(directory, 'csv')
    os.makedirs(out_dir, exist_ok=True)

    for filename in archives:
        rois = read_roi_zip(os.path.join(directory, filename))

        names, xs, ys = [], [], []
        for roi in rois.values():
            # Repeat the ROI name once per vertex so the three columns align.
            names.extend([str(roi['name'])] * len(roi['x']))
            xs.extend(roi['x'])
            ys.extend(roi['y'])

        csv_name = filename.replace('.zip', '.csv').replace('_rois', '').replace('Nonseg', 'Seg')
        table = pd.DataFrame({'Name': names, 'X': xs, 'Y': ys})
        table.to_csv(os.path.join(out_dir, csv_name), sep=',', index=False, header=True)

def optimize_flowprob(model, images, files, eval_dir, base_dir, txt_folder, diameter, chan, chan2,
                      man_path=None, pred_path=None):
    """Score every flow-threshold / cellprob-threshold combination.

    For each pair the model is re-run on ``images``, the results are written
    with ``io.masks_flows_to_seg``, ``save_rois_with_conversion`` and
    ``zip_to_csv``, and ``optimized_model_eval_files(man_path, pred_path)``
    is called to score the prediction.

    Args:
        model: Object exposing ``eval(images, channels=..., diameter=...,
            flow_threshold=..., cellprob_threshold=...)`` that returns
            ``(masks, flows, styles)``.
        images: Images passed to ``model.eval``.
        files: Image paths matching ``images``.
        eval_dir: Directory where ROI archives and CSVs are written.
        base_dir: Unused; kept for call compatibility.
        txt_folder: Unused; kept for call compatibility.
        diameter: Diameter passed to ``model.eval``.
        chan: First channel index passed to the model.
        chan2: Second channel index passed to the model.
        man_path: Manual-annotation CSV path. When omitted, the notebook-level
            ``man_path`` variable is used (the previous implicit behaviour).
        pred_path: Prediction CSV path. When omitted, the notebook-level
            ``pred_path`` variable is used.

    Returns:
        Tuple ``(res50, res50_95, allVals_df)`` of DataFrames indexed by
        ``(flow, prob)``: the last ``cell_f1_50`` entry, the last
        ``cell_f1_50-95`` entry, and every metric except the two bulky
        per-object lists.

    Raises:
        NameError: If a path is neither passed nor defined at notebook level.
    """
    # Fall back to the notebook globals so existing positional calls keep working.
    notebook_vars = globals()
    if man_path is None:
        if 'man_path' not in notebook_vars:
            raise NameError("man_path was not passed and is not defined at notebook level")
        man_path = notebook_vars['man_path']
    if pred_path is None:
        if 'pred_path' not in notebook_vars:
            raise NameError("pred_path was not passed and is not defined at notebook level")
        pred_path = notebook_vars['pred_path']
    print(man_path, pred_path)

    tdplot50 = {}
    tdplot50_95 = {}
    allVals = {}
    excluded_keys = ['List of Intersections', 'List of IoUs']

    # Prob values -6..6 (step 1) and flow values 0.1..3.0 (step 0.1).
    # Built from integers and rounded so the keys are exact (0.3, not
    # 0.30000000000000004) and are plain Python numbers in the saved index.
    prob_values = list(range(-6, 7))
    flow_values = [round(0.1 * step, 1) for step in range(1, 31)]

    for flow in tqdm(flow_values, desc="Flow Threshold Loop", position=0, leave=True):
        for prob in tqdm(prob_values, desc=f"Prob Threshold Loop (flow={flow:.1f})", position=1, leave=False):
            key = (flow, prob)

            # Evaluate the model with this threshold pair
            masks, flows, styles = model.eval(
                images,
                channels=[chan, chan2],
                diameter=diameter,
                flow_threshold=flow,
                cellprob_threshold=prob
            )

            # Save the segmentation results
            io.masks_flows_to_seg(
                images,
                masks,
                flows,
                files,
                channels=[chan, chan2],
                diams=diameter * np.ones(len(masks)),
            )

            # Regenerate the prediction files that the scorer reads from pred_path.
            # NOTE(review): if a combination writes no new ROI archive, the
            # previous iteration's files would be scored again -- confirm how
            # io.save_rois handles empty masks and existing archives.
            save_rois_with_conversion(masks, files, eval_dir)
            zip_to_csv(eval_dir)

            performance = optimized_model_eval_files(man_path, pred_path)

            # Keep the last entry of each F1 series for the summary tables
            tdplot50[key] = performance['cell_f1_50'][-1]
            tdplot50_95[key] = performance['cell_f1_50-95'][-1]
            allVals[key] = {k: v for k, v in performance.items() if k not in excluded_keys}

    res50 = pd.DataFrame.from_dict(tdplot50, orient='index', columns=['mAF50'])
    res50_95 = pd.DataFrame.from_dict(tdplot50_95, orient='index', columns=['mAF50-95'])
    allVals_df = pd.DataFrame.from_dict(allVals, orient='index')

    return res50, res50_95, allVals_df

#@markdown ###Path to images and masks:
base_dir = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/Batch2/DRG123'

#@markdown ###Image folder, mask count and channels used to build the model name:
img_folder = 'UnremovedTranscripts'
txt_folder = 'UnremovedTranscriptsTxt'
min_masks = 18
c1 = "Red" #@param ["Grayscale", "Blue", "Green", "Red"]
c2 = "None" #@param ["None", "Blue", "Green", "Red"]

# The model name differs only by prefix: "he" for the H&E folder, "trans" otherwise.
model_prefix = "heDRG123_" if img_folder == 'H&EStain' else "transDRG123_"
model_name = model_prefix+str(min_masks)+"masks_"+c1[0]+c2[0]+'2'
print(model_name)
#@markdown threshold on flow error to accept a mask (set higher to get more cells, e.g. in range from (0.1, 3.0), OR set to 0.0 to turn off so no cells discarded):
flow_threshold = 0.4 #@param {type:"slider", min:0.0, max:3.0, step:0.1}
#@markdown threshold on cellprob output to seed cell masks (set lower to include more pixels or higher to include fewer, e.g. in range from (-6, 6)):
cellprob_threshold = 0 #@param {type:"slider", min:-6, max:6, step:1}

# NOTE(review): out_path and test_dir are not used anywhere in this cell;
# they are kept in case a later cell reads them.
out_path = base_dir+'/cellpose_'+model_name+'_'+str(flow_threshold)+'_'+str(cellprob_threshold)+'.csv'

# Training / test folders for the chosen image type
train_dir = os.path.join(base_dir, img_folder, 'Train')
test_dir = os.path.join(base_dir, img_folder, 'Test')

# Path of the pretrained model to load (nothing is checked or deleted here)
model_path = os.path.join(train_dir, 'models', model_name)
print(model_path)

# Evaluation image folder, the prediction CSV that zip_to_csv produces in it,
# and the manual-annotation CSV it is scored against.
if img_folder == 'H&EStain':
    eval_dir = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/Updated_Figures/DRG Full Images/H&EStain/DRG4'
    pred_path = eval_dir + '/csv/drg4.csv'
    man_path = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Xenium_segementation/ManuscriptProject/Scaled_DRG_Manual_Annotations/drg4_manual_scaled.csv'
else:
    eval_dir = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/Updated_Figures/Evaluation Image/TransDRG4'
    pred_path = eval_dir + '/csv/croppedDRG4.csv'
    man_path = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/Updated_Figures/Evaluation Image/ManualDRG4/drg4manual.csv'

#@markdown ###Custom model path (full path):

#model_path = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/Batch2/DRG123/H&EStain/Train/models/heDRG123_5masks_RN_pre'

#@markdown ###Path to images:

# NOTE: `dir` shadows the Python builtin; the name is kept only because other
# cells may read it. The code below uses eval_dir directly.
dir = eval_dir #@param {type:"string"}

#@markdown ###Channel Parameters:

Channel_to_use_for_segmentation = c1 #@param ["Grayscale", "Blue", "Green", "Red"]

# @markdown If you have a secondary channel that can be used, for instance nuclei, choose it here:

Second_segmentation_channel= c2 #@param ["None", "Blue", "Green", "Red"]


# Here we match the channel to number. A dict lookup fails immediately with a
# KeyError on an unknown name instead of leaving chan / chan2 undefined.
PRIMARY_CHANNEL_INDEX = {"Grayscale": 0, "Blue": 3, "Green": 2, "Red": 1}
SECONDARY_CHANNEL_INDEX = {"None": 0, "Blue": 3, "Green": 2, "Red": 1}
chan = PRIMARY_CHANNEL_INDEX[Channel_to_use_for_segmentation]
chan2 = SECONDARY_CHANNEL_INDEX[Second_segmentation_channel]

#@markdown ### Segmentation parameters:

#@markdown diameter of cells (set to zero to use diameter from training set):
diameter =  0 #@param {type:"number"}


# Collect the image files in the evaluation folder.
# NOTE(review): the second argument is passed as a filter for files to skip;
# confirm that '_heatmap.tif' (with extension) matches the way cellpose
# compares it against file names.
files = io.get_image_files(eval_dir, '_heatmap.tif')
images = [io.imread(f) for f in files]

# declare model (honour the GPU probe from the setup section instead of forcing gpu=True)
model = models.CellposeModel(gpu=use_GPU,
                             pretrained_model=model_path)

# use model diameter if user diameter is 0
diameter = model.diam_labels if diameter==0 else diameter

print('starting test evaluation')
# Baseline run at the single flow / cellprob setting chosen above
masks, flows, styles = model.eval(images,
                                  channels=[chan, chan2],
                                  diameter=diameter,
                                  flow_threshold=flow_threshold,
                                  cellprob_threshold=cellprob_threshold
                                  )

io.masks_flows_to_seg(images,
                      masks,
                      flows,
                      files,
                      channels=[chan, chan2],
                      diams=diameter*np.ones(len(masks)),
                      )

save_rois_with_conversion(masks, files, eval_dir)

zip_to_csv(eval_dir)

print('done')

performance = optimized_model_eval_files(man_path, pred_path)

print(performance.keys())

# Make sure the results folder exists BEFORE the long sweep, so the sweep
# cannot finish and then fail to save its output.
results_dir = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/Updated_Figures/Flow Prob'
os.makedirs(results_dir, exist_ok=True)

# Run the full flow / cellprob sweep and save the three summary tables
res = optimize_flowprob(model, images, files, eval_dir, base_dir, txt_folder, diameter, chan, chan2)
res[0].to_csv(os.path.join(results_dir, model_name+'_probflow_f1_50.csv'))
res[1].to_csv(os.path.join(results_dir, model_name+'_probflow_f1_50_95.csv'))
res[2].to_csv(os.path.join(results_dir, model_name+'_allVals.csv'))

>>> GPU activated? YES
Ready
transDRG123_18masks_RN2
/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/Batch2/DRG123/UnremovedTranscripts/Train/models/transDRG123_18masks_RN2
starting test evaluation
done
        Unnamed: 0    Name     X     Y
0                0    roi0   892  1795
1                1    roi0   891  1796
2                2    roi0   891  1796
3                3    roi0   891  1796
4                4    roi0   891  1796
...            ...     ...   ...   ...
137445      137445  roi305  1754   807
137446      137446  roi305  1753   806
137447      137447  roi305  1753   806
137448      137448  roi305  1753   806
137449      137449  roi305  1753   806

[137450 rows x 4 columns]
Manual conversion
Polygon for ROI "4" is invalid: Self-intersection[1257 2600].
[<POLYGON ((1265 2578, 1264 2578, 1263 2578, 1263 2577, 1262 2577, 1261 2577,...>]
Polygon for ROI "22" is invalid: Ring Self-intersection[2012 1610].
[<POLYGON ((2011 1610, 2010 1610, 2009 1610, 2009 1611, 2008 1

Flow Threshold Loop:   0%|                               | 0/30 [00:00<?, ?it/s]
Prob Threshold Loop (flow=0.1):   0%|                    | 0/13 [00:00<?, ?it/s][A
Flow Threshold Loop:   0%|                               | 0/30 [00:05<?, ?it/s][A


KeyboardInterrupt: 

In [None]:
#CURRENTLY RUNNING