# Training a Cellpose model

We will first import cellpose 2.0, check that the GPU is working, and locate your models and images.

In [None]:
!nvcc --version
!nvidia-smi

import os, shutil
import numpy as np
import matplotlib.pyplot as plt
from cellpose import core, utils, io, models, metrics
from glob import glob
import napari
from natsort import natsorted

# Ask cellpose whether a GPU can be used and report the answer as YES/NO.
use_GPU = core.use_gpu()
yn = ['NO', 'YES']
gpu_answer = yn[use_GPU]  # the flag itself is used as the list index
print(f'>>> GPU activated? {gpu_answer}')

In [None]:
# Root folder for the cellpose training data; the data folder used below is
# built from it. NOTE(review): absolute, machine-specific path.
base_dir = '/mnt/DATA/macrohet/upstream_development/segmentation/cellpose_training/'

In [None]:

# Folder (under base_dir) that holds the images and ground-truth masks.
test_dir = os.path.join(base_dir, 'training_dir')
# Channel pair handed to model.eval: 0, 0 -> 'Grayscale', None
ch1 = 0
ch2 = 0


In [None]:
# Load images and their '_gt_inst_masks' label files with cellpose's own
# loader. No test_dir is given, so only the training entries are of interest.
output = io.load_train_test_data(
    train_dir=test_dir,
    test_dir=None,
    mask_filter='_gt_inst_masks',
)
(train_data, train_labels, fns,
 test_data, test_labels, _) = output

In [None]:
# The loader was called with test_dir=None, so the training images double as
# the evaluation set. Copy the lists instead of aliasing them: later cells
# call .pop() on test_data / test_labels, and a plain alias would silently
# shrink train_data / train_labels as well.
test_data = list(train_data)
test_labels = list(train_labels)

## Evaluate on test data (optional)

If you have test data, check performance

In [None]:

# Pretrained cellpose model ('cyto') with network averaging enabled.
cellpose_options = dict(gpu=True, model_type='cyto', net_avg=True)
model = models.Cellpose(**cellpose_options)

In [None]:
from tqdm.auto import tqdm


In [None]:

# Segment each test image in turn with the pretrained model; element [0] of
# every eval() result is kept as that image's predicted mask.
masks = []
for image in tqdm(test_data):
    prediction = model.eval(image,
                            channels=[ch1, ch2],
                            diameter=325,
                            progress=True)
    masks.append(prediction[0])
 

In [None]:
               
# Score the predicted masks against the ground-truth labels; the first item
# of the result holds the average-precision values.
precision_results = metrics.average_precision(test_labels, masks)
ap = precision_results[0]
mean_ap_first_threshold = ap[:, 0].mean()
print('')
print(f'>>> average precision at iou threshold 0.5 = {mean_ap_first_threshold:.3f}')


In [None]:
# Aggregated Jaccard index of the predicted masks against the ground truth.
aji = metrics.aggregated_jaccard_index(test_labels, masks)

In [None]:
# Mean AJI, formatted to three decimals for display.
f'{aji.mean():.3f}'

In [None]:
# Show the raw average-precision and AJI values side by side.
ap, aji

In [None]:
# Open a napari viewer and add every image, followed by its ground-truth
# labels, as separate layers. The stacks are built once up front instead of
# re-stacking the whole dataset twice on every loop iteration.
v = napari.Viewer()
image_stack = np.stack(test_data, axis=0)
label_stack = np.stack(test_labels, axis=0)
for i in range(len(image_stack)):
    v.add_image(image_stack[i], colormap='green', contrast_limits=(0, 6000))
    v.add_labels(label_stack[i], num_colors=1)


In [None]:
# Fresh viewer: all images as one stacked layer, with the predicted masks and
# the ground-truth labels as two label layers.
v = napari.Viewer()

stacked_images = np.stack(test_data, axis=0)
stacked_predictions = np.stack(masks, axis=0)
stacked_ground_truth = np.stack(test_labels, axis=0)

v.add_image(stacked_images, colormap='green', contrast_limits=(0, 6000))
v.add_labels(stacked_predictions, num_colors=1)
v.add_labels(stacked_ground_truth, num_colors=1)

## Evaluate on test data for my model (optional)

Testing model v1, which had 7 of the 8 training images, on the latest ground-truth image.


In [None]:

# Custom-trained model. The path is built from base_dir (the same root the
# data folder uses) rather than repeating the absolute path; the resulting
# string is unchanged.
model_path = os.path.join(base_dir, 'models', 'models', 'macrohet_seg')
model = models.CellposeModel(gpu=True,
                             pretrained_model=model_path)

In [None]:
# File names returned by io.load_train_test_data.
fns

In [None]:
# Remove (and display) the second-to-last file name.
# NOTE(review): not idempotent - every re-run of this cell removes another entry.
fns.pop(-2)

In [None]:
# Show the file list again to check what is left.
fns

In [None]:
# Remove the second-to-last image; the removed array is echoed as cell output.
# NOTE(review): not idempotent - every re-run of this cell removes another image.
test_data.pop(-2)

In [None]:
# Number of images left in test_data.
len(test_data)

In [None]:

# Run the custom model on the test images, one image per call.
# Each call must receive test_data[i]: handing the whole list to eval() on
# every iteration re-segments all images len(test_data) times and leaves
# `masks` as a list of lists that no longer lines up with test_labels.
masks = [model.eval(test_data[i],
                    channels=[ch1, ch2],
                    progress=True)[0]
         for i in tqdm(range(len(test_data)))]
 

In [None]:
# Remove the second-to-last label image, mirroring the removal from test_data.
# NOTE(review): not idempotent - every re-run of this cell removes another entry.
test_labels.pop(-2)

In [None]:
# Number of label images left; compare with len(test_data).
len(test_labels)

In [None]:
# Echo the predicted masks (prints the full contents of `masks`).
masks

In [None]:
               
# Compare the predicted masks with the ground-truth labels; the first item of
# the result holds the average-precision values.
precision_results = metrics.average_precision(test_labels, masks)
ap = precision_results[0]
mean_ap_first_threshold = ap[:, 0].mean()
print('')
print(f'>>> average precision at iou threshold 0.5 = {mean_ap_first_threshold:.3f}')


In [None]:
# Replace the in-memory masks with an array loaded from the working directory.
# NOTE(review): nothing in the visible notebook writes 'masks.npy' - confirm
# where this file comes from before relying on it.
masks = np.load('masks.npy')

In [None]:
# Replace the in-memory labels with an array loaded from the working directory.
# NOTE(review): nothing in the visible notebook writes 'test_labels.npy' -
# confirm where this file comes from before relying on it.
test_labels=np.load('test_labels.npy') 

In [None]:
# Aggregated Jaccard index of the loaded masks against the loaded labels.
aji = metrics.aggregated_jaccard_index(test_labels, masks)

In [None]:
# Mean AJI, formatted to three decimals for display.
f'{aji.mean():.3f}'

In [None]:
# Show the raw average-precision and AJI values side by side.
ap, aji

In [None]:
# Open a napari viewer and add every image, followed by its ground-truth
# labels, as separate layers. The stacks are built once up front instead of
# re-stacking the whole dataset twice on every loop iteration.
v = napari.Viewer()
image_stack = np.stack(test_data, axis=0)
label_stack = np.stack(test_labels, axis=0)
for i in range(len(image_stack)):
    v.add_image(image_stack[i], colormap='green', contrast_limits=(0, 6000))
    v.add_labels(label_stack[i], num_colors=1)


In [None]:
# Fresh viewer: all images as one stacked layer, with the predicted masks and
# the ground-truth labels as two label layers.
v = napari.Viewer()

stacked_images = np.stack(test_data, axis=0)
stacked_predictions = np.stack(masks, axis=0)
stacked_ground_truth = np.stack(test_labels, axis=0)

v.add_image(stacked_images, colormap='green', contrast_limits=(0, 6000))
v.add_labels(stacked_predictions, num_colors=1)
v.add_labels(stacked_ground_truth, num_colors=1)

# 250 epochs on latest model version

In [None]:
# get files (during training, test_data is transformed so we will load it again)
# The file names contain wildcards, so they are expanded with glob here
# rather than handing the raw pattern to io.imread.
test_image_pattern = '/mnt/DATA/macrohet/upstream_development/segmentation/cellpose_training/test_dir/raw_images/r06c09f0*p0*-ch1sk1fk1fl1.tiff'
test_label_pattern = '/mnt/DATA/macrohet/upstream_development/segmentation/cellpose_training/test_dir/ground_truth_inst/gt_inst_r06c09f0*p0*-ch1sk1fk1fl1.tiff'
test_image_files = natsorted(glob(test_image_pattern))
test_label_files = natsorted(glob(test_label_pattern))
if not test_image_files:
    raise FileNotFoundError(f'no test images match {test_image_pattern}')
if len(test_image_files) != len(test_label_files):
    raise ValueError(
        f'{len(test_image_files)} test images but {len(test_label_files)} label files'
    )
test_data = [io.imread(fn) for fn in test_image_files]
test_labels = [io.imread(fn) for fn in test_label_files]

# run model on test images
# Uses ch1/ch2 (set near the top of the notebook) and the diameter stored on
# the model: chan/chan2 are only assigned further down, and diam_labels is
# never assigned as a bare name.
# NOTE(review): this evaluates whichever `model` is currently loaded - confirm
# it is the 250-epoch model.
masks = model.eval(test_data,
                   channels=[ch1, ch2],
                   diameter=model.diam_labels)[0]

# check performance using ground truth labels
ap = metrics.average_precision(test_labels, masks)[0]
print('')
print(f'>>> average precision at iou threshold 0.5 = {ap[:,0].mean():.3f}')


# 10k epochs

In [None]:
# get files (during training, test_data is transformed so we will load it again)
# The file names contain wildcards, so they are expanded with glob here
# rather than handing the raw pattern to io.imread.
test_image_pattern = '/mnt/DATA/macrohet/upstream_development/segmentation/cellpose_training/test_dir/raw_images/r06c09f0*p0*-ch1sk1fk1fl1.tiff'
test_label_pattern = '/mnt/DATA/macrohet/upstream_development/segmentation/cellpose_training/test_dir/ground_truth_inst/gt_inst_r06c09f0*p0*-ch1sk1fk1fl1.tiff'
test_image_files = natsorted(glob(test_image_pattern))
test_label_files = natsorted(glob(test_label_pattern))
if not test_image_files:
    raise FileNotFoundError(f'no test images match {test_image_pattern}')
if len(test_image_files) != len(test_label_files):
    raise ValueError(
        f'{len(test_image_files)} test images but {len(test_label_files)} label files'
    )
test_data = [io.imread(fn) for fn in test_image_files]
test_labels = [io.imread(fn) for fn in test_label_files]

# run model on test images
# Uses ch1/ch2 (set near the top of the notebook) and the diameter stored on
# the model: chan/chan2 are only assigned further down, and diam_labels is
# never assigned as a bare name.
# NOTE(review): this evaluates whichever `model` is currently loaded - confirm
# it is the 10k-epoch model.
masks = model.eval(test_data,
                   channels=[ch1, ch2],
                   diameter=model.diam_labels)[0]

# check performance using ground truth labels
ap = metrics.average_precision(test_labels, masks)[0]
print('')
print(f'>>> average precision at iou threshold 0.5 = {ap[:,0].mean():.3f}')


In [None]:
# Score recorded per number of training epochs.
# NOTE(review): presumably the average-precision values printed by the
# evaluation cells above - confirm.
epoch_score_dict = {
    10: 0.884,
    250: 0.899,
    10_000: 0.868,
}

In [None]:
from macrohet import notify

In [None]:
# Send a 'training complete' text via the macrohet notify helper.
notify.send_sms('training complete')

Plot the images, the predicted masks, and the true labels.

In [None]:

# Three-row grid with one column per test image: the image (top), the
# predicted labels (middle) and the true labels (bottom).
# The column count comes from test_data; the previous `train_files` name is
# not defined anywhere in this notebook. The old np.vstack padding is dropped:
# with the channel transpose disabled it only appended a blank row.
n_images = len(test_data)
row_titles = ['image', 'predicted labels', 'true labels']

plt.figure(figsize=(12,8), dpi=150)
for k, im in enumerate(test_data):
    panels = [im, masks[k], test_labels[k]]
    for row, (panel, title) in enumerate(zip(panels, row_titles)):
        plt.subplot(3, n_images, row * n_images + k + 1)
        plt.imshow(panel)
        plt.axis('off')
        if k == 0:
            # label each row once, on the first column only
            plt.title(title)
plt.tight_layout()

# Use custom model to segment images

Take custom trained model from above, or upload your own model to google drive / colab runtime.

## Parameters

In [None]:
# Show the model path currently in use.
model_path

In [None]:
# model name and path

#@markdown ###Custom model path (full path):

model_path = "human_in_the_loop/train/models/CP_tissuenet" #@param {type:"string"}

#@markdown ###Path to images:

dir = "human_in_the_loop/test" #@param {type:"string"}

#@markdown ###Channel Parameters:

Channel_to_use_for_segmentation = "Green" #@param ["Grayscale", "Blue", "Green", "Red"]

# @markdown If you have a secondary channel that can be used, for instance nuclei, choose it here:

Second_segmentation_channel= "Red" #@param ["None", "Blue", "Green", "Red"]


# Translate the selected channel names into channel numbers via lookup tables.
# A name missing from a table leaves chan / chan2 unassigned.
primary_channel_codes = {"Grayscale": 0, "Blue": 3, "Green": 2, "Red": 1}
secondary_channel_codes = {"Blue": 3, "Green": 2, "Red": 1, "None": 0}

if Channel_to_use_for_segmentation in primary_channel_codes:
  chan = primary_channel_codes[Channel_to_use_for_segmentation]

if Second_segmentation_channel in secondary_channel_codes:
  chan2 = secondary_channel_codes[Second_segmentation_channel]

#@markdown ### Segmentation parameters:

#@markdown diameter of cells (set to zero to use diameter from training set):
diameter =  0#@param {type:"number"}
#@markdown threshold on flow error to accept a mask (set higher to get more cells, e.g. in range from (0.1, 3.0), OR set to 0.0 to turn off so no cells discarded):
flow_threshold = 0.4 #@param {type:"slider", min:0.0, max:3.0, step:0.1}
#@markdown threshold on cellprob output to seed cell masks (set lower to include more pixels or higher to include fewer, e.g. in range from (-6, 6)):
cellprob_threshold=0 #@param {type:"slider", min:-6, max:6, step:1}


If you're using the example test data, we'll copy it to a new folder.

In [None]:
# When `dir` points at the example data folder, copy its image files (those
# not ending in _masks) into a separate eval folder and point `dir` there.
src = 'human_in_the_loop/test'
if dir.startswith(src):
    files = io.get_image_files(dir, '_masks')
    dir = 'human_in_the_loop/eval/'
    os.makedirs(dir, exist_ok=True)
    for source_path in files:
        dst = dir + os.path.basename(source_path)
        print(f'{source_path} > {dst}')
        shutil.copyfile(source_path, dst)

Here's what the command to run the model would be on the command line -- make sure if you run this locally to correct the paths for your local computer.

In [None]:
# Assemble the equivalent command-line call from the parameters chosen above.
cli_parts = [
    'python -m cellpose --use_gpu --verbose',
    f'--dir {dir}',
    f'--pretrained_model {model_path}',
    f'--chan {chan}',
    f'--chan2 {chan2}',
    f'--diameter {diameter}',
    f'--flow_threshold {flow_threshold}',
    f'--cellprob_threshold {cellprob_threshold}',
]
run_str = ' '.join(cli_parts)
print(run_str)

## run custom model

How to run the custom model in a notebook:

In [None]:
# collect the image files in dir (files ending in _masks are skipped) and read them
files = io.get_image_files(dir, '_masks')
print(files)
images = list(map(io.imread, files))

# load the custom model
model = models.CellposeModel(gpu=True, pretrained_model=model_path)

# a user diameter of 0 means: take the diameter stored on the model
if diameter == 0:
    diameter = model.diam_labels

# segment the images with the chosen settings
eval_settings = dict(
    channels=[chan, chan2],
    diameter=diameter,
    flow_threshold=flow_threshold,
    cellprob_threshold=cellprob_threshold,
)
masks, flows, styles = model.eval(images, **eval_settings)

## save output to *_seg.npy

You will see the files saved in the Files tab, and you can download them from there.

In [None]:
from cellpose import io

# Repeat the diameter once per entry in `masks`, then write the results for
# every image.
diams = diameter * np.ones(len(masks))
channel_pair = [chan, chan2]
io.masks_flows_to_seg(images, masks, flows, diams, files, channel_pair)

## save output masks to tiffs/pngs or txt files for imageJ

In [None]:
# Which outputs to write for each image.
save_options = dict(
    png=True,             # masks as PNGs plus an example image
    tif=True,             # masks as TIFFs
    save_txt=True,        # txt outlines for ImageJ
    save_flows=False,     # flows as TIFFs
    save_outlines=False,  # outlines as TIFFs
)
io.save_masks(images, masks, flows, files,
              channels=[chan, chan2],
              **save_options)
    

In [None]:
# Display the '_cp_output.png' file that sits next to the first image.
f = files[0]
output_png = os.path.splitext(f)[0] + '_cp_output.png'
fig, ax = plt.subplots(figsize=(12,4), dpi=300)
ax.imshow(io.imread(output_png))
ax.axis('off')