In [1]:
import glob
import os
import logging
import sys

import numpy as np
import tifffile
from skimage import io
import skimage as ski
from cellpose import models
import napari
import torch

# Setup

This block is completely optional: it lets us see a little of what Cellpose is doing under the hood during training, but it is not necessary.

In [2]:
# Route INFO-level log records from the root logger to stdout so that library
# progress messages show up in the notebook output.
r = logging.getLogger()
r.setLevel(logging.INFO)

# Re-running this cell must not stack a second stdout handler on the root
# logger (each extra handler would print every log line one more time), so
# reuse a handler that already writes to the current sys.stdout if present.
h = next(
    (
        existing
        for existing in r.handlers
        if isinstance(existing, logging.StreamHandler)
        and getattr(existing, "stream", None) is sys.stdout
    ),
    None,
)
if h is None:
    h = logging.StreamHandler(sys.stdout)
    r.addHandler(h)
h.setLevel(logging.INFO)

In [3]:
# Create the napari viewer up front; later cells clear its layers and add the
# raw image, the predicted masks and a flow output to it.
viewer = napari.Viewer()

### Loading files

These images have 2 channels: the first is the raw data and the second is the label image we annotated.

In [6]:
# Collect every training TIFF in a stable (sorted) order so that an index
# always refers to the same file between runs.
training_pattern = "files/Training/training_images/*.tif"
files = glob.glob(training_pattern)
files.sort()
# Last expression: show how many files were found.
len(files)

23

Read the images for training. It is of the utmost importance that the masks be label images, not binary images.
Some of the images in this training set are binary, so run skimage's `label` on the second channel to turn it into a labeled image.

In [7]:
# Read every training stack once, then split it into the two parallel lists
# used for training: index 0 of each stack is the raw image, index 1 the
# annotation.
stacks = [tifffile.imread(path) for path in files]
images = [stack[0] for stack in stacks]
# Connected-component labelling turns binary annotations into label images;
# the annotation is cast to uint16 before labelling.
masks = [ski.measure.label(stack[1].astype(np.uint16)) for stack in stacks]
    

Cellpose wants a list of our raw images and a separate list of our label images, so we split them up.

In [8]:
# Spot-check one mask: a label image should have a maximum well above 1
# (one integer id per object), with 0 as the minimum.
sample_mask = masks[5]
sample_mask.max(), sample_mask.min()

(39, 0)

# Training

Use `models.CellposeModel` now instead of `models.Cellpose`.


In [9]:
# Build the pretrained 'cyto2' Cellpose model on the best available device.
# torch.backends.mps is missing on older torch builds, and MPS can be
# unavailable on Intel Macs, so probe for it before asking for the device.
mps_backend = getattr(torch.backends, 'mps', None)
if sys.platform == 'darwin' and mps_backend is not None and mps_backend.is_available():
    # macOS with a usable Metal backend: pass the device explicitly.
    # gpu=False is kept from the original call; the explicit `device` is what
    # selects MPS here.
    d = torch.device('mps')
    model = models.CellposeModel(gpu=False, device=d, model_type='cyto2')
else:
    # Windows/Linux, or a Mac without MPS: let Cellpose pick the device.
    # NOTE(review): Cellpose is expected to fall back to CPU when no GPU is
    # found - confirm for the installed version.
    model = models.CellposeModel(gpu=True, model_type='cyto2')

>> cyto2 << model set to be used
** TORCH CUDA version installed and working. **
>>>> using GPU
>>>> model diam_mean =  30.000 (ROIs rescaled to this size during training)


Cellpose automatically figures out what diameter would be good to use for downsampling our image based on the average size of the objects in the training data we provide.

In [10]:
# Fine-tune the pretrained model on our annotated images. The training log
# shows the weights being written to models\models/custom, i.e. a 'models'
# folder is created inside save_path.
training_options = {
    'channels': [0, 0],
    'save_path': 'models',
    'n_epochs': 100,
    'nimg_per_epoch': 24,
    'model_name': 'custom',
    'batch_size': 16,
    # NOTE(review): presumably the minimum number of masks an image needs to
    # be used for training - confirm against the Cellpose docs.
    'min_train_masks': 1,
}
# Last expression: the returned path of the saved model is shown as output.
model.train(images, masks, **training_options)

computing flows for labels


100%|██████████| 23/23 [00:30<00:00,  1.31s/it]


>>>> median diameter set to = 30
>>>> mean of training label mask diameters (saved to model) 183.167
>>>> training network with 2 channel input <<<<
>>>> LR: 0.20000, batch_size: 16, weight_decay: 0.00001
>>>> ntrain = 23
>>>> nimg_per_epoch = 24
Epoch 0, Time  2.5s, Loss 0.9245, LR 0.0000
saving network parameters to models\models/custom
Epoch 5, Time 10.7s, Loss 0.5197, LR 0.1111
Epoch 10, Time 18.5s, Loss 0.2215, LR 0.2000
Epoch 20, Time 34.2s, Loss 0.1712, LR 0.2000
Epoch 30, Time 49.7s, Loss 0.1485, LR 0.2000
Epoch 40, Time 65.2s, Loss 0.1422, LR 0.2000
Epoch 50, Time 81.3s, Loss 0.1342, LR 0.2000
Epoch 60, Time 97.4s, Loss 0.1318, LR 0.2000
Epoch 70, Time 112.5s, Loss 0.1291, LR 0.2000
Epoch 80, Time 128.9s, Loss 0.1255, LR 0.2000
Epoch 90, Time 144.7s, Loss 0.1243, LR 0.2000
saving network parameters to models\models/custom


'models\\models/custom'

Now check how well the model is doing.

`CellposeModel.eval` has only 3 return values (unlike `models.Cellpose.eval`, which has 4), so drop the last one when unpacking the result.

In [11]:
# Re-read one of the training files and run the fine-tuned model on its raw
# channel to eyeball the segmentation quality.
idx = 7
x = tifffile.imread(files[idx])
eval_settings = dict(channels=[0, 0], cellprob_threshold=0, flow_threshold=.4)
# The third return value is not used in this notebook, so it is discarded.
test_masks, flows, _ = model.eval(x[0], **eval_settings)

In [12]:
# Start from an empty viewer so layers from earlier runs of this cell do not
# pile up.
viewer.layers.clear()
# Raw channel of the image that was just evaluated.
viewer.add_image(x[0])
# Predicted masks as a labels layer; the captured output shows the layer being
# named 'test_masks' after this variable, so keep the argument a plain name.
viewer.add_labels(test_masks)

<Labels layer 'test_masks' at 0x20231979120>

In [13]:
# Add element 2 of the `flows` returned by model.eval as an extra image layer.
# NOTE(review): presumably the cell-probability map per the Cellpose docs -
# confirm for the installed version.
viewer.add_image(flows[2])

<Image layer 'Image [1]' at 0x202361a5060>