# 2D cell segmentation with cellpose


* **Make sure GPU is selected in Colab notebook under "Edit" &rarr; "Notebook settings".**
* Modified by Tim Cheung from [this Colab notebook](https://colab.research.google.com/github/MouseLand/cellpose/blob/master/notebooks/run_cellpose_GPU.ipynb).
* For full cellpose documentation, see http://www.cellpose.org/docs
* Please cite the ***cellpose*** algorithm from [Stringer *et al.* (2020) Nature Methods](https://www.nature.com/articles/s41592-020-01018-x).
* Cellpose package repo is at https://github.com/MouseLand/cellpose.
* See also README at https://github.com/MouseLand/cellpose for more Colab notebook implementations, including one that can train the model with your own data set.
* For importing ROI txt into ImageJ, download "imagej_roi_converter.py" from cellpose's GitHub (or right click [here](https://github.com/MouseLand/cellpose/raw/master/imagej_roi_converter.py) &rarr; "save link as"). With your image already open in ImageJ, run the above file as a macro in ImageJ (it should automatically download Jython), then open the corresponding ROI txt file. See [here](https://cellpose.readthedocs.io/en/latest/outputs.html#roi-manager-compatible-output-for-imagej) for details.
* GitHub [link](https://github.com/thccheung/colab-nb-cellpose) to this notebook.


# Setup

## Cellpose installation

Install cellpose -- by default the torch GPU version is installed in the Colab notebook.

**Note that cellpose uses the latest version of numpy, so please click the "Restart runtime" button once the install completes, if needed.**

In [None]:
# Install the version-capped headless OpenCV build first, then cellpose.
# NOTE(review): cellpose is installed unpinned, while the code further down
# relies on `models.Cellpose` and on `model.eval` returning four values --
# confirm the installed release still provides both, or pin the version here.
!pip install "opencv-python-headless<4.3"
!pip install cellpose

Check CUDA version and GPU

In [None]:
# Print the CUDA compiler (nvcc) version, then the nvidia-smi GPU report.
!nvcc --version
!nvidia-smi

Import libraries and check GPU (the first time you import cellpose the models will download).

In [None]:
# Cellpose-related

import numpy as np
import time, os, sys
from urllib.parse import urlparse
import skimage.io
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
mpl.rcParams['figure.dpi'] = 300

from urllib.parse import urlparse
from cellpose import models, core

# Ask cellpose whether a GPU can be used; the flag is reused later when the
# model is constructed.
use_GPU = core.use_gpu()
# %d prints the flag as a number (presumably a bool, i.e. 1 = yes, 0 = no).
print('>>> GPU activated? %d'%use_GPU)

# call logger_setup to have output of cellpose written
# (the trailing ";" stops the notebook from echoing the call's return value)
from cellpose.io import logger_setup
logger_setup();

## Get images

Mount google drive

In [None]:
# Mount Google Drive at /content/drive so that the image folder configured in
# the next cell (under "/content/drive/My Drive/") is reachable.
# force_remount=True requests a fresh mount even if one already exists.
from google.colab import drive
drive.mount("/content/drive", force_remount=True)

Read images

In [None]:
import glob
from pathlib import Path
from cellpose import utils
from skimage import io
import os

##### SETUP OPTIONS #####
# Script will take as input: image files that contain a stack of channels, and
# then split them into individual channels for downstream cellpose processing

img_path = "/content/drive/My Drive/cellpose/" # Folder where the original images are located (make sure there is a "/" at the end of the path)
img_proc_path = img_path + "img_proc/" # Images are channel-split (if needed) and saved to this folder for segmentation
output_path = img_path + "mask/" # Output masks (.npy) are saved to this folder
file_done_dir = img_path + "file_done/" # Images processed by cellpose are moved from "img_proc_path" to here. Useful when the Colab notebook disconnects halfway through a run.
img_orig_ext = ".tif" # Extension of the original images
img_save_ext = ".tif" # Split-channel images are saved with this extension

split_channel_names = ['ch1', 'ch2', 'ch3', 'ch4'] # Channel names for splitting (used as suffix in saved image files). Ignored if there is no need to split channels
channel_to_segment = ['ch1', 'ch2', 'ch3', 'ch4'] # Channels to segment: a list of names that must appear in the file names (see "split_channel_names"), or the string "all" to segment every file in "img_proc_path"

show_pre_segment_img = True # Flag to show loaded pre-segmented images below (may cost RAM)

# cellpose options
show_cellpose_outcome_img = True # Flag to show the cellpose outcome as an image below. Good for diagnostics, but may crash the notebook if there are too many files (out of RAM)
cyto_or_nuclei = 'cyto' # cellpose model type, can be 'cyto' or 'nuclei'. 'cyto' seems to work better even for DAPI
save_imagej_roi = True # Flag to save masks as text for ImageJ ROI processing
roi_txt_path = img_path + "roi_txt/" # ROI txt files are saved to this folder
save_cellprob = False # Flag to save the cell probability for each mask as .npy (for diagnostic purposes)
cellprob_path = img_path + "cellprob/" # Cell probability arrays are saved to this folder


##### REST OF CODE #####
# Collect the original images. Sorted so that figure numbers and saved files
# come out in a reproducible order (glob returns files in arbitrary order).
img_files = sorted(glob.glob(os.path.join(img_path, "*" + img_orig_ext)))

# split image stack into individual channels if needed
Path(img_proc_path).mkdir(exist_ok=True)
for k, f in enumerate(img_files):

  # NOTE: io.imread is used for every format. An earlier version read .tif
  # files with plt.imread because io.imread failed on some tif compression,
  # but plt.imread later became the unreliable one.
  img = io.imread(f)

  # A 2D array is a single-channel image: give it a trailing channel axis so
  # the loop below handles both cases. Otherwise axis 2 is taken as channels.
  split_chn_flag = img.ndim > 2
  if not split_chn_flag:
    img = img[..., np.newaxis]
  n_chn = img.shape[2]

  # Fail before writing anything if a stack has more channels than there are
  # names, instead of hitting an IndexError after some channels were saved.
  if split_chn_flag and n_chn > len(split_channel_names):
    raise ValueError(
        "%s has %d channels but split_channel_names only lists %d names"
        % (os.path.basename(f), n_chn, len(split_channel_names)))

  if show_pre_segment_img:
    plt.figure(k+1)

  stem = os.path.splitext(os.path.basename(f))[0]
  for chn in range(n_chn):
    img_chn = img[:, :, chn]
    if split_chn_flag:
      # Multi-channel input: tag each saved file with its channel name.
      new_fname = stem + "_" + split_channel_names[chn] + img_save_ext
    else:
      # Single-channel input: keep the original file name.
      new_fname = stem + img_save_ext
    new_fpath = os.path.join(img_proc_path, new_fname)
    io.imsave(new_fpath, img_chn)

    if show_pre_segment_img:
      # One column per channel. A fixed 4-column grid raises as soon as an
      # image has more than four channels.
      plt.subplot(1, n_chn, chn + 1)
      plt.imshow(img_chn, cmap="gray")

# pull out channels to segment
# Only the paths are collected here; pixel data is loaded once, further down,
# and only for the files that will actually be segmented (saves RAM).
img_files = sorted(glob.glob(os.path.join(img_proc_path, "*" + img_save_ext)))

if channel_to_segment == "all":
  seg_files = img_files
else:
  def extract_fname(lst):
    """Return the paths in `lst` whose file name contains a wanted channel.

    The channel names in `channel_to_segment` are matched as substrings of
    the file name only (not of the folder part of the path), and every path
    is returned at most once even if it matches several channel names --
    a duplicated entry would be segmented twice and then fail when the
    already-moved file is moved again.
    """
    return [el for el in lst
            if any(ch in os.path.basename(el) for ch in channel_to_segment)]
  seg_files = extract_fname(img_files)
seg_files.sort()
print(*seg_files, sep='\n')

# Load the selected images; `imgs` stays index-aligned with `seg_files`.
imgs = [io.imread(f) for f in seg_files]
nimg = len(imgs)
print("Total " + str(nimg) + " images to process.")

# Run cellpose on 2D images

## Setup channel parameters

In [None]:
import pandas as pd

##### SETUP OPTIONS #####
# Per-channel cellpose settings. The lists are positional: entry i of every
# list belongs to entry i of 'channel', so keep them in the same order.
param = {
    'channel': ['ch1', 'ch2', 'ch3', 'ch4'],
    # Typical cell diameter, in pixels
    'cell diam': [38, 28, 30, 30],
    # Flow threshold (for cell shape), ranges from 0 (strict) to 1 (relaxed);
    # increase -> more cells segmented (but may increase false positives)
    'flow threshold': [0.4, 0.4, 0.4, 0.4],
    # Cell probability threshold, ranges from -6 (relaxed) to 6 (strict);
    # decrease -> more cells segmented (but may increase false positives)
    'cellprob threshold': [0, 0, 0, 0],
}


##### REST OF CODE #####
# Turn the settings into a table with one row per channel, indexed by the
# channel name, and print it so the values can be checked before running.
df_channel = pd.DataFrame(param).set_index('channel')
print(df_channel)

## Run cellpose

In [None]:
# RUN CELLPOSE

##### REST OF CODE #####
from cellpose import models, plot, io
from shutil import copyfile
import os

# Folders to create before segmentation starts: masks and finished inputs are
# always needed, ROI text and cell-probability folders only when enabled.
needed_dirs = [output_path, file_done_dir]
if save_imagej_roi:
  needed_dirs.append(roi_txt_path)
if save_cellprob:
  needed_dirs.append(cellprob_path)

# exist_ok=True makes re-running the cell harmless when folders already exist.
for needed_dir in needed_dirs:
  Path(needed_dir).mkdir(exist_ok=True)

# DEFINE CELLPOSE MODEL
# model_type = 'cyto' seems to work better even for Fos. Probably because
# cellpose's nuclei model was trained on DAPI-stained, not Fos-stained
# The model is built once here and reused for every image in the loop below;
# it runs on the GPU only if the earlier GPU check set use_GPU.
model = models.Cellpose(gpu=use_GPU, model_type=cyto_or_nuclei) # model_type='cyto' or 'nuclei'

# The triple-quoted text below is an inert string kept as reference notes on
# the `channels` argument; none of it is executed as configuration.
"""
# define CHANNELS to run segementation on
# grayscale=0, R=1, G=2, B=3
# channels = [cytoplasm, nucleus]
# if NUCLEUS channel does not exist, set the second channel to 0
# channels = [0,0]
# IF ALL YOUR IMAGES ARE THE SAME TYPE, you can give a list with 2 elements
# channels = [0,0] # IF YOU HAVE GRAYSCALE
# channels = [2,3] # IF YOU HAVE G=cytoplasm and B=nucleus
# channels = [2,1] # IF YOU HAVE G=cytoplasm and R=nucleus
# or if you have different types of channels in each image
channels = [[2,3], [0,0], [0,0]]
"""
# Grayscale with no separate nucleus channel (see the notes above); this one
# setting is passed to model.eval and to the plotting call for every image.
cp_channels = [0, 0]

# if diameter is set to None, the size of the cells is estimated on a per image basis
# you can set the average cell `diameter` in pixels yourself (recommended)
# diameter can be a list or a single number for all images

# NOTE: a minimum-size filter was tried and dropped because the min size
# argument in cellpose's model.eval did not seem to work. For reference:
#   minSizeFact = 1.5 # factor for calculating minimum size based on diam
#   minSize = round(minSizeFact * np.pi*(cell_diam/2)**2)

# Segment every selected image in turn. `io` in this cell is cellpose.io
# (imported at the top of the cell), not skimage.io.
for img, f in zip(imgs, seg_files):

  img_fname = os.path.splitext(os.path.basename(f))[0] # file name without extension

  # Retrieve cellpose parameters from channel dataframe: find the channel
  # names that occur in the file name.
  matching = [chl for chl in df_channel.index if chl in img_fname]
  if not matching:
    raise ValueError("Channel name not in filename for %s"  % os.path.basename(f))
  # Prefer the longest matching name so that e.g. 'ch10' is not mistaken for
  # 'ch1'; among equally long names the one listed last wins.
  chl = max(reversed(matching), key=len)
  chl_param = df_channel.loc[chl]
  cell_diam = chl_param['cell diam']
  flow_threshold = chl_param['flow threshold']
  cellprob_threshold = chl_param['cellprob threshold']

  # Segment using cellpose
  masks, flows, styles, diams = model.eval(img, diameter=cell_diam,
                                           normalize=True,
                                           flow_threshold=flow_threshold,
                                           cellprob_threshold=cellprob_threshold,
                                           channels=cp_channels)

  # Save outcome to file, using the smallest integer type that holds all labels
  save_fpath = output_path + img_fname
  mask_dtype = np.uint16 if np.max(masks) < 2**16 - 1 else np.uint32
  np.save(save_fpath + '_mask.npy', masks.astype(mask_dtype))

  # Save masks as text for ImageJ ROI if called.
  # outlines_to_text appends "_cp_outlines.txt" to the prefix it is given, so
  # the file is written straight into the ROI folder rather than into the
  # current working directory and copied from there afterwards.
  if save_imagej_roi:
    outlines = utils.outlines_list(masks)
    io.outlines_to_text(os.path.join(roi_txt_path, img_fname), outlines)

  # Save cell probabilities if called
  if save_cellprob:
    cellprob = flows[2]
    np.save(cellprob_path + img_fname + '_cellprob.npy', cellprob)

  print('Finished processing ' + img_fname)

  # Move finished file to "file done" subdirectory, so that a rerun after a
  # disconnect only sees the images that are still unprocessed
  file_done_path = file_done_dir + os.path.basename(f)
  os.replace(f, file_done_path)

  # Show outcome (can crash notebook if too many files)
  if show_cellpose_outcome_img:
    fig = plt.figure(figsize=(12,5))
    plot.show_segmentation(fig, img, masks, flows[0], channels=cp_channels)
    plt.tight_layout()
    plt.show()
    # Release the figure explicitly so memory does not build up across images
    plt.close(fig)
