# Phenotype classification using CellX

This notebook shows how to take segmented time-lapse microscopy images and use H2B fluorescence markers to classify the mitotic state of each cell.

The sections of this notebook are as follows:

1. Load images
2. Localise the objects
3. Classify the objects
4. Filter the objects
5. Run btrack, linking the objects' locations over time

The data used in this notebook is time-lapse microscopy data with H2B-GFP/RFP markers that show the spatial extent of the nucleus and its mitotic state.

This notebook uses the dask octopuslite image loader from the CellX/Lowe lab project.

In [1]:
from octopuslite import DaskOctopusLiteLoader
import btrack
from tqdm import tqdm
import numpy as np
from scipy.special import softmax
import os
import matplotlib.pyplot as plt
from skimage.io import imread, imshow
%matplotlib inline
plt.rcParams['figure.figsize'] = [18,8]

In [2]:
def segmentation_generator(files, crop = None):
    """Lazily yield segmentation images, optionally centre-cropped.

    Parameters
    ----------
    files : sequence of str
        Paths of the segmentation images; each is read with ``imread`` only
        when the consumer asks for it.
    crop : tuple of int, optional
        Size of the central window to keep, one entry per image axis. The
        window is derived once from the shape of the first image and applied
        to every file. If ``None`` the images are yielded uncropped.

    Yields
    ------
    img : array
        The (optionally cropped) image for each entry of ``files``, in order.

    Notes
    -----
    When cropping, the computed slices are printed before the first frame and
    the last cropped frame is shown with matplotlib once every file has been
    consumed (a visual sanity check of the crop window).
    """
    if crop is None:
        # no reference shape is needed here, so the first file is not read up
        # front and an empty file list simply yields nothing
        for filename in files:
            yield imread(filename)
        return

    # decode the first image once to obtain the reference shape
    first = imread(files[0])
    shape, dims = first.shape, first.ndim
    del first

    def cslice(d):
        # central window of length crop[d] along axis d
        return slice(
            int((shape[d] - crop[d]) // 2),
            int((shape[d] - crop[d]) // 2 + crop[d]))

    crops = tuple(cslice(d) for d in range(dims))
    print(crops)
    for filename in files:
        img = imread(filename)[crops]
        yield img
    # only reached once the generator has been fully consumed
    plt.imshow(img)
    plt.show()

## 1. Load segmentation images

In [None]:
# experiment and position to process
expt, pos = 'ND0009', 'Pos3'

# loader pointing at the stardist masks of this position
image_path = f'/home/nathan/data/kraken/ras/{expt}/{pos}/{pos}_stardist_masks'
masks = DaskOctopusLiteLoader(
    image_path,
    crop=(1200,1600),
    remove_background=True,
)

# hand the mask file lists to the generator function so frames are read lazily;
# each generator can only be consumed once
segmentation_gfp, segmentation_rfp = (
    segmentation_generator(masks.files(channel), crop=(1200,1600))
    for channel in ('mask_gfp', 'mask_rfp')
)

## 2. Localise the objects

#### GFP object localisation

In [4]:
# localise the GFP objects from the GFP mask generator created when loading
# the segmentation images, measuring the area of every segment
objects_gfp = btrack.utils.segmentation_to_objects(
    segmentation_gfp,
    properties = ('area', ),
)

[INFO][2022/01/13 05:02:48 PM] Localizing objects from segmentation...
[INFO][2022/01/13 05:03:16 PM] Objects are of type: <class 'dict'>
[INFO][2022/01/13 05:03:17 PM] ...Found 196104 objects in 1072 frames.


#### (Optional) RFP object localisation

In [5]:
# localise the RFP objects from the RFP mask generator, measuring segment area
objects_rfp = btrack.utils.segmentation_to_objects(segmentation_rfp, properties=('area', ))

[INFO][2022/01/13 05:03:17 PM] Localizing objects from segmentation...
[INFO][2022/01/13 05:03:32 PM] Objects are of type: <class 'dict'>
[INFO][2022/01/13 05:03:33 PM] ...Found 37211 objects in 1072 frames.


## 3. Classify the objects 

In [6]:
from cellx import load_model
from cellx.tools.image import InfinitePaddedImage
from skimage.transform import resize

# classifier network used by classify_objects (via model.predict)
model = load_model(
    '/home/nathan/analysis/segment-classify-track/models/cellx_classifier_stardist.h5'
)

In [7]:
# class names; position ki in this list names entry ki of the classifier output
LABELS = [
    "interphase",
    "prometaphase",
    "metaphase",
    "anaphase",
    "apoptosis",
]

In [8]:
def normalize_channels(x):
    """Normalize every channel (last axis) of ``x`` independently.

    Parameters
    ----------
    x : np.ndarray
        Array whose last axis indexes the channels.

    Returns
    -------
    np.ndarray
        The array with each channel replaced by ``normalize(channel)``.
        Floating-point input is modified in place and returned; any other
        dtype is first copied to ``float32`` so the normalized values are not
        truncated when written back.
    """
    if not np.issubdtype(x.dtype, np.floating):
        # assigning normalized floats into e.g. an integer array would
        # silently cast them back to integers
        x = x.astype(np.float32)

    for dim in range(x.shape[-1]):
        x[..., dim] = normalize(x[..., dim])

    return x

def normalize(x):
    """Standardize ``x`` to zero mean and unit standard deviation.

    The input is converted to ``float32`` first. The divisor is the standard
    deviation, floored at ``1 / (number of elements)`` so that a constant
    input does not cause a division by zero.
    """
    as_float = x.astype(np.float32)
    centre = np.mean(as_float)

    # lower bound on the divisor for (near-)constant input
    floor = 1./np.prod(x.shape)
    spread = np.max([np.std(as_float), floor])

    return (as_float - centre) / spread

In [21]:
def classify_objects(image, objects, obj_type=1):
    """Classify every object, frame by frame, and store the result on it.

    For each frame a two-channel image is stacked from ``image`` and the
    fluorescence stack selected by ``obj_type``. An 80x80 px window centred on
    each object is cut out (reflect-padded at the borders), resized to 64x64,
    channel-normalized and passed to the module-level ``model``.

    Parameters
    ----------
    image : lazy image stack
        Indexed as ``image[n, ...].compute()``; ``image.shape[0]`` is the
        number of frames.
    objects : list
        Objects exposing ``t``, ``x`` and ``y``. They are updated in place:
        ``label`` receives the arg-max class index and ``properties`` receives
        one ``prob_<name>`` entry per name in ``LABELS``.
    obj_type : int
        ``1`` pairs ``image`` with the module-level ``gfp`` stack, any other
        value with ``rfp``.

    Returns
    -------
    list
        The same ``objects`` that were passed in.
    """
    half_window = 40        # crop extends this many px either side of the centroid
    input_shape = (64, 64)  # spatial size the crops are resized to for the model

    # second input channel for the classifier (module-level image stacks)
    fp = gfp if obj_type == 1 else rfp

    # bucket the objects by frame once instead of rescanning the whole list
    # for every frame
    objects_by_frame = {}
    for obj in objects:
        objects_by_frame.setdefault(obj.t, []).append(obj)

    for n in tqdm(range(image.shape[0])):

        _objects = objects_by_frame.get(n, [])
        if not _objects:
            # nothing to classify, so do not load the frame at all
            continue

        crops = []
        to_update = []

        frame = np.stack(
            [image[n, ...].compute(), fp[n, ...].compute()],
            axis=-1,
        )

        vol = InfinitePaddedImage(frame, mode = 'reflect')

        for obj in _objects:
            xs = slice(int(obj.x-half_window), int(obj.x+half_window), 1)
            ys = slice(int(obj.y-half_window), int(obj.y+half_window), 1)

            crop = vol[ys, xs, :]
            crop = resize(crop, input_shape, preserve_range=True).astype(np.float32)

            if crop.shape == input_shape + (2,):
                crops.append(normalize_channels(crop))
                to_update.append(obj)
            else:
                print(crop.shape)

        if not crops:
            continue

        pred = model.predict(np.stack(crops, axis=0))

        # predictions line up with the crops that were actually classified
        # (to_update), which can be fewer than the objects in the frame
        assert pred.shape[0] == len(to_update)
        for obj, obj_pred in zip(to_update, pred):
            probabilities = softmax(obj_pred)

            obj.label = np.argmax(obj_pred)
            # NOTE(review): the inspected object in this notebook still shows
            # `area` after classification, so this assignment appears to merge
            # into the existing properties rather than replace them - confirm
            obj.properties = {
                f"prob_{k}": probabilities[ki] for ki, k in enumerate(LABELS)
            }

    return objects

#### Load raw images for classifier

In [11]:
# raw image stacks for the classifier, cropped to the same window as the masks
images = DaskOctopusLiteLoader(
    f'/home/nathan/data/kraken/ras/{expt}/{pos}/{pos}_aligned',
    crop = (1200,1600),
)

# one stack per channel
bf, gfp, rfp = (images[channel] for channel in ('brightfield', 'gfp', 'rfp'))

#### Classify objects

In [22]:
# classify both populations against the brightfield stack; obj_type selects the
# fluorescence stack used as second channel (1 -> gfp, anything else -> rfp).
# classify_objects updates the objects in place and returns the list it was given.
objects_gfp = classify_objects(bf, objects_gfp, obj_type = 1)
objects_rfp = classify_objects(bf, objects_rfp, obj_type = 2)

100%|██████████| 1072/1072 [22:01<00:00,  1.23s/it]


#### Inspect objects

In [28]:
# show the first classified GFP object (label, area and per-class probabilities)
objects_gfp[0]

Unnamed: 0,ID,x,y,z,t,dummy,states,label,prob,area,prob_interphase,prob_prometaphase,prob_metaphase,prob_anaphase,prob_apoptosis
0,0,966.112975,1126.130313,0.0,0,False,0,4,0.0,1788,2e-05,8e-06,2e-06,1.6e-05,0.999954


#### Save out classified GFP objects

In [27]:
# store the GFP masks together with the objects that were localised from them
with btrack.dataio.HDF5FileHandler(
    os.path.join(f'/home/nathan/data/kraken/ras/{expt}/{pos}/segmented_gfp.h5'), 'w', obj_type='obj_type_1',
) as hdf:
    hdf.write_segmentation(masks['mask_gfp'])
    hdf.write_objects(objects_gfp)

[INFO][2022/01/13 06:07:08 PM] Opening HDF file: /home/nathan/data/kraken/ras/ND0009/Pos3/segmented_gfp.h5...
[INFO][2022/01/13 06:10:00 PM] Writing objects/obj_type_1
[INFO][2022/01/13 06:10:00 PM] Writing labels/obj_type_1
[INFO][2022/01/13 06:10:00 PM] Writing properties/obj_type_1
[INFO][2022/01/13 06:10:00 PM] Closing HDF file: /home/nathan/data/kraken/ras/ND0009/Pos3/segmented_gfp.h5


#### Save out classified RFP objects

In [None]:
# store the RFP masks together with the classified RFP objects
with btrack.dataio.HDF5FileHandler(
    f'/home/nathan/data/kraken/ras/{expt}/{pos}/segmented.h5',
    'w',
    obj_type='obj_type_2',
) as hdf:
    hdf.write_segmentation(masks['mask_rfp'])
    hdf.write_objects(objects_rfp)

## 4. Filter the objects 

Remove segments that are too small (area of 100 or less) to feasibly be cells.

In [29]:
def _large_enough(obj):
    """Return True when the object's segment area exceeds 100."""
    return obj.properties['area'] > 100.

# keep only the objects whose segment passes the area threshold
filtered_gfp_objects = list(filter(_large_enough, objects_gfp))
filtered_rfp_objects = list(filter(_large_enough, objects_rfp))

## 5. Run btrack  

Link each object with its subsequent position at the following time point and export the result as a tracking file.

#### For GFP objects

In [31]:
# initialise a tracker session using a context manager
with btrack.BayesianTracker() as tracker:

    # configure the tracker using a config file
    tracker.configure_from_file(
        "/home/nathan/analysis/BayesianTracker/models/MDCK_config_new.json"
    )
    # override the search radius after the config is loaded
    # (the captured log shows it going from 100 to 40)
    tracker.max_search_radius = 40

    # append the objects to be tracked (area-filtered GFP objects)
    tracker.append(filtered_gfp_objects)

    # set the volume
    # NOTE(review): the images are cropped with crop=(1200, 1600) elsewhere in
    # this notebook - confirm that the axis order given here matches the order
    # btrack expects for the objects' x/y coordinates
    tracker.volume=((0, 1200), (0, 1600), (-1e5, 1e5))

    # track them (in interactive mode)
    tracker.track_interactive(step_size=100)

    # generate hypotheses and run the global optimizer
    tracker.optimize()

    # the RFP cell exports to the same tracks.h5 under obj_type_2
    # NOTE(review): assumes export adds to an existing file rather than
    # replacing it - confirm against btrack
    tracker.export((f'/home/nathan/data/kraken/ras/{expt}/{pos}/tracks.h5'), obj_type='obj_type_1')

    # get the tracks in a format for napari visualization (optional)
    # (these three names are reassigned by the RFP tracking cell)
    data, properties, graph = tracker.to_napari(ndim=2)
    
    # keep the tracks for inspection after the session closes
    gfp_tracks = tracker.tracks

[INFO][2022/01/13 06:13:08 PM] Loaded btrack: /home/nathan/src/btrack/btrack/libs/libtracker.so
[INFO][2022/01/13 06:13:08 PM] btrack (v0.4.2) library imported
[INFO][2022/01/13 06:13:08 PM] Setting max XYZ search radius to: 100
[INFO][2022/01/13 06:13:08 PM] Starting BayesianTracker session
[INFO][2022/01/13 06:13:08 PM] Loading configuration file: /home/nathan/analysis/BayesianTracker/models/MDCK_config_new.json
[INFO][2022/01/13 06:13:08 PM] Loading motion model: b'MDCK_motion_Kristina'
[INFO][2022/01/13 06:13:08 PM] Setting max XYZ search radius to: 40
[INFO][2022/01/13 06:13:08 PM] Objects are of type: <class 'list'>
[INFO][2022/01/13 06:13:09 PM] Set volume to ((0, 1200), (0, 1600), (-100000.0, 100000.0))
[INFO][2022/01/13 06:13:09 PM] Starting tracking... 
[INFO][2022/01/13 06:13:09 PM] Tracking objects in frames 0 to 99 (of 1072)...
[INFO][2022/01/13 06:13:09 PM]  - Timing (Bayesian updates: 0.99ms, Linking: 0.19ms)
[INFO][2022/01/13 06:13:09 PM]  - Probabilities (Link: 1.00000

#### For RFP objects

In [None]:
# initialise a tracker session using a context manager
with btrack.BayesianTracker() as tracker:

    # configure the tracker using a config file
    tracker.configure_from_file(
        "/home/nathan/analysis/BayesianTracker/models/MDCK_config_new.json"
    )
    # override the search radius after the config is loaded
    tracker.max_search_radius = 40

    # append the objects to be tracked (area-filtered RFP objects)
    tracker.append(filtered_rfp_objects)

    # set the volume
    # NOTE(review): the images are cropped with crop=(1200, 1600) elsewhere in
    # this notebook - confirm that the axis order given here matches the order
    # btrack expects for the objects' x/y coordinates
    tracker.volume=((0, 1200), (0, 1600), (-1e5, 1e5))

    # track them (in interactive mode)
    tracker.track_interactive(step_size=100)

    # generate hypotheses and run the global optimizer
    tracker.optimize()

    # the GFP cell exports to the same tracks.h5 under obj_type_1
    # NOTE(review): assumes export adds to an existing file rather than
    # replacing it - confirm against btrack
    tracker.export((f'/home/nathan/data/kraken/ras/{expt}/{pos}/tracks.h5'), obj_type='obj_type_2')

    # get the tracks in a format for napari visualization (optional)
    # (reassigns the data/properties/graph names set by the GFP tracking cell)
    data, properties, graph = tracker.to_napari(ndim=2)
    
    # keep the tracks for inspection after the session closes
    rfp_tracks = tracker.tracks

In [33]:
# show the first GFP track
gfp_tracks[0]

Unnamed: 0,ID,t,x,y,z,parent,root,state,generation,dummy,prob_anaphase,prob_interphase,area,prob_apoptosis,prob_prometaphase,prob_metaphase
0,48,0,920.497843,946.520276,0.0,48,48,4,0,False,0.00112092,3.788973e-06,1159,0.998871,2.321098e-07,3.649791e-06
1,48,1,919.07573,947.135949,0.0,48,48,4,0,False,2.060163e-07,1.583184e-06,1096,0.999998,8.663034e-08,1.555668e-10
2,48,2,920.665741,950.576852,0.0,48,48,4,0,False,4.237243e-07,3.041058e-06,1080,0.989729,0.01026728,3.060012e-09
3,48,3,921.846449,950.96833,0.0,48,48,4,0,False,1.557778e-05,4.036627e-06,1042,0.999975,4.965856e-06,3.947499e-09
4,48,4,921.33114,952.837719,0.0,48,48,4,0,False,0.0002424746,1.432962e-06,912,0.999749,6.365148e-06,9.571741e-07
5,48,5,921.686593,952.971819,0.0,48,48,4,0,False,3.133375e-08,2.45744e-08,1171,1.0,9.052188e-08,1.747792e-11
6,48,6,922.479682,953.998233,0.0,48,48,4,0,False,7.917907e-06,5.576101e-07,1132,0.99999,1.630473e-06,1.130596e-08
7,48,7,922.304653,954.204565,0.0,48,48,4,0,False,1.401395e-05,1.089339e-06,1139,0.999983,1.724238e-06,5.722831e-10
8,48,8,923.651416,957.188453,0.0,48,48,4,0,False,2.39266e-06,2.009824e-08,918,0.999998,1.274927e-09,1.078397e-11
9,48,9,921.430493,954.403587,0.0,48,48,4,0,False,6.697876e-06,2.927035e-07,1115,0.999993,4.100855e-08,3.770279e-10


In [None]:
# show the first RFP track
rfp_tracks[0]