# Many-cell trajectory measurement


This notebook measures a series of time-dependent single-cell fluorescence/holographic intensities over the course of a time-lapse microscopy data set.


```
Author: 
- Nathan J. Day (nathan.day.16@ucl.ac.uk)
```


### STEPS
1. Directory organisation
2. Load image information
3. Load the xyt data of each cell from HDF5 tracking file (see Arboretum tracker @quantumjot github)
4. Meta-analysis of tracks with optional filtering of anomalous tracks
5. Measure cellular properties across all images, all cells
6. Save information out as pandas df and .csv

In [14]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from scipy import ndimage
from skimage import io #used to be from skimage import io
from skimage.io import imshow
from sklearn import preprocessing
from matplotlib import animation
from natsort import natsorted
import btrack
from btrack.utils import import_HDF, import_JSON
from tqdm.notebook import tnrange, tqdm
from time import sleep
from pathlib import Path

## Measure 

Read the cell mask information (location & area) and obtain fluorescence and phase statistics. If the data already exists in a .csv and just needs to be plotted, then skip to the plotter section.

#### Raw data (image) location

The image data needs to be organised inside the ```root_path``` directory as follows:

```
phase/
    0000_phase.tif
    0001_phase.tif
    ...
gfp/
    0000_gfp.tif
    0001_gfp.tif
    ...
rfp/
    0000_rfp.tif
    0001_rfp.tif
    ...
labels/
    0000_mask.tif
    0001_mask.tif
    ...    
```

In [5]:
# Top-level folder of the experiment; every other path is derived from it.
root_path = "/home/nathan/data/fucci/fucci1_171201/"

# One sub-directory per imaging channel, plus one for the segmentation masks.
gfp_dir, rfp_dir, phase_dir, mask_dir = (
    os.path.join(root_path, sub_dir)
    for sub_dir in ("gfp", "rfp", "phase", "labels")
)

# Naturally sorted .tif file names of each directory, so that position N in a
# list is the N-th file in natural (numeric-aware) name order.
gfp_list, rfp_list, phase_list, mask_list = (
    natsorted([name for name in os.listdir(folder) if name.endswith('.tif')])
    for folder in (gfp_dir, rfp_dir, phase_dir, mask_dir)
)

### Load the tracking information to obtain xyt of cells

Load tracks directly from hdf5

In [None]:
# HDF5 file holding the tracker output for this experiment.
filename = '/home/nathan/data/fucci/fucci1_171201/tracks/tracks_cellpose/cellpose_nuclear.h5'

# Open read-only; the handler is released when the `with` block exits.
with btrack.dataio.HDF5FileHandler(filename, 'r') as hdf_handler:
    all_tracks = hdf_handler.tracks

# Only the first entry is analysed. According to the original author it holds
# the tracks of cell type 0 (i.e. WT or Scr etc).
tracks = all_tracks[0]
print("Track information loaded")

## Meta-analysis of tracks

List of track lengths

In [None]:
### ID and length (number of time points) of every loaded track
for track in tracks:
    print("track ID.", track.ID, "length=", len(track))

Histogram of track lengths

In [None]:
# Length of every track, collected into a pandas Series (despite the `_df`
# suffix) so that it can be plotted directly.
track_lengths = list(map(len, tracks))
tracks_df = pd.Series(track_lengths)
print(max(tracks_df))  # length of the longest track

title = 'Distribution of track lengths cellpose'

# 25-bin histogram of the track lengths.
tracks_df.plot.hist(
    grid=True,
    bins=25,
    rwidth=0.9,
    color='#607c8e',
)
plt.title(title)
plt.xlabel('Track length')
plt.ylabel('Frequency')
# Restyle the grid lines of the y axis with partial transparency.
plt.grid(axis='y', alpha=0.75)

### Filter track lengths if desired

In [None]:
# A track is kept only if it has MORE than this many entries; change the value
# here to loosen or tighten the filter.
MIN_TRACK_LENGTH = 800

# Tracks with MIN_TRACK_LENGTH entries or fewer are excluded from the analysis.
filtered_tracks = [track for track in tracks if len(track) > MIN_TRACK_LENGTH]
track_lengths = [len(track) for track in filtered_tracks]
track_IDs = [track.ID for track in filtered_tracks]
print(track_IDs)

# One summary line per retained track.
for track_id, track_length in zip(track_IDs, track_lengths):
    print("ID = ", track_id, "/ Length = ", track_length)

# The _NUMBER CRUNCH_ zone

#### Data output directory creation 

In [9]:
# Name of the segmentation that produced the masks. It is used as the name of
# the sub-directory of `stats/` into which the per-cell .csv files are written,
# so results from different mask sources are kept apart.
mask_type = "cellpose_nuclear"

In [15]:
### DIRECTORY CREATION ###
# Results go to <root_path>/stats/<mask_type>/.
stats_dir = os.path.join(root_path, 'stats')
mask_type_stats_dir = os.path.join(stats_dir, mask_type)

# Create both levels; directories that already exist are left untouched.
for output_dir in (stats_dir, mask_type_stats_dir):
    Path(output_dir).mkdir(parents=True, exist_ok=True)

## Calculations

In [None]:
# Measure every filtered track frame by frame and write one .csv per cell.
#
# For each track the loop below:
#   1. selects the mask/GFP/RFP/phase file names of the frames the track visits,
#   2. finds the connected mask region lying under the track coordinate,
#   3. sums the GFP, RFP and phase pixel values inside that region,
#   4. stores one table row per frame and saves the table as
#      <mask_type_stats_dir>/cell_ID_<ID>.csv.
print('Total calculation progress bar below (n/N cells):')
for cell in tqdm(filtered_tracks):
    ID = str(cell.ID)
    print('cell ID = ',ID, 'progress bar below: (n/N frames):')
    frames = cell.t
    x = cell.x
    y = cell.y

    fn = "cell_ID_" + ID + ".csv"
    stat_file = os.path.normpath(os.path.join(mask_type_stats_dir, fn))

    # Fresh, empty table for this cell; one row is added per tracked frame.
    df = pd.DataFrame([], columns=['Cell ID', 'Frame', 'GFP Intensity', 'RFP Intensity', 'Phase Intensity', 'Area', 'x','y', 'mask_fn'])

    # File names restricted to the frames of this track, in track order.
    # The frame numbers are used as positions in the naturally sorted file
    # lists, so frame N is assumed to be the N-th file - TODO confirm.
    new_mask_list = [mask_list[k] for k in frames]
    new_gfp_list = [gfp_list[k] for k in frames]
    new_rfp_list = [rfp_list[k] for k in frames]
    new_phase_list = [phase_list[k] for k in frames]

    # Open each mask image, use the track coordinate to pick one region of it,
    # then use that region to measure the other channels of the same frame.
    for i, mask_name in enumerate(tqdm(new_mask_list)):
        mask = io.imread(os.path.normpath(os.path.join(mask_dir, mask_name)))

        # Track coordinates are truncated to integer pixel indices.
        xi = int(x[i])
        yi = int(y[i])

        # Connected-component labelling of the non-zero mask pixels.
        # NOTE(review): if the mask files already carry one integer label per
        # cell, touching cells end up in the same component here - confirm
        # that this is intended.
        labelled, _ = ndimage.label(mask)

        # A coordinate outside the image would raise IndexError, and a
        # negative one would silently wrap around to the opposite edge; both
        # cases are recorded as a missing measurement instead.
        # NOTE(review): the array is indexed [xi, yi], i.e. x selects the first
        # axis - confirm this matches the tracker's coordinate convention.
        in_bounds = 0 <= xi < labelled.shape[0] and 0 <= yi < labelled.shape[1]

        # Measurements stay None (empty cells in the .csv) when there is no
        # mask region under the coordinate at this frame; the original author
        # attributes such frames to tracker interpolation.
        area = None
        gfp_pix_value = None
        rfp_pix_value = None
        phase_pix_value = None

        if in_bounds and labelled[xi, yi] != 0:
            # Boolean mask containing only the region of the cell of interest.
            cell_mask = labelled == labelled[xi, yi]
            area = ndimage.sum(cell_mask)  # number of pixels in the region

            # The "Intensity" values are SUMS of the pixel values inside the
            # region, not means.
            gfp = io.imread(os.path.join(gfp_dir, new_gfp_list[i]))
            gfp_pix_value = ndimage.sum(gfp, cell_mask, 1)

            rfp = io.imread(os.path.join(rfp_dir, new_rfp_list[i]))
            rfp_pix_value = ndimage.sum(rfp, cell_mask, 1)

            phase = io.imread(os.path.join(phase_dir, new_phase_list[i]))
            phase_pix_value = ndimage.sum(phase, cell_mask, 1)

        # Store the measurements of this frame as row i of the table.
        row = {
            'Cell ID': ID,
            'Frame': frames[i],
            'GFP Intensity': gfp_pix_value,
            'RFP Intensity': rfp_pix_value,
            'Phase Intensity': phase_pix_value,
            'Area': area,
            'mask_fn': mask_name,
            'x': xi,
            'y': yi,
        }
        for column, value in row.items():
            df.loc[i, column] = value

    df.to_csv(stat_file, index=False)

print("FINISHHHHEEEEDDDDD")
# Spoken notification through the `spd-say` command; os.system only returns a
# non-zero status (it does not raise) when the command is unavailable.
os.system('spd-say "Master Nathan, your program has finished. I love you."')