# Create annotation files

Script to generate files needed for manual annotation. 

0) mount drive

1) Run block #1.

2) Change the sample name in block #2 and run it. This generates the annotation files for the image blocks of that sample selected by the loop in that cell. Output is written to /Annotation/2023/

In [None]:
# Mount Google Drive at /content/drive; every path used below lives under this mount point.
# This import only exists inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install ipdb
import ipdb

In [None]:
# 1) setup
#load packages
import pandas as pd
import numpy as np
from skimage import io,util, measure
import shutil,re,datetime
import os
import sys
!pip install dill
import dill
from scipy.spatial.distance import cdist
# !pip install ipdb
# import ipdb

# load function for fatetrack objects
def load_object(filename):
    """Load a dill-serialized object (here: a FateTrack object) from disk.

    Parameters
    ----------
    filename: str
      Path of the .pkl file to read.

    Returns
    -------
    The object that was stored in the file.
    """
    # The file is opened read-only in binary mode; nothing on disk is modified.
    # NOTE(review): dill.load, like pickle, can execute arbitrary code, so only
    # load .pkl files that come from a trusted source.
    with open(filename, 'rb') as pkl_file:
        loaded = dill.load(pkl_file)
    return loaded


# function to reformat cell type names
def strip_list_format(celltype_list):
  """Collapse a cell type entry into a single annotation string.

  Parameters
  ----------
  celltype_list: str or sequence of str
    Either the string 'none' or a sequence of cell type names. Missing values
    (None, NaN or any other object without a length) are also accepted.

  Returns
  -------
  str
    * the only element, when the sequence has exactly one element
    * '1_YFP_2_CY5', when the sequence is exactly ['1_YFP', '2_CY5'] (in that order)
    * 'none' in every other case (the string 'none', an empty sequence, any other
      combination, or a missing value)
  """
  # Only compare against 'none' for real strings: comparing an array-like with
  # a string is element-wise and cannot be used as a plain condition.
  if isinstance(celltype_list, str) and celltype_list == 'none':
    return 'none'
  try:
    n_types = len(celltype_list)
  except TypeError:
    # NaN / None (e.g. rows left unmatched by a left merge) have no length.
    return 'none'
  if n_types == 1:
    return celltype_list[0]
  if n_types == 2 and celltype_list[0] == '1_YFP' and celltype_list[1] == '2_CY5':
    return '1_YFP_2_CY5'
  return 'none'


def mCherryToDAPITranslation(mCherry_props, DAPI_props, max_dist=10):
  """Find the corresponding nuclear labels in the HCR Round 1 mCherry and DAPI images.

  Each mCherry nucleus is paired with the DAPI nucleus whose centroid is closest
  (Euclidean distance). Pairs further apart than ``max_dist`` are discarded, and
  so is every pair whose DAPI label was claimed by more than one mCherry nucleus.

  Parameters
  ----------
  mCherry_props: DataFrame
    properties of the nuclei in the mCherry image; must contain the columns
    'label', 'centroid-0' and 'centroid-1'
  DAPI_props: DataFrame
    properties of the nuclei in the DAPI image; must contain the columns
    'Rd1_orig_label_DAPI', 'nuc_Rd1_centroid-0' and 'nuc_Rd1_centroid-1'
  max_dist: float, optional
    largest centroid-to-centroid distance (in pixels) that still counts as a
    match. Defaults to 10, the previously hard-coded value.

  Returns
  -------
  label_translation: DataFrame
    columns 'Rd1_orig_label' (mCherry label) and 'Rd1_orig_label_DAPI'
    (matching DAPI label); empty when either input table has no rows
  """
  columns = ['Rd1_orig_label', 'Rd1_orig_label_DAPI']
  # Without nuclei on either side there is nothing to match (and argmin over an
  # empty axis would raise), so return an empty translation table.
  if len(mCherry_props) == 0 or len(DAPI_props) == 0:
    return pd.DataFrame(columns=columns)

  mCherry_xy = np.asarray(mCherry_props[['centroid-0', 'centroid-1']])
  DAPI_xy = np.asarray(DAPI_props[['nuc_Rd1_centroid-0', 'nuc_Rd1_centroid-1']])

  # Distance between every mCherry nucleus (rows) and every DAPI nucleus (columns).
  centroid_dist = cdist(mCherry_xy, DAPI_xy)
  nearest_idx = np.argmin(centroid_dist, axis=1)  # position of the closest DAPI nucleus
  nearest_dist = centroid_dist[np.arange(len(nearest_idx)), nearest_idx]

  # One row per mCherry nucleus: [mCherry label, label of the closest DAPI nucleus].
  pairs = np.column_stack([
      np.asarray(mCherry_props['label']),
      np.asarray(DAPI_props['Rd1_orig_label_DAPI'])[nearest_idx],
  ])

  # Drop pairs that are too far apart. Written as "not greater than" so that a
  # NaN distance is kept, exactly as the original "> 10" deletion did.
  within_range = ~(nearest_dist > max_dist)
  label_translation = pd.DataFrame(pairs[within_range], columns=columns)

  # keep=False removes *all* rows of an ambiguous DAPI label, not just the extras.
  return label_translation.drop_duplicates(subset='Rd1_orig_label_DAPI', keep=False)


# function to generate all needed files for annotation
def generateAnnotationFiles(ftoDir, sampleBase, block):
    """Write the annotation CSV, the nuclear mask and the HCR channel tifs for one image block.

    Parameters
    ----------
    ftoDir: str
      Directory holding the FateTrack object folders. It is concatenated
      directly with the sample name, so it must end with '/'.
    sampleBase: str
      Sample name without the block suffix.
    block: int
      Block number; the sample folder is "<sampleBase>_2trailing_block<block>of9".

    Output goes to
    /content/drive/MyDrive/FateTrack_Main/Annotation/2023/<sampleBase>/<sample>/:
    <sample>_annotations.csv, mask.tif and one tif per round and channel
    (1CY5.tif ... 2A594.tif).
    """
    sample = sampleBase + "_2trailing_block" + str(block) + "of9"
    fto = load_object(ftoDir + sample + '/' + sample + '.pkl')

    # Measurement table, one row per DAPI nucleus: identifiers and centroids,
    # the "lp" means and the "nuc" means.
    all_meas = fto.HCR_measurements
    lp_means = all_meas.filter(like = "mean").filter(like = "lp")
    lp_means = (lp_means * 10000).astype(int)  # scaled so the number is interpretable in MATLAB
    nuc_means = all_meas.filter(like = "mean").filter(like = "nuc").astype(int)
    id_cols = all_meas[['Rd1_orig_label_DAPI', 'celltype', 'lp_Rd1_centroid-0', 'lp_Rd1_centroid-1']]
    hcrs = pd.concat([id_cols, lp_means, nuc_means], axis = 1)
    hcrs['annotation'] = hcrs['celltype'].apply(strip_list_format)

    # mCherry nuclei: label plus integer centroid, taken from the properties
    # already stored on the FateTrack object.
    props = fto.HCR_Rd1_mCherrymask_props
    mCherry_nuclei = pd.concat([props[['label']], props[['centroid-0', 'centroid-1']].astype(int)], axis=1)
    mCherry_nuclei = mCherry_nuclei.rename(columns={"label" : "Rd1_orig_label"})

    # Attach the corresponding DAPI label to every mCherry nucleus, then the DAPI
    # measurements from "hcrs".
    # NOTE(review): an earlier comment called this a left join, but merge()
    # defaults to an inner join, so nuclei without a translation or measurement
    # row are dropped - confirm that this is intended.
    measDF = (
        mCherry_nuclei
        .merge(fto.mCherry_DAPI_translation, on="Rd1_orig_label")
        .merge(hcrs, on="Rd1_orig_label_DAPI")
    )
    measDF = measDF.rename(columns = {'Rd1_orig_label_DAPI' : 'pointID', 'lp_Rd1_centroid-0' : 'yCoord', 'lp_Rd1_centroid-1' : 'xCoord'})
    measDF = measDF.drop(columns=['celltype'])

    # Output directory for this block (created on first use).
    path = '/content/drive/MyDrive/FateTrack_Main/Annotation/2023/' + sampleBase + '/' + sample
    os.makedirs(path, exist_ok=True)

    # Annotation table first, then the binary nuclear mask and the aligned tifs.
    measDF.to_csv(path + "/" + sample + "_annotations.csv", index = False)
    images1 = fto.HCR_Rd1_images
    images2 = fto.HCR_Rd2_images
    io.imsave(path + "/mask.tif", util.img_as_uint(fto.HCR_Rd1_DAPI_mask>0))
    for round_tag, round_images in (("1", images1), ("2", images2)):
        for channel in ('CY5', 'CY3', 'YFP', 'DAPI', 'A594'):
            # position of this channel in the channel list = index into the image stack
            channel_idx = np.where(fto.HCR_channelList==channel)[0][0]
            io.imsave(path + "/" + round_tag + channel + ".tif", round_images[channel_idx])

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting dill
  Downloading dill-0.3.6-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 KB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dill
Successfully installed dill-0.3.6


In [None]:
# Scratch cell: load the FateTrack object of a single block (block 0 of sample
# 507_D3_1_w1) for interactive inspection. Nothing is written to disk here.
# Note that this rebinds the notebook-level names sampleBase, block, ftoDir,
# sample, fto and hcrs.
sampleBase = "507_D3_1_w1"
block = 0
ftoDir = '/content/drive/MyDrive/FateTrack_Main/test/1028/'
sample = sampleBase + "_2trailing_block" + str(block) + "of9"
fto = load_object(ftoDir + sample + '/' + sample + '.pkl')
hcrs = fto.HCR_measurements
# The commented-out lines below repeat steps of generateAnnotationFiles for manual debugging.
# hcrs_lp_meas = hcrs.filter(like = "mean").filter(like = "lp")
# hcrs_lp_meas = hcrs_lp_meas * 10000  # makes the number interpretable in MATLAB
# hcrs = pd.concat([hcrs[['Rd1_orig_label_DAPI', 'celltype']], hcrs_lp_meas.astype(int), hcrs.filter(like = "mean").filter(like = "nuc").astype(int)], axis = 1)

# # get all HCR mCherry nuclei
# HCR_Rd1_mCherrymask_props = pd.DataFrame(measure.regionprops_table(fto.HCR_Rd1_mCherry_mask,intensity_image= fto.HCR_Rd1_mCherry_image, 
#                                                                     properties=['label','bbox','centroid','area','mean_intensity']))
# HCR_Rd1_mCherry_labels_centroids = pd.concat([HCR_Rd1_mCherrymask_props[['label']], HCR_Rd1_mCherrymask_props[['centroid-0', 'centroid-1']].astype(int)], axis = 1)
# HCR_Rd1_mCherry_labels_centroids.rename(columns={"label" : "Rd1_orig_label"}, inplace=True)

# # translation between mCherry and DAPI nuclei
# mCherry_DAPI_translation = mCherryToDAPITranslation(HCR_Rd1_mCherrymask_props, fto.HCR_measurements)


In [None]:
# 2) set sample and run functions
# Generates the annotation files for one sample. range(6,7) yields only block 6;
# widen the range to process more blocks of the same sample.
ftoDir = '/content/drive/MyDrive/FateTrack_Main/test/1028/'
sampleBase = "507_D3_1_w1"
for block in range(6,7):
  generateAnnotationFiles(ftoDir, sampleBase, block)

In [None]:
# 2) set sample and run functions
# Variant of the cell above for several samples at once: each tuple is
# (sampleBase, block) and produces the annotation files of exactly that block.
ftoDir = '/content/drive/MyDrive/FateTrack_Main/test/1028/'
samples = [("507_D3_1_w1", 0), ("507_D3_1_w2", 3), ("516_D4_2_w1", 4), ("621_D2_1_w2", 7)]
for sampleBase, block in samples:
  generateAnnotationFiles(ftoDir, sampleBase, block)

# Merge annotations back into FateTrack object

In [None]:
def save_object(obj, filename):
    """Serialize an object (here: a FateTrack object) to a .pkl file with dill.

    Parameters
    ----------
    obj: FateTrack object
      The object to store.
    filename: str
      Path of the .pkl file to write. An existing file at that path is overwritten.
    """
    # 'wb' truncates the target, so whatever was stored there before is replaced.
    with open(filename, 'wb') as pkl_file:
        dill.dump(obj, pkl_file)


def merge_annotations(ftoDir, sampleBase, block):
  """Write the manual annotations of one block back into its FateTrack object.

  The 'annotation' column of the block's annotation CSV replaces the 'celltype'
  column of ``fto.HCR_measurements`` (rows are matched on
  Rd1_orig_label_DAPI == pointID) and the updated object is saved over the
  original .pkl file. The number of measurement rows before merging is printed.

  Parameters
  ----------
  ftoDir: str
    Directory holding the FateTrack object folders; must end with '/'.
  sampleBase: str
    Sample name without the block suffix.
  block: int
    Block number; the sample folder is "<sampleBase>_2trailing_block<block>of9".
  """
  sample = sampleBase + "_2trailing_block" + str(block) + "of9"
  pkl_path = ftoDir + sample + '/' + sample + '.pkl'

  # FateTrack object and the manually curated annotation table for this block
  fto = load_object(pkl_path)
  manual_calls = pd.read_csv('/content/drive/MyDrive/FateTrack_Main/Annotation/2023/' + sampleBase + "/" + sample + "/" + sample + "_annotations.csv")

  measurements = fto.HCR_measurements
  print(len(measurements))  # row count before merging, for a manual sanity check
  manual_calls = manual_calls[["pointID", "annotation"]]

  # NOTE(review): merge() defaults to an inner join, so measurement rows without
  # an annotation are dropped, and the helper 'pointID' column stays in the
  # stored table - confirm both are intended before relying on the saved object.
  fto.HCR_measurements = (
      measurements
      .merge(manual_calls, left_on = "Rd1_orig_label_DAPI", right_on = "pointID")
      .drop(columns =['celltype'])
      .rename(columns = {'annotation' : 'celltype'})
  )

  # Overwrites the .pkl file the object was loaded from.
  save_object(fto, pkl_path)
  


In [None]:
# Merge the finished annotations of block 6 of sample 507_D3_1_w1 back into its
# FateTrack object. merge_annotations overwrites the block's .pkl file in place.
ftoDir = '/content/drive/MyDrive/FateTrack_Main/test/1028/'
sampleBase = "507_D3_1_w1"
block = 6
merge_annotations(ftoDir, sampleBase, block)

5148


In [None]:
# combine split annotations into single annotation file
# for tiled connection files
# Reads tile0.csv ... tile8.csv from the block's annotation folder and writes
# them back, stacked in tile order, as <sample>_annotations.csv.
# Relies on `sampleBase` and `block` having been set by an earlier cell.
sample = sampleBase + "_2trailing_block" + str(block) + "of9"
tile_tables = []
for i in range(9):
  tile_annotations = pd.read_csv('/content/drive/MyDrive/FateTrack_Main/Annotation/2023/' + sampleBase + "/" + sample + "/tile" + str(i) + ".csv")
  tile_tables.append(tile_annotations)
# Concatenate once instead of growing a DataFrame inside the loop.
annotations_all = pd.concat(tile_tables, axis = 0).reset_index(drop = True)
# index = False keeps the file format identical to the one written by
# generateAnnotationFiles; without it the CSV gains an extra unnamed index column.
annotations_all.to_csv('/content/drive/MyDrive/FateTrack_Main/Annotation/2023/' + sampleBase + "/" + sample + "/" + sample + "_annotations.csv", index = False)


# split up annotation file

In [None]:
# Split the annotation table of block 6 (sample 507_D3_1_w1) into a 3 x 3 grid of
# tiles by coordinate and write each tile to its own CSV (tile0.csv ... tile8.csv).
annotation = pd.read_csv('/content/drive/MyDrive/FateTrack_Main/Annotation/2023/507_D3_1_w1/507_D3_1_w1_2trailing_block6of9/507_D3_1_w1_2trailing_block6of9_annotations.csv')

# Band edges shared by both axes; band k covers cutoffs[k - 1] < coord <= cutoffs[k].
# Rows whose coordinates fall outside every band end up in no tile.
cutoffs = [-1, 1032, 2066, 3100]
tiles = []
for x in range(1, 4):
  in_x_band = (annotation['xCoord'] > cutoffs[x - 1]) & (annotation['xCoord'] <= cutoffs[x])
  for y in range(1, 4):
    in_y_band = (annotation['yCoord'] > cutoffs[y - 1]) & (annotation['yCoord'] <= cutoffs[y])
    # tile order: x band is the outer loop, y band the inner loop
    tiles.append(annotation[in_x_band & in_y_band])

for i, tile in enumerate(tiles):
  tile.to_csv('/content/drive/MyDrive/FateTrack_Main/Annotation/2023/507_D3_1_w1/507_D3_1_w1_2trailing_block6of9/tile' + str(i) + '.csv', index = False)

# not this one

Script to generate files needed for manual annotation. 

0) mount drive

1) Run block #1.

2) Change the sample name in block #2 and run it. This generates the annotation files for the image blocks of that sample selected by the loop in that cell. Output is written to /Annotation/2023/

In [None]:
# Mount Google Drive at /content/drive; every path used below lives under this mount point.
# This import only exists inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# 1) setup
#load packages
import pandas as pd
import numpy as np
from skimage import io,util, measure
import shutil,re,datetime
import os
import sys
import dill
!pip install ipdb
import ipdb

# load function for fatetrack objects
def load_object(filename):
    """Read a dill-serialized object (here: a FateTrack object) back from disk.

    Parameters
    ----------
    filename: str
      Path of the .pkl file to read.

    Returns
    -------
    The deserialized object.
    """
    # Opened read-only ('rb'): loading never changes the file on disk.
    # NOTE(review): dill.load, like pickle, can run arbitrary code while
    # loading; only open .pkl files from a trusted source.
    with open(filename, 'rb') as source:
        restored = dill.load(source)
    return restored


# function to generate all needed files for annotation
def generateAnnotationFiles(ftoDir, sampleBase, block):
    """Write the annotation CSV, the nuclear mask and the HCR channel tifs for one image block.

    This variant takes the nucleus list from ``fto.masterDF`` and left-merges the
    HCR measurements onto it.

    Parameters
    ----------
    ftoDir: str
      Directory holding the FateTrack object folders; must end with '/'.
    sampleBase: str
      Sample name without the block suffix.
    block: int
      Block number; the sample folder is "<sampleBase>_2trailing_block<block>of9".

    Output goes to
    /content/drive/MyDrive/FateTrack_Main/Annotation/2023/<sampleBase>/<sample>/.
    """
    sample = sampleBase + "_2trailing_block" + str(block) + "of9"
    fto = load_object(ftoDir + sample + '/' + sample + '.pkl')

    # Measurement table: identifiers, integer centroids, "lp" means and "nuc" means.
    raw = fto.HCR_measurements
    labels_and_type = raw[['Rd1_orig_label_DAPI', 'celltype']]
    centroids = raw[['lp_Rd1_centroid-0', 'lp_Rd1_centroid-1']].astype(int)
    lp_means = raw.filter(like = "mean").filter(like = "lp") * 10000  # makes the number interpretable in MATLAB
    nuc_means = raw.filter(like = "mean").filter(like = "nuc")
    hcrs = pd.concat([labels_and_type, centroids, lp_means.astype(int), nuc_means.astype(int)], axis = 1)

    # All the drop steps deal with an underlying duplication error that has been
    # addressed in the FateTrack processing pipeline now.
    label_pairs = fto.masterDF.drop(columns=["Unnamed: 0"]).drop_duplicates()
    label_pairs = label_pairs[['Rd1_orig_label', 'Rd1_orig_label_DAPI']]

    # Left merge: every masterDF row is kept, with measurements where a DAPI label matches.
    # NOTE(review): rows without a match get a missing (NaN) 'celltype', which is
    # then passed to strip_list_format - confirm that helper tolerates NaN.
    measDF = label_pairs.merge(hcrs, how='left', on='Rd1_orig_label_DAPI')
    measDF = measDF.rename(columns = {'Rd1_orig_label_DAPI' : 'pointID', 'lp_Rd1_centroid-0' : 'yCoord', 'lp_Rd1_centroid-1' : 'xCoord'})
    measDF['annotation'] = measDF['celltype'].apply(strip_list_format)
    measDF = measDF.drop(columns=['celltype', 'Rd1_orig_label'])

    # Output directory for this block (created on first use).
    path = '/content/drive/MyDrive/FateTrack_Main/Annotation/2023/' + sampleBase + '/' + sample
    os.makedirs(path, exist_ok=True)

    # Annotation table first, then the binary nuclear mask and the aligned tifs.
    measDF.to_csv(path + "/" + sample + "_annotations.csv", index = False)
    images1 = fto.HCR_Rd1_images
    images2 = fto.HCR_Rd2_images
    io.imsave(path + "/mask.tif", util.img_as_uint(fto.HCR_Rd1_DAPI_mask>0))
    # io.imsave(path + "/cyto_mask.tif", util.img_as_uint(fto.HCR_Rd1_cyto_mask>0))

    # (output file, image stack, channel name) - written in exactly this order
    tif_outputs = [
        ("/1CY5.tif", images1, 'CY5'),
        ("/1CY3.tif", images1, 'CY3'),
        ("/1YFP.tif", images1, 'YFP'),
        ("/1DAPI.tif", images1, 'DAPI'),
        ("/1A594.tif", images1, 'A594'),
        ("/2CY5.tif", images2, 'CY5'),
        ("/2CY3.tif", images2, 'CY3'),
        ("/2YFP.tif", images2, 'YFP'),
        ("/2DAPI.tif", images2, 'DAPI'),
        ("/2A594.tif", images2, 'A594'),
    ]
    for tif_name, stack, channel in tif_outputs:
        # position of the channel in the channel list = index into the image stack
        io.imsave(path + tif_name, stack[np.where(fto.HCR_channelList==channel)[0][0]])

In [None]:
sample = '507_D3_1_w1'
block = 0
sample = sampleBase + "_2trailing_block" + str(block) + "of9"
fto = load_object(ftoDir + sample + '/' + sample + '.pkl')
TL_last_m