In [1]:
#load packages
import pandas as pd
import numpy as np
from skimage import io,util, measure
import shutil,re,datetime
import os
import sys
!pip install dill
import dill
from scipy.spatial.distance import cdist
# !pip install ipdb
# import ipdb

# load function for fatetrack objects
def load_object(filename):
    """Read a dill-serialized object back from a file.

    Parameters
    ----------
    filename : str
        Path of the .pkl file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever ``dill.load`` deserializes from the file.
    """
    # NOTE: dill/pickle deserialization can execute arbitrary code, so only
    # load files that come from a trusted source.
    with open(filename, 'rb') as pkl_file:
        return dill.load(pkl_file)


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting dill
  Downloading dill-0.3.6-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 KB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dill
Successfully installed dill-0.3.6


In [2]:
# Mount Google Drive at /content/drive so the paths used below resolve in this Colab session
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Pick one block of one sample and load its FateTrack object (.pkl)
ftoDir = '/content/drive/MyDrive/FateTrack_Main/test/1028/'
sampleBase = "507_D3_1_w1"
block = 6
# the sample name is both the sub-directory name and the pickle's base name
sample = f"{sampleBase}_2trailing_block{block}of9"
fto = load_object(f"{ftoDir}{sample}/{sample}.pkl")
    

# Generate RLG File

In [11]:
# First RLG frame (timelapse only): use the last entry of the fto's
# timelapse nuclear-fluorescence feature list
tl_info = fto.timelapse_nucFluor_features[-1]
n_cells = len(tl_info)
# pointID column: the label offset by 100000
pointID = (tl_info['label'] + 100000).astype(int)

In [12]:
# frameNumber column: every timelapse point is in frame 1
frameNumber = pd.Series(np.full(n_cells, 1, dtype=int))

In [13]:
# coordinate columns: centroid-1 first, then centroid-0, cast to integers
coords = tl_info.loc[:, ['centroid-1', 'centroid-0']].astype(int)

In [14]:
# parentID column: all NaN, one entry per timelapse point
parentID = pd.Series(np.repeat(np.nan, n_cells))

In [15]:
# reformats master ID as rlg ID
def reformat_master_id(master_id):
    """Turn an underscore-separated master ID string into a numeric RLG ID.

    The first two '_'-separated fields are parsed as integers and combined
    as ``first * 100000 + second``; any further fields are ignored.

    Parameters
    ----------
    master_id : str
        ID string such as ``"1_23"``.

    Returns
    -------
    int
        The combined ID, e.g. ``100023`` for ``"1_23"``.
    """
    fields = master_id.split('_')
    leading = int(fields[0])
    trailing = int(fields[1])
    return leading * 100000 + trailing

In [16]:
# get annotation column
# load in existing connection annotations; .copy() makes the two-column
# selection an independent frame, so adding columns to it below does not
# trigger pandas' SettingWithCopyWarning
connection_table = fto.TLtoHCR_connections[['MasterID_1', 'Rd1_orig_label']].copy()
# reformat IDs so they can be matched to the pointIDs in the RLG frame
connection_table['TL_ID'] = connection_table['MasterID_1'].apply(reformat_master_id)
connection_table['HCR_ID'] = connection_table['Rd1_orig_label'] + 200000
connection_table = connection_table[['TL_ID', 'HCR_ID']]
# number of connections per timelapse point (0 when the point is absent from the table)
n_connections = pointID.map(connection_table['TL_ID'].value_counts()).fillna(0).to_numpy()
# 0 connections -> "death", exactly 1 -> "pass", more than 1 -> "split"
annotation = pd.Series(
    np.select([n_connections == 0, n_connections == 1], ["death", "pass"], default="split")
)

In [17]:
# assemble RLG dataframe for TL data
# pd.concat(axis=1) aligns rows on index labels. pointID and coords are
# derived from tl_info and keep its index, while the other columns are
# numbered 0..n-1, so drop every index first to pair rows by position.
rlg_columns = [pointID, frameNumber, coords, parentID, annotation]
rlg_frame = pd.concat([col.reset_index(drop=True) for col in rlg_columns], axis=1)
rlg_frame = rlg_frame.set_axis(["pointID", "frameNumber", "xCoord", "yCoord", "parentID", "annotation"], axis=1)
rlg_frame["annotation"] = rlg_frame["annotation"].astype('string')

In [18]:
# Second RLG frame (HCR): measure label and centroid for every region of
# the HCR Rd1 mCherry mask
hcr_props = measure.regionprops_table(fto.HCR_Rd1_mCherry_mask, properties=['label', 'centroid'])
hcr_info = pd.DataFrame(hcr_props)
n_cells = len(hcr_info)
# pointID column: the region label offset by 200000
pointID = hcr_info['label'] + 200000

In [19]:
# frameNumber column: every HCR point is in frame 2
frameNumber = pd.Series(np.full(n_cells, 2, dtype=int))

In [20]:
# coordinate columns: centroid-1 first, then centroid-0, cast to integers
coords = hcr_info.loc[:, ['centroid-1', 'centroid-0']].astype(int)

In [21]:
# Build the HCR part of the table from the columns prepared so far
hcr_columns = ["pointID", "frameNumber", "xCoord", "yCoord"]
rlg_hcr = pd.concat([pointID, frameNumber, coords], axis=1).set_axis(hcr_columns, axis=1)
# parentID column: a left merge keeps every HCR point; points without a
# matching HCR_ID in the connection table end up with NaN as parent
rlg_hcr = (
    rlg_hcr
    .merge(connection_table, how='left', left_on="pointID", right_on="HCR_ID")
    .drop(columns=["HCR_ID"])
    .rename(columns={"TL_ID": "parentID"})
)


In [22]:
# add annotation column: HCR points without a parent are "birth", all
# others are "pass". The labels are produced in one step so strings are never
# written into a boolean column (pandas deprecates that implicit upcast).
has_no_parent = rlg_hcr['parentID'].isna()
rlg_hcr['annotation'] = np.where(has_no_parent, "birth", "pass")
rlg_hcr['annotation'] = rlg_hcr['annotation'].astype("string")

In [23]:
# stack the timelapse rows on top of the HCR rows (row-wise concat)
rlg_frame_full = pd.concat((rlg_frame, rlg_hcr))

In [24]:
# output directory for this sample's connection files; created when missing
outpath = f"/content/drive/MyDrive/FateTrack_Main/Annotation/2023/connections/{sample}/"
os.makedirs(outpath, exist_ok=True)
# write the combined RLG table without the index column
rlg_frame_full.to_csv(f"{outpath}{sample}_rlg.csv", index=False)

In [25]:
# save out necessary images: the HCR Rd1 mCherry image as time1 and the
# last timelapse nuclear image as time0
for tif_name, image in (
    ("/a594_time1.tif", fto.HCR_Rd1_mCherry_image),
    ("/a594_time0.tif", fto.timelapse_nuclear_images[-1]),
):
    io.imsave(outpath + tif_name, image)

# Split up RLG file

In [4]:
# load in RLG file for the current sample; the path is built from `sample`
# instead of being spelled out, so this cell cannot silently read a
# different sample's file when `sample` changes
connections_dir = '/content/drive/MyDrive/FateTrack_Main/Annotation/2023/connections/' + sample + '/'
rlg = pd.read_csv(connections_dir + sample + '_rlg.csv')

In [5]:
# preview the first rows of the loaded RLG table
rlg.head()

Unnamed: 0,pointID,frameNumber,xCoord,yCoord,parentID,annotation
0,100001,1,613,5,,death
1,100002,1,626,4,,death
2,100003,1,794,7,,death
3,100004,1,1196,7,,death
4,100005,1,1531,5,,death


In [8]:
# split the RLG table into a 3x3 grid of tiles (9 in total) by coordinate;
# a point falls in a tile when lower < coord <= upper on both axes
cutoffs = [-1, 1032, 2066, 3100]
bounds = list(zip(cutoffs[:-1], cutoffs[1:]))
# x is the outer loop and y the inner one, so tile index = 3 * x_bin + y_bin
tiles = [
    rlg[
        (rlg['xCoord'] > x_lo) & (rlg['xCoord'] <= x_hi)
        & (rlg['yCoord'] > y_lo) & (rlg['yCoord'] <= y_hi)
    ]
    for x_lo, x_hi in bounds
    for y_lo, y_hi in bounds
]


In [15]:
# write each tile to <connections>/<sample>/tiles/tile<i>.csv; the folder is
# derived from `sample` so it matches where the tiles are read back from
tiles_dir = '/content/drive/MyDrive/FateTrack_Main/Annotation/2023/connections/' + sample + '/tiles/'
# to_csv does not create missing directories, so make sure the folder exists
os.makedirs(tiles_dir, exist_ok=True)
for i, tile in enumerate(tiles):
    tile.to_csv(tiles_dir + 'tile' + str(i) + '.csv', index=False)

# Merge connections back into FTO

In [21]:
# read the single (untiled) connection annotation file for the current sample
annotation_dir = f"/content/drive/MyDrive/FateTrack_Main/Annotation/2023/connections/{sample}/"
connection_rlg = pd.read_csv(f"{annotation_dir}{sample}_rlg.csv")

In [6]:
# for tiled connection files: read the 9 tile CSVs and stack them once.
# A single concat avoids re-copying the growing frame on every iteration
# and avoids concatenating onto an empty DataFrame.
tile_dir = '/content/drive/MyDrive/FateTrack_Main/Annotation/2023/connections/' + sample + '/tiles/'
tile_rlgs = [pd.read_csv(tile_dir + 'tile' + str(i) + '.csv') for i in range(9)]
# ignore_index=True renumbers the rows 0..n-1
connection_rlg = pd.concat(tile_rlgs, axis=0, ignore_index=True)

In [9]:
# summarize columns, non-null counts and dtypes of the combined connection table
connection_rlg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8701 entries, 0 to 8700
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   pointID      8701 non-null   int64  
 1   frameNumber  8701 non-null   int64  
 2   xCoord       8701 non-null   float64
 3   yCoord       8701 non-null   float64
 4   parentID     4002 non-null   float64
 5   annotation   8701 non-null   object 
dtypes: float64(3), int64(2), object(1)
memory usage: 408.0+ KB


In [10]:
# reformat rlg file into translation table
# keep frame-2 rows that have a parent. dropna is restricted to parentID so a
# missing value in an unrelated column cannot silently discard a connection;
# .copy() yields an independent frame that is safe to modify afterwards.
is_frame_two = connection_rlg['frameNumber'] == 2
filtered_connections = connection_rlg[is_frame_two].dropna(subset=['parentID']).copy()

In [11]:
# cast parentID to integer (rows without a parent were dropped beforehand)
filtered_connections = filtered_connections.astype({'parentID': int})

In [12]:
# reformats rlg ID as nuclear label
def reformat_rlg_id(rlg_id, frameNumber):
    """Remove the per-frame offset from an RLG point ID.

    Parameters
    ----------
    rlg_id : int, float or str
        RLG point ID; it is converted with ``int()``.
    frameNumber : int
        Frame the ID belongs to; the offset removed is ``100000 * frameNumber``.

    Returns
    -------
    int
        ``int(rlg_id) - 100000 * frameNumber``.
    """
    point_number = int(rlg_id)
    frame_offset = 100000 * frameNumber
    return point_number - frame_offset

In [13]:
# convert RLG IDs back to labels: parentID values use the frame-1 offset,
# pointID values use the frame-2 offset
tl_labels = filtered_connections['parentID'].apply(reformat_rlg_id, args=(1,))
hcr_labels = filtered_connections['pointID'].apply(reformat_rlg_id, args=(2,))
# place both label columns side by side, one row per connection
translation_table = pd.concat([tl_labels, hcr_labels], axis=1)


In [14]:
# rename to the column names used by the fto connection table and renumber rows from 0
# NOTE(review): MasterID_1 holds an integer label here, whereas
# reformat_master_id parses MasterID_1 values as '<int>_<int>' strings —
# confirm downstream consumers expect the integer form.
column_names = {"parentID": "MasterID_1", "pointID": "Rd1_orig_label"}
translation_table = translation_table.rename(columns=column_names)
translation_table = translation_table.reset_index(drop=True)

In [15]:
# store the translation table on the fto as its manual_connections attribute
fto.manual_connections = translation_table

In [16]:
def save_object(obj, filename):
    """Serialize an object to a .pkl file with dill.

    Parameters
    ----------
    obj : object
        The object to serialize (in this notebook, a FateTrack object).
    filename : str
        Destination path of the .pkl file. The file is opened in binary
        write mode, so an existing file at this path is overwritten.
    """
    with open(filename, 'wb') as pkl_file:
        dill.dump(obj, pkl_file)

In [17]:
# write the updated fto to <ftoDir>/<sample>/<sample>.pkl (an existing file there is overwritten)
save_object(fto, f"{ftoDir}{sample}/{sample}.pkl")