In [None]:
import pandas as pd
import numpy as np
import os
from src.utilities.plot_functions import format_2d_plotly
from ultrack.tracks.graph import get_paths_to_roots, tracks_df_forest, inv_tracks_df_forest
from glob2 import glob
from tqdm import tqdm

## Ultrack may not want to track lcp+ cells, but by god we're going to make it

## Load mask data

In [None]:
# Project root. Defaults to the original workstation path; set the
# KILLI_TRACKER_ROOT environment variable to run the notebook on another machine.
root = os.environ.get(
    "KILLI_TRACKER_ROOT",
    "E:\\Nick\\Cole Trapnell's Lab Dropbox\\Nick Lammers\\Nick\\killi_tracker\\",
)
project_name = "20250311_LCP1-NLSMSC"

# full mask dataset: every CSV in the project's fluorescence directory
# (presumably one file per frame -- confirm against the export step)
full_mask_fluo_dir = os.path.join(root, "built_data", "fluorescence_data", project_name, "")
fluo_frames = sorted(glob(os.path.join(full_mask_fluo_dir, "*.csv")))
if not fluo_frames:
    # Without this guard pd.concat fails with an opaque "No objects to concatenate".
    raise FileNotFoundError(f"No fluorescence CSV files found in {full_mask_fluo_dir!r}")

# Read the files in sorted path order, with a progress bar.
fluo_df_list = [pd.read_csv(df_path) for df_path in tqdm(fluo_frames)]

# Stack all tables row-wise and renumber the index from 0.
fluo_df_full = pd.concat(fluo_df_list, axis=0, ignore_index=True)

In [None]:
# Output directory for this project's tracking figures (created if missing).
fig_path = os.path.join(root, "figures", "tracking", project_name)
os.makedirs(fig_path, exist_ok=True)

# "stage" is 26 at frame 0 and grows by 1.5/60 per frame
# (presumably hours with a 1.5-minute frame interval -- TODO confirm units).
fluo_df_full["stage"] = fluo_df_full["frame"] * 1.5 / 60 + 26

## Filter for only nuclei with bright lcp expression

In [None]:
import plotly.express as px

min_time = 70  # keep only rows with stage >= min_time, to avoid weird early stuff
N = 50  # number of brightest detections kept per frame

# Alternative that was tried and disabled: an absolute cutoff
# (mean_fluo > 110) instead of a per-frame top-N.

# Keep the N rows with the highest mean_fluo in every frame.
# Sorting once and taking the head of each group selects the same rows, in the
# same order (frame ascending, mean_fluo descending), as applying nlargest per
# group. It avoids DataFrameGroupBy.apply on a frame that still carries the
# grouping column: pandas 2.2+ deprecates that, and once the grouping column is
# excluded the "frame" column needed downstream would be lost.
fluo_df_lcp = (
    fluo_df_full.loc[fluo_df_full["stage"] >= min_time, :]
    .sort_values(["frame", "mean_fluo"], ascending=[True, False])
    .groupby("frame", sort=False)
    .head(N)
)
fluo_df_lcp.head()

## Call track stitching: can we use this to track single-frame fragments?

In [None]:
# Rename "frame" to "t" and order rows by time point, then by nucleus.
tracks_df = (
    fluo_df_lcp
    .rename(columns={"frame": "t"})
    .sort_values(by=["t", "nucleus_id"])
)

# One integer code per distinct (t, nucleus_id) pair, numbered in order of first appearance.
node_keys = list(zip(tracks_df['t'], tracks_df['nucleus_id']))
node_codes, _ = pd.factorize(node_keys)
tracks_df['id'] = node_codes

# -1 in every row: no parent track is recorded for any detection.
tracks_df["parent_track_id"] = -1
tracks_df.shape

In [None]:
from ultrack.tracks.gap_closing import close_tracks_gaps
from scipy.optimize import linear_sum_assignment
from scipy.spatial import distance_matrix

# Frame-to-frame linking.
# Each detection in a frame is matched to at most one detection in the previous
# available frame by solving a minimum-total-distance assignment. A match is
# kept only if its scaled distance is below max_radius; every unmatched
# detection starts a new track.
max_radius = 25
scale_vec = np.asarray([3, 1, 1])
# Axis order that scale_vec is applied in. (z, y, x) puts the factor of 3 on z,
# which matches how the same vector is handed to close_tracks_gaps in the
# stitching step (its default spatial columns are z, y, x).
# NOTE(review): assumes z is the coarsely sampled axis -- confirm against the
# acquisition metadata.
spatial_cols = ["z", "y", "x"]
time_index = np.unique(tracks_df["t"])
tracks_df["track_id"] = -1

# Split the DataFrame into one table per time point ('t' column). Each group is
# copied so that writing track IDs into it does not touch a slice of tracks_df.
dfs = {key: group.copy() for key, group in tracks_df.groupby('t')}

# Every detection in the first frame seeds its own track.
df_first = dfs[time_index[0]]
df_first["track_id"] = np.arange(df_first.shape[0], dtype=np.int64)
# Next unused track ID. Kept as a running counter over ALL frames so that the ID
# of a track that has already ended is never handed out again.
next_track_id = df_first.shape[0]

# perform cell tracking
frame_pairs = zip(time_index[:-1], time_index[1:])
for t0, t1 in tqdm(frame_pairs, total=len(time_index) - 1):

    # get prev locations and IDs (t0 is the previous frame that actually has
    # detections, so a missing frame number does not raise a KeyError)
    df0 = dfs[t0]
    xyz0 = np.multiply(df0[spatial_cols].to_numpy(), scale_vec)
    extant_tracks = df0["track_id"].to_numpy()
    # get curr locations
    df1 = dfs[t1]
    xyz1 = np.multiply(df1[spatial_cols].to_numpy(), scale_vec)

    # get distances: rows are current detections, columns are previous ones
    dist_mat = distance_matrix(xyz1, xyz0)

    # solve; for a rectangular matrix only min(n_curr, n_prev) pairs come back
    row_ind, col_ind = linear_sum_assignment(dist_mat)

    # keep only the pairs that are close enough
    valid_links = dist_mat[row_ind, col_ind] < max_radius

    # one ID slot per CURRENT detection; -1 marks "not linked yet"
    new_track_ids = np.full(df1.shape[0], -1, dtype=np.int64)
    new_track_ids[row_ind[valid_links]] = extant_tracks[col_ind[valid_links]]

    # assign new track IDs to everything left unlinked, including detections
    # the solver could not pair at all
    is_new = new_track_ids < 0
    n_new = int(is_new.sum())
    new_track_ids[is_new] = np.arange(next_track_id, next_track_id + n_new, dtype=np.int64)
    next_track_id += n_new

    # assign to dataframe
    df1["track_id"] = new_track_ids
    # assign to dictionary
    dfs[t1] = df1


In [None]:
# Collapse the per-frame tables back into a single table with a fresh 0..n-1 index
frame_tables = list(dfs.values())
tracks_df = pd.concat(frame_tables, ignore_index=True)

# number of distinct track IDs produced by the frame-to-frame linking
np.unique(tracks_df["track_id"]).size

In [None]:
# Gap closing: hand the linked tracks to ultrack's close_tracks_gaps, with a
# search radius three times the one used for frame-to-frame linking.
print("Stitching tracks...")
max_gap, max_radius = 3, 25 * 3
scale_vec = np.array([3, 1, 1])
tracks_df_stitched = close_tracks_gaps(
    tracks_df,
    max_gap=max_gap,
    max_radius=max_radius,
    scale=scale_vec,
)

# Write the stitched table to <root>/built_data/tracking/<project_name>/
data_path = os.path.join(root, "built_data", "tracking", project_name)
os.makedirs(data_path, exist_ok=True)
stitched_csv_path = os.path.join(data_path, "lcp_tracks_df.csv")
tracks_df_stitched.to_csv(stitched_csv_path, index=False)

In [None]:
# number of distinct track IDs left after stitching
np.unique(tracks_df_stitched["track_id"]).size