### Notebook to run initial analyses on the dynamics of KF aggregate formation
1) Generate reference sphere mesh 
2) Look at deep cell density fluctuations over time: how early is aggregate position apparent?
3) Pull basic stats: entropy, velocity, cell number

In [1]:
import os
import pandas as pd
import numpy as np
from src.utilities.shape_utils import plot_mesh
from scipy.spatial import KDTree
import plotly.express as px
import trimesh

# --- Dataset selection: test acquisition that used the Kikume NLS marker ---
# NOTE(review): `root` is a machine-specific absolute Windows path; adjust it when running elsewhere.
root = "E:\\Nick\\Cole Trapnell's Lab Dropbox\\Nick Lammers\\Nick\\killi_tracker\\"
experiment_date = "20240611_NLS-Kikume_24hpf_side2"
config_name = "tracking_jordao_20240918.txt"
model = "LCP-Multiset-v1"

# The tracking folder name is the config file name with any ".txt" / ".toml" text removed.
tracking_folder = config_name.replace(".txt", "").replace(".toml", "")

# Well index and frame window; both are zero-padded to four digits in the folder names below.
well_num, start_i, stop_i = 0, 0, 1600
suffix = ""

# Folder containing the tracking metadata.
metadata_path = os.path.join(root, "metadata", "tracking")

# Folders containing the tracking results for this well and frame window.
# The trailing "" makes os.path.join end each path with a separator.
well_folder = f"well{well_num:04}{suffix}"
window_folder = f"track_{start_i:04}_{stop_i:04}{suffix}"
project_path = os.path.join(root, "tracking", experiment_date, tracking_folder, well_folder, "")
project_sub_path = os.path.join(project_path, window_folder, "")

# Read the deep-cell tracks table and preview its first rows.
deep_tracks_path = os.path.join(project_sub_path, "deep_tracks_df.csv")
deep_tracks_df = pd.read_csv(deep_tracks_path)
deep_tracks_df.head()

Unnamed: 0,track_id,t,z,y,x,id,parent_track_id,parent_id,volume,gmm_label,gmm0_prob,gmm1_prob,gmm_logL,v,xs,ys,zs
0,1,0,5.0,58.0,441.0,1000001,-1,-1,3324.375,0.0,0.633889,0.366111,-2.073905,0.812908,441.205668,52.380518,5.741098
1,1,1,5.0,58.0,441.0,2000001,-1,1000001,3587.625,0.0,0.633889,0.366111,-2.073905,0.812908,441.205668,52.380518,5.741098
2,1,2,5.0,57.0,441.0,3000001,-1,2000001,3439.125,0.0,0.633889,0.366111,-2.073905,0.812908,441.169257,52.361909,5.609894
3,1,3,5.0,57.0,441.0,4000001,-1,3000001,3206.25,0.0,0.633889,0.366111,-2.073905,0.812908,441.169257,52.361909,5.609894
4,1,4,5.0,56.0,440.0,5000001,-1,4000001,3196.125,0.0,0.633889,0.366111,-2.073905,0.812908,440.123616,52.308788,5.483976


In [2]:
# if on mac
# tracks_df = pd.read_csv("/Users/nick/Cole Trapnell's Lab Dropbox/Nick Lammers/Nick/killi_temp/tracks01.csv")
# tracks_df.head()

### Number of deep cells over time (in one half of the embryo)

In [33]:
# Temporal resolution: seconds between consecutive frames (converted to hours below).
t_res = 123.11

# All figures produced by this notebook are written to this folder.
figure_dir = os.path.join(project_sub_path, "figures", "")
os.makedirs(figure_dir, exist_ok=True)

# Count the tracked deep cells in every frame. The frame number `t` is kept as a column so
# that elapsed time is derived from the actual frame index rather than from the row position
# (the two differ whenever a frame has no tracked cells or `t` does not start at 0).
cell_count_df = (
    deep_tracks_df.loc[:, ["t", "track_id"]]
    .groupby("t")
    .count()
    .reset_index()
    .rename(columns={"track_id": "n_cells"})
)
cell_count_df["time (hrs)"] = cell_count_df["t"] * t_res / 60 / 60

# Raw per-frame counts plus a LOWESS trendline (window = 10% of the points).
fig = px.scatter(cell_count_df, x="time (hrs)", y="n_cells", labels={"n_cells":"number of deep cells"}, opacity=0.5,
                trendline="lowess", trendline_options=dict(frac=0.1))

# Update the trendline appearance
fig.update_traces(
    selector=dict(mode="lines"),  # Select the trendline trace (line mode)
    line=dict(color="black", width=3, dash="dash")  # Change color, thickness, and style
)

# Keep the smoothed counts; the second trace of the figure is the trendline.
trendline_trace = fig.data[1]
n_trend = trendline_trace.y

fig.update_layout(xaxis=dict(range=[0,54]))
fig.show()
fig.write_image(figure_dir + "n_cells_vs_time.png", scale=2)

### Deep cell velocity over time

In [36]:
import plotly.colors as pc

# Per-track displacement between consecutive rows of the same track, and its Euclidean norm.
# The first row of every track has no predecessor, so its displacement (and `v`) is NaN.
# Note that this overwrites the "v" column that was loaded from the CSV.
track_steps = deep_tracks_df.loc[:, ["track_id", "x", "y", "z"]].groupby(["track_id"]).diff()
deep_tracks_df[["dx", "dy", "dz"]] = track_steps
deep_tracks_df["v"] = np.sqrt(
    np.square(deep_tracks_df["dx"]) + np.square(deep_tracks_df["dy"]) + np.square(deep_tracks_df["dz"])
)

# Mean step length over all cells in each frame, with elapsed time in hours.
deep_tracks_v = deep_tracks_df.loc[:, ["t", "v"]].groupby("t", as_index=False).mean()
deep_tracks_v["time (hrs)"] = deep_tracks_v["t"] * t_res / 60 / 60

second_color = pc.DEFAULT_PLOTLY_COLORS[1]

# NOTE(review): the axis label says um/s, but `v` is the distance between consecutive rows in
# the units of x/y/z and is not divided by `t_res` — confirm the intended units.
fig = px.scatter(
    deep_tracks_v,
    x="time (hrs)",
    y="v",
    opacity=1,
    trendline="lowess",
    trendline_options=dict(frac=0.05),
    labels={"v":"velocity (um/s)"},
    color_discrete_sequence=[second_color],
)

# Style the trendline (the only trace drawn in line mode) as a thick dashed black curve.
fig.update_traces(
    selector=dict(mode="lines"),
    line=dict(color="black", width=3, dash="dash"),
)

# Fixed axis ranges.
fig.update_layout(
    xaxis=dict(range=[0,54]),
    yaxis=dict(range=[0.5, 4]),
)

# Keep the smoothed speed; the second trace of the figure is the trendline.
trendline_trace = fig.data[1]
v_trend = trendline_trace.y

fig.show()

fig.write_image(figure_dir + "cell_vel_vs_time.png", scale=2)

### Look at entropy

In [37]:
# Read the cell density table (a "time" column, a "baseline" column and many "density_f*"
# columns — presumably one per mesh face; verify against the code that writes this file).
cell_density_path = os.path.join(project_sub_path, "cell_density_df.csv")
cell_density_df = pd.read_csv(cell_density_path)
cell_density_df.head()

Unnamed: 0,time,baseline,density_f00000,density_f00001,density_f00002,density_f00003,density_f00004,density_f00005,density_f00006,density_f00007,...,density_f20470,density_f20471,density_f20472,density_f20473,density_f20474,density_f20475,density_f20476,density_f20477,density_f20478,density_f20479
0,0.0,0.004725,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.004694,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2.0,0.004675,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3.0,0.004651,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4.0,0.00463,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [39]:
# Columns holding the density values (their names contain "density").
face_cols = [col for col in cell_density_df.columns if "density" in col]
density_array = cell_density_df.loc[:, face_cols].to_numpy()

# Clip negative densities to zero so that every row can be read as an (unnormalised) distribution.
density_array[density_array < 0] = 0

# Normalise each row (one row per entry of the "time" column) so that it sums to one.
# NOTE(review): a row whose densities are all zero yields NaN here — confirm that cannot occur.
prob_array = density_array / np.sum(density_array, axis=1)[:, None]

# Shannon entropy of every row; the 1e-16 offset keeps log() finite where the probability is 0.
entropy = -np.sum(prob_array * np.log(prob_array + 1e-16), axis=1)

third_color = pc.DEFAULT_PLOTLY_COLORS[2]

# Entropy versus elapsed time (hours) with a LOWESS trendline (window = 3% of the points).
fig = px.scatter(x=cell_density_df["time"] * t_res / 3600, y=entropy,
                labels={"y":"entropy", "x":"time (hrs)"}, color_discrete_sequence=[third_color],
                trendline="lowess", trendline_options=dict(frac=0.03))

# Update the trendline appearance
fig.update_traces(
    selector=dict(mode="lines"),  # Select the trendline trace (line mode)
    line=dict(color="black", width=3, dash="dash")  # Change color, thickness, and style
)
fig.update_layout(xaxis=dict(range=[0,54]))
fig.show()

# Keep the smoothed entropy; the second trace of the figure is the trendline.
trendline_trace = fig.data[1]
e_trend = trendline_trace.y

fig.write_image(figure_dir + "entropy_vs_time.png", scale=2)

### Combine: normalized cell number, cell speed, and entropy trends on one plot

In [48]:
import plotly.graph_objects as go

def _minmax_norm(values):
    """Rescale `values` linearly so that its minimum maps to 0 and its maximum to 1.

    A constant input (zero range) is returned as all zeros instead of dividing by zero.
    """
    values = np.asarray(values, dtype=float)
    lo, hi = np.min(values), np.max(values)
    if hi == lo:
        return np.zeros_like(values)
    return (values - lo) / (hi - lo)


def _trend_times(times, raw_values, trend):
    """Return the time points that belong to a plotly express trendline.

    The trendline only contains points for rows whose raw y-value is not NaN, so the
    matching times are the entries of `times` where `raw_values` is not NaN.

    Raises:
        ValueError: if the reconstructed time axis and `trend` differ in length.
    """
    times = np.asarray(times, dtype=float)
    keep = ~np.isnan(np.asarray(raw_values, dtype=float))
    kept_times = times[keep]
    if len(kept_times) != len(trend):
        raise ValueError(
            f"time axis has {len(kept_times)} points but the trend has {len(trend)}; "
            "re-run the cell that produced this trend"
        )
    return kept_times


# Elapsed time (hours) for every row of the density table.
time_vec = cell_density_df["time"] * t_res / 3600

# Each trend gets its own time axis. In particular the mean speed of the first frame is NaN
# (no previous position to difference against), so `v_trend` has fewer points than `time_vec`;
# plotting it against `time_vec` would shift the whole curve.
# NOTE(review): `entropy` must still be the array computed in the density-entropy cell.
n_time = _trend_times(cell_count_df["time (hrs)"], cell_count_df["n_cells"], n_trend)
v_time = _trend_times(deep_tracks_v["time (hrs)"], deep_tracks_v["v"], v_trend)
e_time = _trend_times(time_vec, entropy, e_trend)

# Min-max normalise every trend to [0, 1] so that they share one y-axis.
e_norm = _minmax_norm(e_trend)
v_norm = _minmax_norm(v_trend)
n_norm = _minmax_norm(n_trend)

first_color = pc.DEFAULT_PLOTLY_COLORS[0]

fig = go.Figure()

for trend_time, trend_values, trend_color, trend_name in (
    (n_time, n_norm, first_color, "number of cells"),
    (v_time, v_norm, second_color, "cell speed"),
    (e_time, e_norm, third_color, "entropy"),
):
    fig.add_trace(go.Scatter(x=trend_time, y=trend_values, mode="markers",
                             marker=dict(size=4, color=trend_color), name=trend_name))

fig.update_layout(xaxis=dict(range=[0, 54.5]),
                  yaxis=dict(range=[-0.2, 1.3]))
fig.show()

fig.write_image(figure_dir + "all_vs_time.png", scale=2)

In [41]:
# Inspect the min-max normalised cell-count trend.
# NOTE(review): the saved output below shows raw integer counts, so it appears to predate
# the normalisation step — re-run this cell to refresh it.
n_norm

array([  7,   9,  11, ..., 556, 553, 525], dtype=int64)

### Test earth-mover's distance

In [7]:
# from scipy.spatial.distance import cdist
# from src.utilities.shape_utils import calculate_face_centroids
# import trimesh

# # load sphere mesh
# sphere_mesh = trimesh.load(os.path.join(project_sub_path, "embryo_sphere_mesh.obj"))

# sphere_mesh_u = trimesh.creation.icosphere(subdivisions=5, radius=1.0)

# face_centroids_u = calculate_face_centroids(sphere_mesh_u)
# face_centroids = calculate_face_centroids(sphere_mesh)

# # Compute pairwise geodesic distances
# dot_products = np.dot(face_centroids_u, face_centroids_u.T)

In [8]:
# # Ensure numerical stability for acos
# dot_products = np.clip(dot_products, -1.0, 1.0)  # Clip to avoid issues with floating-point precision
# angular_distances = np.arccos(dot_products)

# C = np.mean(face_centroids, axis=0)
# radius = np.linalg.norm(face_centroids[0, :] - C)

# geodesic_distances = radius * angular_distances

In [9]:
# from src.utilities.plot_utils import mesh_face_plot

# f, v = sphere_mesh.faces.copy(), sphere_mesh.vertices.copy()

# fig = mesh_face_plot(f, v, geodesic_distances[0, :])
# fig.show()

In [32]:
from tqdm import tqdm

# Frame-to-frame change of the density distribution.
# Despite the section title (EMD) and the variable name (`kld_vec`), the quantity computed
# here is the squared L2 distance between the regularised, re-normalised densities of
# consecutive frames. A KL-divergence variant would be sum(p1 * (log(p1) - log(p0))).
reg = 1e-16
n_steps = prob_array.shape[0] - 1

# One value per consecutive pair of frames, so the last time point is dropped.
# NOTE: `time_vec` holds the raw "time" column here (not hours).
time_vec = cell_density_df["time"].to_numpy()[:-1]
kld_vec = np.empty((n_steps,))


def _regularised_pdf(density_row):
    """Add `reg` to every entry of `density_row` and rescale the row to sum to one."""
    shifted = density_row + reg
    return shifted / np.sum(shifted)


for t in tqdm(range(n_steps)):
    step = _regularised_pdf(density_array[t, :]) - _regularised_pdf(density_array[t+1, :])
    kld_vec[t] = np.sum(step**2)


# Log-scaled y-axis because the distances span several orders of magnitude.
fig = px.scatter(x=time_vec, y=kld_vec)
fig.update_layout(yaxis=dict(type="log"))
fig.show()

100%|██████████| 1599/1599 [00:00<00:00, 2444.96it/s]


### Entropy over time

In [209]:
# for this, I want SH interpolation
import flowshape as fs
import igl
import scipy as sp

# Maximum spherical-harmonic degree passed to the decomposition below; sets the level of detail.
l_max = 7

# NOTE(review): `sphere_mesh`, `face_df` and `f_cols` are not defined by any active cell shown
# in this notebook (`sphere_mesh` is only loaded in a commented-out cell) — they must already
# exist in the kernel for this cell to run.
v, f = sphere_mesh.vertices.copy(), sphere_mesh.faces.copy()

# Map the mesh to a sphere, take the face barycentres projected back onto the sphere, and
# build a diagonal weight matrix from the face areas (half of igl.doublearea).
v_sphere = fs.sphere_map(v, f)
v_bary = fs.project_sphere(igl.barycenter(v_sphere, f))
face_areas = 0.5 * igl.doublearea(v_sphere, f)
W = sp.sparse.diags(face_areas)

# Output table: same layout as `face_df`, with the `f_cols` values replaced row by row.
face_df_sh = face_df.copy()

for i in tqdm(range(face_df.shape[0])):

    # Values of the face columns for this row, centred on their mean before the fit.
    raw_vals = face_df.loc[i, f_cols].to_numpy().ravel()
    mu = np.mean(raw_vals)

    weights, Y_mat = fs.IRF_scalar(raw_vals - mu, v_bary, W, max_degree=l_max)

    # Reconstruct from the fitted weights, restore the mean, then shift so the minimum is 0.
    fi_recon = Y_mat.dot(weights) + mu
    face_df_sh.loc[i, f_cols] = fi_recon - np.min(fi_recon)

100%|██████████| 1600/1600 [01:09<00:00, 23.06it/s]


In [211]:
# Entropy of the SH-reconstructed values in the `face_nz_cols` columns, one value per row.
# NOTE(review): `face_nz_cols` is not defined in any cell shown here — confirm where it is set.
# This rebinds `entropy`, replacing the value computed from the raw density table.
fi_array = face_df_sh.loc[:, face_nz_cols].to_numpy().copy()

# Normalise every row to sum to one.
row_totals = np.sum(fi_array, axis=1)[:, None]
pdf_array = fi_array / row_totals

# The 1e-12 offset keeps log() finite where the probability is exactly zero.
entropy = -np.sum(pdf_array * np.log(pdf_array + 1e-12), axis=1)

In [212]:
# Inspect the per-row entropy values computed from the SH-reconstructed table.
entropy

array([4.35798978, 4.35471801, 4.36635066, ..., 4.16825214, 4.1554241 ,
       4.14788697])

In [213]:
# Plot the entropy of the SH-reconstructed densities against elapsed time.
# The time axis is rebuilt from the length of `entropy`: `time_vec` was last assigned in the
# frame-to-frame comparison cell with its final element dropped, so it is one entry shorter
# than `entropy`, and px.scatter rejects x/y arguments of different lengths.
# NOTE(review): assumes the rows of `entropy` are consecutive frames starting at frame 0 — confirm.
entropy_time_hrs = np.arange(len(entropy)) * t_res / 3600
fig = px.scatter(x=entropy_time_hrs, y=entropy, labels={"x": "time (hrs)", "y": "entropy"})
fig.show()

In [173]:
# Inspect the entropy values.
# NOTE(review): the saved output below (a pandas Series of -0.0) comes from an earlier
# execution than the cells above and does not match them — re-run or remove this cell.
entropy

0      -0.0
1      -0.0
2      -0.0
3      -0.0
4      -0.0
       ... 
1595   -0.0
1596   -0.0
1597   -0.0
1598   -0.0
1599   -0.0
Length: 1600, dtype: float64