## Comparing VAE architectures
This notebook compares the performance of different VAE architectures. Specifically, we test how model depth (the number of convolutional layers) and the size of the latent space affect:
1. Image reconstruction quality
2. Model generalizability
3. Biological information content of the latent space

In [1]:
import os
import plotly
import numpy as np
import glob as glob
from functions.utilities import path_leaf

#### Get paths to data, figures, and latent space outputs

In [33]:
# Root of the morphseq project tree. Swap in the commented-out line below to
# use the Windows location instead.
root = "/Users/nick/Dropbox (Cole Trapnell's Lab)/Nick/morphseq/"
# root = "E:\\Nick\\Dropbox (Cole Trapnell's Lab)\\Nick\\morphseq\\"

train_name = "20230915_vae"

# Derive the model directory from `root` rather than repeating the full prefix,
# so that changing `root` is enough to re-point the notebook.
# NOTE(review): the active model sits under "20230815_vae", which does not match
# `train_name` ("20230915_vae") -- confirm which training run is intended.
model_path = os.path.join(root, "training_data", "20230815_vae", "z50_bs032_ne100_depth05")
# model_path = "/Users/nick/Dropbox (Cole Trapnell's Lab)/Nick/morphseq/training_data/20230915_vae/z100_bs032_ne100_depth05_out16/"

In [34]:
import pandas as pd

# Entries of the model directory are sorted by name and the last one is used
# (presumably the most recent training run -- confirm the folder naming scheme).
model_dir_entries = sorted(os.listdir(model_path))
last_training = model_dir_entries[-1]

# Location of the "figures" folder inside the selected entry.
m_fig_path = os.path.join(model_path, last_training, "figures")

# Load the results table; the first CSV column is used as the index.
embryo_stats_csv = os.path.join(m_fig_path, "embryo_stats_df.csv")
morph_df = pd.read_csv(embryo_stats_csv, index_col=0)

In [85]:
import plotly.express as px
import plotly.graph_objects as go

# Positional (0-based) row numbers of the two groups being compared.
wik_indices = np.where(morph_df["master_perturbation"]=="wck-AB")[0]
# NOTE: despite its name, this variable holds the rows labelled 'Shh_100'.
gdf3_indices = np.where(morph_df["master_perturbation"]=='Shh_100')[0]

# np.where returns positions, not index labels, so both groups must be selected
# with .iloc. morph_df was loaded with index_col=0, so its labels are not
# guaranteed to coincide with row positions and .loc could pick the wrong rows.
wik_rows = morph_df.iloc[wik_indices]
shh_rows = morph_df.iloc[gdf3_indices]

# Faint background of "wck-AB" rows with the 'Shh_100' rows drawn on top.
fig = px.scatter(wik_rows, x="UMAP_00", y="UMAP_01", opacity=0.1)
fig.add_trace(go.Scatter(x=shh_rows["UMAP_00"], y=shh_rows["UMAP_01"],
                         mode="markers", name="Shh_100"))
fig.show()

In [38]:
import umap
import hdbscan
import sklearn.cluster as cluster

# Two-column array of the UMAP coordinates, one row per row of morph_df.
umap_array = morph_df[["UMAP_00", "UMAP_01"]].to_numpy()

# Cluster the 2D embedding with HDBSCAN. Later cells treat negative labels as
# "not assigned to any cluster".
clusterer = hdbscan.HDBSCAN(min_samples=10, min_cluster_size=500)
labels = clusterer.fit_predict(umap_array)

In [53]:
from matplotlib import pyplot as plt

clustered = (labels >= 0)
lb_str = labels.astype('str')

# Human-readable cluster names, looked up with label + 1 so that entry 0 is the
# name for label -1 (unassigned points).
key = ["no cluster", "dorsal/ventral cluster", "lateral cluster 1", "young cluster", "lateral cluster 2", "dorsal/ventral cluster"]
name_vec = [key[l+1] for l in labels]

# Pin one colour per cluster name. With color_discrete_sequence, Plotly Express
# hands out colours in order of first appearance, so the colour of a cluster
# would depend on row order and "gray" would not reliably mean "no cluster".
cluster_colors = {
    "no cluster": "gray",
    "dorsal/ventral cluster": "#636EFA",
    "lateral cluster 1": "#EF553B",
    "young cluster": "#00CC96",
    "lateral cluster 2": "#AB63FA",
}

# The x coordinate is negated, i.e. the embedding is drawn mirrored left-right.
fig = px.scatter(x=-umap_array[:, 0], y=umap_array[:, 1], opacity=0.25, color=name_vec,
                 color_discrete_map=cluster_colors)

fig.update_layout(template="plotly")

fig.update_xaxes(title_text='morphology dim 1')
fig.update_yaxes(title_text='morphology dim 2')

fig.show()

In [51]:
from sklearn.metrics import pairwise_distances, pairwise_distances_argmin

# Force a cluster assignment for every point: each unassigned point (negative
# label) inherits the label of its nearest assigned point in UMAP space.
unassigned_indices = np.where(labels<0)[0]
assigned_indices = np.where(labels>=0)[0]

labels_forced = labels.copy()
if unassigned_indices.size > 0 and assigned_indices.size > 0:
    # pairwise_distances_argmin returns the nearest-neighbour index directly,
    # without materialising the full (n_unassigned x n_assigned) distance matrix.
    nn_indices = pairwise_distances_argmin(umap_array[unassigned_indices, :],
                                           umap_array[assigned_indices, :])
    labels_forced[unassigned_indices] = labels[assigned_indices][nn_indices]
else:
    # Nothing to reassign (or nothing to assign to): labels are left unchanged.
    nn_indices = np.empty(0, dtype=int)


In [54]:
# Cluster name for every point after forced assignment (lookup is label + 1).
name_vec_forced = [key[l+1] for l in labels_forced]

# Pin one colour per cluster name. With color_discrete_sequence, Plotly Express
# hands out colours in order of first appearance, so the same cluster could be
# drawn in a different colour from one figure to the next.
forced_cluster_colors = {
    "no cluster": "gray",
    "dorsal/ventral cluster": "#636EFA",
    "lateral cluster 1": "#EF553B",
    "young cluster": "#00CC96",
    "lateral cluster 2": "#AB63FA",
}

# The x coordinate is negated, i.e. the embedding is drawn mirrored left-right.
fig = px.scatter(x=-umap_array[:, 0], y=umap_array[:, 1], opacity=0.25, color=name_vec_forced,
                 color_discrete_map=forced_cluster_colors)

fig.update_layout(template="plotly")

fig.update_xaxes(title_text='morphology dim 1')
fig.update_yaxes(title_text='morphology dim 2')

fig.show()

Use the forced cluster labels to generate a new vector that tells us, for each row, whether to keep it (0), flip it left-right (LR) (1), or remove it (-1).

In [58]:
# Revision code per point: 0 = keep, 1 = flip left-right, -1 = remove.
in_dv_cluster = labels_forced == 0
in_lateral_1 = labels_forced == 1
# Points of cluster 2 with a negative raw UMAP x coordinate; the plots draw -x,
# so these appear on the right-hand side of the figures.
in_young_right = (labels_forced == 2) & (umap_array[:, 0] < 0)

revise_vec = np.zeros(labels_forced.shape)
revise_vec[in_dv_cluster] = -1   # drop dorsal/ventral embryos
revise_vec[in_lateral_1] = 1     # flip lateral cluster 1
revise_vec[in_young_right] = 1   # flip right half of young cluster
# NOTE(review): the name list used for plotting also calls label 4
# "dorsal/ventral cluster", but only label 0 is dropped here -- confirm.

In [59]:
# revise_vec is a float array, which Plotly Express would colour on a continuous
# scale (ignoring any discrete palette). Translate the codes into category
# names so each revision action gets its own discrete colour and legend entry.
revision_names = {0: "keep", 1: "flip LR", -1: "remove"}
revision_name_vec = [revision_names[int(code)] for code in revise_vec]

# The x coordinate is negated, i.e. the embedding is drawn mirrored left-right.
fig = px.scatter(x=-umap_array[:, 0], y=umap_array[:, 1], opacity=0.25, color=revision_name_vec,
                 color_discrete_map={"keep": "#636EFA", "flip LR": "#EF553B", "remove": "gray"})

fig.update_layout(template="plotly")

fig.update_xaxes(title_text='morphology dim 1')
fig.update_yaxes(title_text='morphology dim 2')

fig.show()

In [60]:
# Attach the revision codes to the results table (modifies morph_df in place)
# and write the annotated table next to the original CSV.
morph_df["revision_labels"] = revise_vec
rev1_csv = os.path.join(m_fig_path, "embryo_stats_df_rev1.csv")
morph_df.to_csv(rev1_csv)

In [68]:
# Load the second-revision table from the same figures folder.
# NOTE(review): nothing visible in this notebook writes "embryo_stats_df_rev2.csv"
# (only "..._rev1.csv" is saved) -- presumably it is produced elsewhere; confirm.
rev2_csv = os.path.join(m_fig_path, "embryo_stats_df_rev2.csv")
morph_df_rev = pd.read_csv(rev2_csv)

In [91]:
# Positional (0-based) row numbers of the two groups in the revised table.
wik_indices_rev = np.where(morph_df_rev["master_perturbation"]=="wck-AB")[0]
# NOTE: despite its name, this variable holds the rows labelled 'Shh_100'.
gdf3_indices_rev = np.where(morph_df_rev["master_perturbation"]=='Shh_100')[0]

# np.where returns positions, so select rows with .iloc for both groups; this
# stays correct even if the table's index labels are not 0..n-1.
wik_rows_rev = morph_df_rev.iloc[wik_indices_rev]
shh_rows_rev = morph_df_rev.iloc[gdf3_indices_rev]

# 'Shh_100' rows at full opacity, with the "wck-AB" rows overlaid faintly and
# coloured by their predicted_stage_hpf value.
fig = px.scatter(shh_rows_rev, x="UMAP_00_rev", y="UMAP_01_rev",
                 opacity=1)
fig.add_trace(go.Scatter(x=wik_rows_rev["UMAP_00_rev"],
                         y=wik_rows_rev["UMAP_01_rev"],
                         marker=dict(color=wik_rows_rev["predicted_stage_hpf"],
                                     opacity=0.1),
                         mode="markers",
                         name="wck-AB"))
fig.show()