## Exploring morph VAE output
This notebook generates visualizations and conducts analyses to assess the biological content of the latent space representations learned by our VAE models.

In [None]:
import os
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo

pyo.init_notebook_mode()

#### Get paths to data, figures, and latent space outputs

In [None]:
# Project root on the local machine; the commented line is a Windows-style alternative.
root = "/Users/nick/Dropbox (Cole Trapnell's Lab)/Nick/morphseq/"
# root = "E:\\Nick\\Dropbox (Cole Trapnell's Lab)\\Nick\\morphseq\\"

# Training-data folder and model folder to analyse.
# Alternative choices are kept commented out for quick switching.
train_name = "20230915_vae"
# train_name = "20231106_ds"
# /Users/nick/Dropbox (Cole Trapnell's Lab)/Nick/morphseq/training_data/20230915_vae_flipped/z100_bs032_ne100_depth05
# model_name = "20230804_vae_full_conv_z25_bs032_ne100_depth05"
model_name = "z100_bs032_ne250_depth05_out16_temperature_sweep2"
# model_name = "z100_bs064_ne250_depth05_out16_class_ignorance_test"
train_dir = os.path.join(root, "training_data", train_name)
output_dir = os.path.join(train_dir, model_name)

# Use the entry of output_dir that sorts last by name.
# NOTE(review): presumably run folders are timestamp-named, so this selects
# the most recent training run -- confirm. Raises IndexError if the folder is empty.
last_training = sorted(os.listdir(output_dir))[-1]

# Folder holding this run's figures/data, and the folder new figures are written to.
figure_path = os.path.join(output_dir, last_training, "figures")
out_figure_path = "/Users/nick/Dropbox (Cole Trapnell's Lab)/Nick/slides/20231130/"
# exist_ok=True replaces the separate isdir() check.
os.makedirs(out_figure_path, exist_ok=True)

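#### Load precomputed UMAP embeddings
The cell below reads the saved `umap_df.csv` table rather than building DataLoader objects. For reference, the data splits used by the model are:
- "Train" data were used to train the model
- "Eval" data were used to assess the model during training
- "Test" data were untouched during the training process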

In [None]:
from scipy import ndimage
from scipy.interpolate import LinearNDInterpolator

# Read the UMAP table stored with the model's figures; the first CSV column becomes the index.
umap_csv_path = os.path.join(figure_path, "umap_df.csv")
umap_df = pd.read_csv(umap_csv_path, index_col=0)

# Positional row indices of the entries whose perturbation label is "wck-AB".
is_wik_row = umap_df["master_perturbation"] == "wck-AB"
wik_indices = np.where(is_wik_row)[0]

# Flip the sign of the first 2D "bio" UMAP coordinate (mirrors the plots left/right).
umap_df["UMAP_00_bio_2"] = -umap_df["UMAP_00_bio_2"]

In [None]:
# 2D "bio" UMAP of the "wck-AB" rows, coloured by predicted stage.
scatter_2d_kwargs = dict(
    x="UMAP_00_bio_2",
    y="UMAP_01_bio_2",
    color="predicted_stage_hpf",
    opacity=0.5,
    template="plotly",
)
fig = px.scatter(umap_df.iloc[wik_indices], **scatter_2d_kwargs)

fig.update_layout(xaxis_title="UMAP 1", yaxis_title="UMAP 2")

# Optional fixed axis ranges:
# fig.update_xaxes(range=[1, 18])
# fig.update_yaxes(range=[-5, 14])

# Display inline, then save a static copy to the output figure folder.
fig.show()
bio_scatter_png = os.path.join(out_figure_path, "UMAP_wt_scatter_bio.png")
fig.write_image(bio_scatter_png)

In [None]:

def make_rotating_figure(scatter, angle_vec, frame_dir, iter_lb=None, title_str=None, za=None):
    """Write one PNG frame per camera angle for a rotating 3D view of a trace.

    For every angle in ``angle_vec`` a new figure containing ``scatter`` is
    built, the camera is placed on a circle around the z axis, and the figure
    is saved as ``plot_a_<frame index, 3 digits>.png`` inside ``frame_dir``.

    Parameters
    ----------
    scatter
        Plotly 3D trace (e.g. ``go.Scatter3d``) added to each frame.
    angle_vec
        Iterable of camera azimuth angles in radians, one per frame.
    frame_dir
        Directory the frames are written to; created if it does not exist.
    iter_lb
        Unused; kept so existing callers keep working.
    title_str
        Optional centred figure title.
    za
        z component of the camera direction before the factor-2 scaling.
        Defaults to 0.8.

    Returns
    -------
    None
    """
    # Resolve the default once instead of on every iteration.
    if za is None:
        za = 0.8

    os.makedirs(frame_dir, exist_ok=True)

    for iter_i, angle in enumerate(angle_vec):
        # Camera position: unit circle in x/y at height za, scaled by 2.
        # numpy is used so the function does not depend on `math` being
        # imported by a later notebook cell.
        camera = dict(
            eye=dict(x=float(2 * np.cos(angle)),
                     y=float(2 * np.sin(angle)),
                     z=float(2 * za)))

        fig = go.Figure()
        fig.add_trace(scatter)
        fig.update_layout(template="plotly")

        if title_str is not None:
            fig.update_layout(title_text=title_str, title_x=0.5)

        # Cubic aspect ratio with the z axis drawn reversed.
        fig.update_layout(
                    scene=dict(
                        zaxis=dict(autorange="reversed"),
                        aspectratio=dict(x=1, y=1, z=1)))

        fig.update_layout(scene_camera=camera, scene_dragmode='orbit')

        # Blank axis titles and hidden tick labels for a clean frame.
        fig.update_layout(scene = dict(
                        xaxis_title='',
                        yaxis_title='',
                        zaxis_title='',
                        xaxis = dict(showticklabels=False),
                        yaxis = dict(showticklabels=False),
                        zaxis = dict(showticklabels=False)))

        fig.update_layout(coloraxis_showscale=False)

        # scale=5 renders the PNG at five times the default resolution.
        fig.write_image(os.path.join(frame_dir, f"plot_a_{iter_i:03}.png"), scale=5)

In [None]:
import math

# Folder that receives the frames of the rotating 3D UMAP.
frame_dir_bio = os.path.join(out_figure_path, "3d_umaps")
os.makedirs(frame_dir_bio, exist_ok=True)

# 25 camera angles from 1.25*pi to 3.25*pi, i.e. one full turn.
# linspace includes both endpoints, so the first and last angle coincide.
angle_vec = np.linspace(1.25*np.pi, 3.25*np.pi, 25)

# 3D scatter trace of the "wck-AB" rows in the 3D "bio" UMAP,
# coloured by predicted stage, with a thin semi-transparent grey outline.
scatter = go.Scatter3d(
                   x=umap_df["UMAP_00_bio_3"].iloc[wik_indices],
                   y=umap_df["UMAP_01_bio_3"].iloc[wik_indices],
                   z=umap_df["UMAP_02_bio_3"].iloc[wik_indices],
                   mode='markers',
                   marker = dict(color=umap_df["predicted_stage_hpf"].iloc[wik_indices],
                                 size=6,
                                 line=dict(color= 'rgba(70,70,70, 0.5)', width=1)))


# Uncomment to render a single test frame at the first angle:
# make_rotating_figure(scatter, [angle_vec[0]], frame_dir_bio)

In [None]:
# Interactive 3D "bio" UMAP of the "wck-AB" rows, coloured by predicted stage.
scatter_3d_kwargs = dict(
    x="UMAP_00_bio_3",
    y="UMAP_01_bio_3",
    z="UMAP_02_bio_3",
    color="predicted_stage_hpf",
    opacity=1,
    template="plotly",
)
fig = px.scatter_3d(umap_df.iloc[wik_indices], **scatter_3d_kwargs)

# Smaller markers and explicit axis titles.
fig.update_traces(marker_size=4)
axis_titles = dict(xaxis_title='UMAP 1', yaxis_title='UMAP 2', zaxis_title='UMAP 3')
fig.update_layout(scene=axis_titles)
fig.show()

In [None]:
# 2D "n" UMAP (columns UMAP_00_n_2 / UMAP_01_n_2) of the "wck-AB" rows,
# coloured by predicted stage.
nbio_scatter_kwargs = dict(
    x="UMAP_00_n_2",
    y="UMAP_01_n_2",
    color="predicted_stage_hpf",
    opacity=0.5,
    template="plotly",
)
fig = px.scatter(umap_df.iloc[wik_indices], **nbio_scatter_kwargs)

fig.update_layout(xaxis_title="UMAP 1", yaxis_title="UMAP 2")

# Optional fixed axis ranges:
# fig.update_xaxes(range=[1, 18])
# fig.update_yaxes(range=[1, 10])

# Display inline, then save a static copy to the output figure folder.
fig.show()
nbio_scatter_png = os.path.join(out_figure_path, "UMAP_wt_scatter_nbio.png")
fig.write_image(nbio_scatter_png)

## Calculate velocities in UMAP space

In [None]:
# Group rows by snip_id with its last 10 characters removed.
# NOTE(review): presumably those 10 characters are a per-frame suffix, so the
# remaining prefix identifies one embryo -- confirm.
snip_vec = umap_df["snip_id"].str[:-10].to_numpy(dtype=str)
snip_index = np.unique(snip_vec)

coord_cols = ["UMAP_00_bio_3", "UMAP_01_bio_3", "UMAP_02_bio_3"]

# Work on plain arrays so all indexing is positional and does not depend on
# the DataFrame index being exactly 0..n-1.
coords = umap_df[coord_cols].to_numpy(dtype=float)
stage = umap_df["predicted_stage_hpf"].to_numpy(dtype=float)

# Velocities default to NaN; the last row of every group stays NaN.
vel = np.full(coords.shape, np.nan)

for s, snip in enumerate(snip_index):
    s_indices = np.flatnonzero(snip_vec == snip)
    if s_indices.size < 2:
        continue  # a single observation has no forward difference

    # Forward differences in row order.
    # NOTE(review): assumes rows of a group are already ordered in time -- confirm.
    dt = np.diff(stage[s_indices])
    # A zero time step would give +/-inf, which dropna() does not remove; use NaN.
    dt[dt == 0] = np.nan

    vel[s_indices[:-1]] = np.diff(coords[s_indices], axis=0) / dt[:, np.newaxis]

# Store one "<coordinate>_vel" column per UMAP dimension.
for j, col in enumerate(coord_cols):
    umap_df[col + "_vel"] = vel[:, j]

## Use a simple k-means clustering approach to extract average velocities for different parts of UMAP space

In [None]:
from sklearn.cluster import KMeans

# "wck-AB" rows with no missing values, as an independent frame so the label
# column added below belongs to this subset only.
# NOTE(review): dropna() removes rows with a NaN in *any* column, not just the
# velocity columns -- confirm that is intended.
umap_df_wt = umap_df.iloc[wik_indices].dropna().copy()
n_points = umap_df_wt.shape[0]

# Aim for about 200 observations per cluster, but never fewer than one cluster
# (KMeans rejects n_clusters=0 when the subset is small).
n_points_per_cluster = 200
n_clusters = max(1, n_points // n_points_per_cluster)

# Cluster on the first two dimensions of the 3D "bio" UMAP.
kmeans_out = KMeans(n_clusters=n_clusters, random_state=0, n_init="auto").fit(
        umap_df_wt.loc[:, ["UMAP_00_bio_3", "UMAP_01_bio_3"]])

umap_df_wt["kmeans_label"] = kmeans_out.labels_

# Per-cluster means of position, velocity and predicted stage.
cols_to_average = ["UMAP_00_bio_3", "UMAP_01_bio_3",
                   "UMAP_00_bio_3_vel", "UMAP_01_bio_3_vel", "predicted_stage_hpf"]
avg_vel_wik_df = umap_df_wt.loc[:, cols_to_average + ["kmeans_label"]].groupby("kmeans_label").mean()

In [None]:
from sklearn.cluster import KMeans

# Positional row indices of the entries whose perturbation label is "gdf3".
gdf3_indices = np.where(umap_df["master_perturbation"]=="gdf3")[0]

# "gdf3" rows with no missing values, as an independent frame so the label
# column added below belongs to this subset only.
# NOTE(review): dropna() removes rows with a NaN in *any* column -- confirm.
umap_df_gdf3 = umap_df.iloc[gdf3_indices].dropna().copy()
n_points = umap_df_gdf3.shape[0]

# Aim for about 50 observations per cluster, but never fewer than one cluster
# (KMeans rejects n_clusters=0 when the subset is small).
n_points_per_cluster = 50
n_clusters = max(1, n_points // n_points_per_cluster)

# Cluster on the first two dimensions of the 3D "bio" UMAP.
kmeans_out_gdf3 = KMeans(n_clusters=n_clusters, random_state=0, n_init="auto").fit(
        umap_df_gdf3.loc[:, ["UMAP_00_bio_3", "UMAP_01_bio_3"]])

umap_df_gdf3["kmeans_label"] = kmeans_out_gdf3.labels_

# Per-cluster means; cols_to_average comes from the preceding clustering cell.
avg_vel_gdf3_df = umap_df_gdf3.loc[:, cols_to_average + ["kmeans_label"]].groupby("kmeans_label").mean()

In [None]:
import plotly.figure_factory as ff

# Positions of the "wck-AB" observations (first two dims of the 3D "bio" UMAP).
X = umap_df_wt["UMAP_00_bio_3"].to_numpy()
Y = umap_df_wt["UMAP_01_bio_3"].to_numpy()

# Density background; x is negated, so the arrows below negate x as well.
fig1 = go.Figure(go.Histogram2dContour(
        x = -X,
        y = Y,
        colorscale = 'Blues',
        ncontours = 30,
        histnorm = "percent",
        showscale=False
))

# Clamp each velocity component to [-max_v, max_v] so a few large arrows do
# not dominate. clip() returns new Series, so avg_vel_wik_df is not modified.
max_v = 1

vx = (-avg_vel_wik_df["UMAP_00_bio_3_vel"]).clip(lower=-max_v, upper=max_v)
vy = avg_vel_wik_df["UMAP_01_bio_3_vel"].clip(lower=-max_v, upper=max_v)

# One arrow per k-means cluster, anchored at the cluster's mean position.
fig2 = ff.create_quiver(x=-avg_vel_wik_df["UMAP_00_bio_3"], y=avg_vel_wik_df["UMAP_01_bio_3"],
                       u=vx, v=vy,
                       scale=1, arrow_scale=0.5, line=dict(color="black"))

# Add the density contours to the quiver figure.
fig2.add_traces(data = fig1.data)

fig2.update_xaxes(title="UMAP 1")#range=[-19, 0])
fig2.update_yaxes(title="UMAP 2") #range=[0, 10])

fig2.show()
fig2.write_image(os.path.join(out_figure_path, "density_plot_with_quiver_wik.png"), scale=2)

In [None]:
# Same density + quiver plot, for the "gdf3" rows.

# Positions of the "gdf3" observations (first two dims of the 3D "bio" UMAP).
X = umap_df_gdf3["UMAP_00_bio_3"].to_numpy()
Y = umap_df_gdf3["UMAP_01_bio_3"].to_numpy()

# Density background; x is negated, so the arrows below negate x as well.
fig1 = go.Figure(go.Histogram2dContour(
        x = -X,
        y = Y,
        colorscale = 'Reds',
        ncontours = 30,
        histnorm = "percent",
        showscale=False
))

# Clamp each velocity component to [-max_v, max_v] so a few large arrows do
# not dominate. clip() returns new Series, so avg_vel_gdf3_df is not modified.
max_v = 1

vx = (-avg_vel_gdf3_df["UMAP_00_bio_3_vel"]).clip(lower=-max_v, upper=max_v)
vy = avg_vel_gdf3_df["UMAP_01_bio_3_vel"].clip(lower=-max_v, upper=max_v)

# One arrow per k-means cluster, anchored at the cluster's mean position.
# `ff` is plotly.figure_factory, imported in the preceding quiver cell.
fig2 = ff.create_quiver(x=-avg_vel_gdf3_df["UMAP_00_bio_3"], y=avg_vel_gdf3_df["UMAP_01_bio_3"],
                       u=vx, v=vy,
                       scale=1, arrow_scale=0.5, line=dict(color="black"))

# Add the density contours to the quiver figure.
fig2.add_traces(data = fig1.data)

fig2.update_xaxes(title="UMAP 1")#range=[-19, 0])
fig2.update_yaxes(title="UMAP 2") #range=[0, 10])

fig2.show()
fig2.write_image(os.path.join(out_figure_path, "density_plot_with_quiver_gdf3.png"), scale=2)

In [None]:
# Filled density contour over the first two "bio" UMAP dimensions, with
# predicted stage passed as z. The call's closing parenthesis was missing,
# which made the whole cell a syntax error.
# NOTE(review): avg_vel_df_all is not defined in any visible cell -- confirm
# where it comes from before running.
# NOTE(review): no histfunc is given, so plotly's default aggregation of z is
# used -- confirm that is the intended statistic.
fig = px.density_contour(avg_vel_df_all, x="UMAP_00_bio_3", y="UMAP_01_bio_3", z="predicted_stage_hpf")
fig.update_traces(contours_coloring="fill", colorscale="magma")


fig.show()

In [None]:
# Filled density contour of the points in avg_vel_df_all over the first two
# "bio" UMAP dimensions.
contour_axes = dict(x="UMAP_00_bio_3", y="UMAP_01_bio_3")
fig = px.density_contour(avg_vel_df_all, **contour_axes)
fig.update_traces(contours_coloring="fill")
fig.show()

In [None]:
# Sample positions (3D "bio" UMAP) and the velocity components measured there.
X, Y, Z = (avg_vel_df_all[c].to_numpy()
           for c in ("UMAP_00_bio_3", "UMAP_01_bio_3", "UMAP_02_bio_3"))
dX, dY, dZ = (avg_vel_df_all[c].to_numpy()
              for c in ("UMAP_00_bio_3_vel", "UMAP_01_bio_3_vel", "UMAP_02_bio_3_vel"))

# Regular grid with n_bins intervals per axis, spanning the range of the samples.
n_bins = 30


def _axis_grid(values):
    """Return n_bins + 1 evenly spaced points from min(values) to max(values)."""
    return np.linspace(min(values), max(values), num=n_bins+1)


xx = _axis_grid(X)
yy = _axis_grid(Y)
zz = _axis_grid(Z)

Xg, Yg, Zg = np.meshgrid(xx, yy, zz)  # 3D grid for interpolation

# (n_samples, 3) array of sample coordinates handed to the interpolators.
xyz_grid_long = np.column_stack((X, Y, Z))


def _interpolate_on_grid(component):
    """Linearly interpolate one velocity component onto the grid (0 outside the hull)."""
    interpolator = LinearNDInterpolator(xyz_grid_long, component.flatten(), fill_value=0)
    return interpolator, interpolator(Xg, Yg, Zg)


# interpolate each direction
interp_dx, dXI = _interpolate_on_grid(dX)
interp_dy, dYI = _interpolate_on_grid(dY)
interp_dz, dZI = _interpolate_on_grid(dZ)

# Apply mild gaussian smoothing (sigma = 1 grid cell)
dXIS, dYIS, dZIS = (ndimage.gaussian_filter(grid, 1) for grid in (dXI, dYI, dZI))

# Magnitude of the smoothed velocity vector at every grid point
UM = np.sqrt(np.square(dXIS) + np.square(dYIS) + np.square(dZIS))

# Semi-transparent iso-surfaces of the magnitude, starting at 0.25.
volume_trace = go.Volume(
    x=Xg.flatten(),
    y=Yg.flatten(),
    z=Zg.flatten(),
    value=UM.flatten(),
    isomin=0.25,
    opacity=0.25,
    surface_count=25,
    colorscale="ice",
)
fig = go.Figure(data=volume_trace)

fig.update_layout(template="plotly")
fig.show()