#### Notebook to make supplementary plots showing UMAP of morph space

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os
from glob2 import glob
from src.functions.plot_functions import format_3d_plotly, rotate_figure

In [None]:
# load embryo_df for our current best model
# All paths below are machine-specific. They are derived from one Dropbox base
# folder so that switching machines only requires changing `dropbox_root`.
# dropbox_root = "/media/nick/hdd02/Cole Trapnell's Lab Dropbox/Nick Lammers/Nick/"
dropbox_root = "/Users/nick/Cole Trapnell's Lab Dropbox/Nick Lammers/Nick/"

root = os.path.join(dropbox_root, "morphseq", "")
train_name = "20241107_ds"
model_name = "SeqVAE_z100_ne150_sweep_01_block01_iter030"
train_dir = os.path.join(root, "training_data", train_name, "")
output_dir = os.path.join(train_dir, model_name)

# get path to model: use the entry under output_dir that sorts last by name
training_candidates = sorted(glob(os.path.join(output_dir, "*")))
if not training_candidates:
    # fail with a message that names the folder instead of a bare IndexError
    raise FileNotFoundError(f"No training output found under {output_dir!r}")
training_path = training_candidates[-1]
# NOTE(review): dirname() yields the parent folder (i.e. output_dir), not the
# name of the selected run; os.path.basename may have been intended -- confirm
# before relying on this value.
training_name = os.path.dirname(training_path)
read_path = os.path.join(training_path, "figures", "")

# path to save data
# data_path = os.path.join(root, "results", "20240303", "")

# path to figures and data (trailing "" keeps the trailing path separator)
slides_dir = os.path.join(dropbox_root, "slides", "morphseq", "20250312")
fig_path = os.path.join(slides_dir, "morph_metrics", "")
fig_data_path = os.path.join(slides_dir, "data", "morph_metrics", "")
os.makedirs(fig_path, exist_ok=True)
os.makedirs(fig_data_path, exist_ok=True)

### Load full morph dataset with all embryos 

In [None]:
# Read the embryo stats table and the UMAP table from the model's figures folder.
morph_df = pd.read_csv(os.path.join(read_path, "embryo_stats_df.csv"), index_col=0)
umap_df = pd.read_csv(os.path.join(read_path, "umap_df.csv"), index_col=0)

# Shape is printed before and after the merge so an unexpected change in
# row count (e.g. from repeated snip_id values) is visible.
print(umap_df.shape)
meta_cols = ["snip_id", "embryo_id", "experiment_time"]
embryo_meta = morph_df.loc[:, meta_cols]
umap_df = pd.merge(umap_df, embryo_meta, how="left", on="snip_id")
print(umap_df.shape)

### Make 3D UMAP and PCA for hotfish experiments

In [None]:
# Restrict both tables to the hotfish experiments listed in HF_experiments.
HF_experiments = np.asarray(['20240813_24hpf', '20240813_30hpf', '20240813_36hpf']) #, '20240813_extras'])
hf_morph_df = morph_df.loc[morph_df["experiment_date"].isin(HF_experiments), :].reset_index()
hf_umap_df = umap_df.loc[umap_df["experiment_date"].isin(HF_experiments), :].reset_index()

# Drop the snips listed as outliers from the UMAP table (hf_morph_df is left as is).
hf_outlier_snips = np.asarray(["20240813_24hpf_F06_e00_t0000", "20240813_36hpf_D03_e00_t0000", "20240813_36hpf_C03_e00_t0000"])
# .copy() makes the filtered table an independent frame, so columns can be
# added to it later without pandas raising SettingWithCopyWarning.
hf_umap_df = hf_umap_df.loc[~hf_umap_df["snip_id"].isin(hf_outlier_snips), :].copy()

In [None]:
# Integer bin (floor of predicted_stage_hpf), used as the marker symbol below.
# assign() returns a new frame, so this never writes into a slice of another table.
hf_umap_df = hf_umap_df.assign(
    timepoint=np.floor(hf_umap_df["predicted_stage_hpf"]).astype(int)
)

# make umap scatter
# hover_data is a list (not a set) so the hover fields appear in a fixed order.
fig = px.scatter_3d(
    hf_umap_df,
    x="UMAP_00_bio_3",
    y="UMAP_01_bio_3",
    z="UMAP_02_bio_3",
    color="temperature",
    symbol="timepoint",
    hover_data=["predicted_stage_hpf", "experiment_date", "snip_id"],
    color_continuous_scale="RdBu_r",
    range_color=[17, 39],
)

# Project helpers from src.functions.plot_functions; presumably they apply axis
# labels/theme and set the camera view -- see their definitions for details.
fig = format_3d_plotly(fig, axis_labels=["morph umap 1", "morph umap 2", "morph umap 3"], theme="dark")

fig = rotate_figure(fig, zoom_factor=0.8, z_rotation=30+180)

fig.show()

# write a static image and an interactive HTML copy of the figure
fig.write_image(os.path.join(fig_path, "hotfish_umap.png"))
fig.write_html(os.path.join(fig_path, "hotfish_umap.html"))

In [None]:
# save dataset: the identifier, stage, experiment, temperature and UMAP
# coordinate columns used by the hotfish UMAP scatter.
# Each column is listed once; a repeated name would be written to the CSV as a
# duplicated column.
plot_cols = [
    "snip_id",
    "predicted_stage_hpf",
    "experiment_date",
    "temperature",
    "UMAP_00_bio_3",
    "UMAP_01_bio_3",
    "UMAP_02_bio_3",
]
plot_df = hf_umap_df.loc[:, plot_cols]
plot_df.to_csv(os.path.join(fig_data_path, "umap_plot_df.csv"), index=False)