In [None]:
import os
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo


# Initialise plotly's offline notebook mode. `connected=False` is the library
# default, spelled out here so the choice is visible to the reader.
pyo.init_notebook_mode(connected=False)

In [None]:
from glob2 import glob
from src.functions.utilities import path_leaf

# Root of the project data. The MORPHSEQ_ROOT environment variable, when set,
# overrides the default so the notebook can run on another machine without edits.
root = os.environ.get(
    "MORPHSEQ_ROOT",
    "/Users/nick/Cole Trapnell's Lab Dropbox/Nick Lammers/Nick/morphseq/",
)
# root = "E:\\Nick\\Dropbox (Cole Trapnell's Lab)\\Nick\\morphseq\\"
train_name = "20231106_ds"
model_name_list = ["MetricVAE_z100_ne100_beta_temp_sweep"]

train_dir = os.path.join(root, "training_data", train_name)
output_dir = os.path.join(train_dir, "model_comparisons", "")
# exist_ok makes the cell idempotent and removes the isdir/makedirs race.
os.makedirs(output_dir, exist_ok=True)


def _load_model_table(fig_dir, file_name, model_id, model_name, temperature=None):
    """Read one per-model CSV and tag every row with the model it came from.

    Parameters
    ----------
    fig_dir : str
        The model's "figures" directory containing the CSV.
    file_name : str
        Name of the CSV inside `fig_dir`; its first column is used as the index.
    model_id : int
        Running integer id written to the "model_id" column.
    model_name : str
        Written to the "model_name" column.
    temperature : pandas.Series, optional
        When given, a "temperature" column is added. A one-element Series is
        broadcast to every row; a longer Series is assigned by index alignment,
        exactly as the original code did.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    FileNotFoundError
        Propagated from `pd.read_csv` when the CSV does not exist.
    """
    table = pd.read_csv(os.path.join(fig_dir, file_name), index_col=0)
    table["model_id"] = model_id
    table["model_name"] = model_name
    if temperature is not None:
        if len(temperature) == 1:
            # NOTE(review): meta_summary_df is presumably a one-row summary per
            # model - confirm. Index-aligned assignment of a one-row Series would
            # leave every non-matching row NaN, so the scalar is broadcast instead.
            table["temperature"] = temperature.iloc[0]
        else:
            table["temperature"] = temperature.copy()
    return table


meta_df_list = []
metric_df_list = []
umap_df_list = []
embryo_df_list = []
fig_path_list = []
model_id = 0

for model_name in model_name_list:
    read_dir = os.path.join(train_dir, model_name, '')
    # Sorted so that model_id assignment is reproducible across runs/filesystems.
    candidate_models = sorted(glob(read_dir + '*VAE*'))

    for mdl in candidate_models:
        mdl_name = path_leaf(mdl)
        model_fig_path = os.path.join(mdl, "figures", "")
        if not os.path.isdir(model_fig_path):
            # No figures directory for this candidate: nothing to load.
            continue

        # load data frames (read from model_fig_path; the original referenced an
        # undefined `figure_path` here)
        meta_df = _load_model_table(model_fig_path, "meta_summary_df.csv", model_id, mdl_name)
        temperature = meta_df["temperature"]

        umap_df = _load_model_table(
            model_fig_path, "umap_df.csv", model_id, mdl_name, temperature)
        metric_df = _load_model_table(
            model_fig_path, "metric_df.csv", model_id, mdl_name, temperature)
        embryo_df = _load_model_table(
            model_fig_path, "embryo_stats_df.csv", model_id, mdl_name, temperature)

        # Append only after all four tables loaded so the lists stay parallel.
        fig_path_list.append(model_fig_path)
        meta_df_list.append(meta_df)  # was mistakenly appending umap_df
        umap_df_list.append(umap_df)
        metric_df_list.append(metric_df)
        embryo_df_list.append(embryo_df)

        # Give the next model a distinct id (previously every model got 0).
        model_id += 1

if not fig_path_list:
    # pd.concat([]) would fail with an opaque "No objects to concatenate".
    raise FileNotFoundError(
        f"No model 'figures' directories found under {train_dir!r} "
        f"for model_name_list={model_name_list!r}"
    )

master_meta_df = pd.concat(meta_df_list, axis=0, ignore_index=True)
master_umap_df = pd.concat(umap_df_list, axis=0, ignore_index=True)
master_metric_df = pd.concat(metric_df_list, axis=0, ignore_index=True)
master_embryo_df = pd.concat(embryo_df_list, axis=0, ignore_index=True)


In [None]:
# load datasets
# `figure_path` is not assigned in any cell shown in this notebook, so this cell
# raised NameError on a fresh kernel. Bind it explicitly to one of the figures
# directories collected in `fig_path_list`.
# NOTE(review): the first discovered model is used here - confirm which model
# this single-model inspection is meant to look at.
if not fig_path_list:
    raise FileNotFoundError("fig_path_list is empty: no model figures directory to load from")
figure_path = fig_path_list[0]

umap_df = pd.read_csv(os.path.join(figure_path, "umap_df.csv"), index_col=0)
# Positional row indices of the rows whose master_perturbation equals "wck-AB".
wik_indices = np.where(umap_df["master_perturbation"]=="wck-AB")[0]

In [None]:
# 3D scatter of the three "bio" UMAP components for the rows selected by
# `wik_indices`, coloured by the `predicted_stage_hpf` column.
wik_umap_df = umap_df.iloc[wik_indices]
scatter_kwargs = dict(
    x="UMAP_00_bio_3",
    y="UMAP_01_bio_3",
    z="UMAP_02_bio_3",
    color='predicted_stage_hpf',
    opacity=0.5,
    template="plotly",
)
fig = px.scatter_3d(wik_umap_df, **scatter_kwargs)

# Fixed marker size for every trace in the figure.
fig.update_traces(marker={"size": 6})

# Axis titles (left disabled, as in the original cell):
# fig.update_layout(
#                 xaxis_title="UMAP 1",
#                 yaxis_title="UMAP 2",
#                 zaxis_title="UMAP 3"
#             )

fig.show()
# fig.write_image(os.path.join(out_figure_path, "UMAP_wt_scatter_bio.png"))

In [None]:
# Load the metric table of a single model for inspection.
# `figure_path` is not assigned in any cell shown in this notebook (NameError on
# a fresh kernel), so bind it here to a directory collected in `fig_path_list`.
# NOTE(review): the first discovered model is used - confirm the intended model.
if not fig_path_list:
    raise FileNotFoundError("fig_path_list is empty: no model figures directory to load from")
figure_path = fig_path_list[0]

metric_df = pd.read_csv(os.path.join(figure_path, "metric_df.csv"), index_col=0)

In [None]:
import plotly.graph_objects as go

# One histogram trace per distance column of `metric_df`, added to a single
# figure in this order; each trace is named after its column.
distance_columns = ["euc_bio_rand", "euc_nbio_rand", "euc_bio", "euc_nbio"]

fig = go.Figure(
    data=[
        go.Histogram(x=metric_df.loc[:, column], name=column)
        for column in distance_columns
    ]
)

fig.show()

In [None]:
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

col_list = metric_df.columns


def _column_positions(columns, tag):
    """Return the positions (as a numpy array) of entries in `columns` containing `tag`."""
    return np.asarray([pos for pos, col in enumerate(columns) if tag in col])


# Positional indices of the metric_df columns whose names contain each tag.
# NOTE(review): "z_mu_b_" / "z_mu_n_" presumably mark the biological and
# non-biological latent means - confirm against the code that writes metric_df.
bio_indices = _column_positions(col_list, "z_mu_b_")
nbio_indices = _column_positions(col_list, "z_mu_n_")

In [None]:
# Pull the two groups of latent columns out of metric_df as numpy arrays.
bio_z_array, nbio_z_array = (
    metric_df.iloc[:, positions].to_numpy()
    for positions in (bio_indices, nbio_indices)
)

# Variance over every entry of each array (no axis given, so one scalar each).
bio_var = bio_z_array.var()
nbio_var = nbio_z_array.var()

for variance in (bio_var, nbio_var):
    print(variance)