## Test methods for obtaining FF from raw image stacks


In [None]:
import os
import numpy as np
from PIL import Image
from skimage import (exposure, feature, filters, io, measure,
                      morphology, restoration, segmentation, transform,
                      util)
import matplotlib
from tqdm import tqdm
from PIL import Image
import glob2 as glob
from src.functions.image_utils import doLap
from src.functions.utilities import path_leaf
from skimage.transform import resize
from aicsimageio import AICSImage
import json
from tqdm import tqdm

import pandas as pd
import time
import nd2

#### Get paths to data, figures, and latent space outputs

In [None]:
# Identifier of the training run family; the same string appears as a folder
# component inside `model_path` below.
train_name = "20230915_vae"
# Folder holding the outputs of the model under analysis.
# NOTE(review): absolute, machine-specific Dropbox path — this will not resolve
# on another machine; consider deriving it from a configurable data root.
model_path = "/Users/nick/Dropbox (Cole Trapnell's Lab)/Nick/morphseq/training_data/20230915_vae/z100_bs032_ne250_depth05_out16_temperature_sweep2/"
# Alternative model folder, kept for quick switching:
# model_path = "/Users/nick/Dropbox (Cole Trapnell's Lab)/Nick/morphseq/training_data/20230915_vae/z100_bs032_ne100_depth05_out16/"

In [None]:
# Show the entries of the model folder; the following cell selects one of them
# (by sorted position) as the training run to load.
os.listdir(model_path)

In [None]:
import pandas as pd

# Choose the training-run folder: the second-to-last entry of the model folder
# once its contents are sorted by name.
# NOTE(review): this positional pick depends on whatever else lives in the
# folder — confirm it still selects the intended run.
last_training = sorted(os.listdir(model_path))[-2]

# Sub-folder of the selected run that holds the exported figures and tables.
m_fig_path = os.path.join(model_path, last_training, "figures")

# Read the three summary tables stored in the figures folder; the first CSV
# column is used as the index in every case.
morph_df, metric_df, meta_df = (
    pd.read_csv(os.path.join(m_fig_path, csv_name), index_col=0)
    for csv_name in ("embryo_stats_df.csv", "metric_df.csv", "meta_summary_df.csv")
)

# The loss history is stored directly in the run folder, not under "figures".
loss_df = pd.read_csv(
    os.path.join(model_path, last_training, "loss_tracker.csv"),
    index_col=0,
)

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Positional row numbers of every embryo whose perturbation label is "wck-AB".
# NOTE(review): the label is spelled "wck" while the variable says "wik" —
# confirm the spelling against the metadata.
wik_indices = np.flatnonzero((morph_df["master_perturbation"] == "wck-AB").to_numpy())

# UMAP of the selected embryos, coloured by predicted stage.
fig = px.scatter(
    data_frame=morph_df.iloc[wik_indices],
    x="UMAP_00",
    y="UMAP_01",
    color="predicted_stage_hpf",
    opacity=0.5,
)
fig.show()

In [None]:
# Same embryos, plotted in the "_bio" UMAP columns and coloured by predicted stage.
fig = px.scatter(
    data_frame=morph_df.iloc[wik_indices],
    x="UMAP_00_bio",
    y="UMAP_01_bio",
    color="predicted_stage_hpf",
    opacity=0.5,
)
fig.show()

In [None]:
# "_bio" UMAP columns again, this time coloured by the train_cat column.
fig = px.scatter(
    data_frame=morph_df.iloc[wik_indices],
    x="UMAP_00_bio",
    y="UMAP_01_bio",
    color="train_cat",
    opacity=0.5,
)
fig.show()

In [None]:
# Counterpart plot using the "_n" UMAP columns, coloured by predicted stage.
fig = px.scatter(
    data_frame=morph_df.iloc[wik_indices],
    x="UMAP_00_n",
    y="UMAP_01_n",
    color="predicted_stage_hpf",
    opacity=0.5,
)
fig.show()

Despite the imposition of a metric constraint on the biological latent variables, they still quite obviously encode orientational differences. Why?

It could be that the model is doing a good job with cosine distance, but that this does not translate to the Euclidean space that is being read out (and warped in obscure ways) by the UMAP compression.

It is also possible that the metric loss is simply outcompeted by other terms.

In [None]:
from sklearn.metrics import pairwise_distances

# Restrict the metric table to the rows whose contrast_id equals 0.
metric_df0 = metric_df.iloc[np.flatnonzero((metric_df["contrast_id"] == 0).to_numpy())]

# One probability-normalised histogram per cosine-similarity column, overlaid
# in a single figure.
fig = go.Figure(
    data=[
        go.Histogram(x=metric_df0[column], histnorm="probability", name=label)
        for column, label in (
            ("cos_all", "all latent variables"),
            ("cos_bio", "biological partition"),
            ("cos_nbio", "non-biological partition"),
        )
    ]
)
fig.update_layout(title="Cosine similarity scores between pairs of transformed images")
fig.show()


Clearly, SOMETHING is being learned: transformed versions of the same image look more similar in the biological partition (according to the cosine metric) than in the non-biological one.

In [None]:
# I think we need to normalize by number of bio and non-bio variables
# NOTE(review): 90 + 10 = 100 matches the "z100" tag in the model folder name;
# confirm the split against the model configuration if another model is loaded.
n_bio = 90
n_nbio = 10

# Scale each Euclidean distance by sqrt(number of dimensions it was computed
# over) so that the three partitions are comparable.
# `metric_df0` is a row slice of `metric_df`; writing columns into it through
# `.loc` raises pandas' SettingWithCopyWarning. `assign` returns a new frame
# instead, so there is no ambiguity about what gets modified and re-running
# the cell gives the same result.
metric_df0 = metric_df0.assign(
    euc_all_norm=metric_df0["euc_all"] / np.sqrt(n_bio + n_nbio),
    euc_bio_norm=metric_df0["euc_bio"] / np.sqrt(n_bio),
    euc_nbio_norm=metric_df0["euc_nbio"] / np.sqrt(n_nbio),
)

# Overlay the three normalised distance distributions.
fig = go.Figure()

fig.add_trace(go.Histogram(x=metric_df0["euc_all_norm"], histnorm="probability", name="all latent variables"))
fig.add_trace(go.Histogram(x=metric_df0["euc_bio_norm"], histnorm="probability", name="biological partition"))
fig.add_trace(go.Histogram(x=metric_df0["euc_nbio_norm"], histnorm="probability", name="non-biological partition"))
fig.update_layout(title="Average Euclidean distance between pairs of transformed images")
fig.show()

In [None]:
# Contrastive (NT-Xent) loss per epoch, one line per train_cat value, log-scaled y-axis.
fig = px.line(
    data_frame=loss_df,
    x="epoch",
    y="ntxent_loss",
    color="train_cat",
    markers=False,
    log_y=True,
    labels={"ntxent_loss": "contrastive loss"},
)
fig.show()

In [None]:
# Reconstruction loss per epoch, one line per train_cat value, log-scaled y-axis.
fig = px.line(
    data_frame=loss_df,
    x="epoch",
    y="recon_loss",
    color="train_cat",
    markers=False,
    log_y=True,
    labels={"recon_loss": "reconstruction loss (MSE)"},
)

# fig.update_xaxes(range=[0, 250])
# NOTE(review): on a log-scaled axis plotly reads `range` in log10 units, so
# [2, 3] shows losses from 1e2 to 1e3 — confirm that is the intended window.
fig.update_yaxes(range=[2, 3])

fig.show()

In [None]:
# Regularisation (KLD) loss per epoch, one line per train_cat value, linear y-axis.
fig = px.line(
    data_frame=loss_df,
    x="epoch",
    y="reg_loss",
    color="train_cat",
    markers=False,
    labels={"reg_loss": "Gaussian prior loss (KLD)"},
)

# Optional axis limits, currently disabled:
# fig.update_xaxes(range=[0, 250])
# fig.update_yaxes(range=[0, 650])

fig.show()

In [None]:
import plotly.express as px 
import plotly.graph_objects as go

# Both layers of this figure must live in the SAME embedding. Previously the
# gdf3 points were drawn from UMAP_00/UMAP_01 while the reference overlay was
# drawn from UMAP_00_bio/UMAP_01_bio, which puts two unrelated coordinate
# systems on one set of axes. The column names are now chosen once and shared.
# NOTE(review): to compare in the biological partition instead, set these to
# "UMAP_00_bio" / "UMAP_01_bio".
umap_x_col = "UMAP_00"
umap_y_col = "UMAP_01"

# Positional row numbers of the embryos labelled "gdf3".
gdf3_indices = np.where(morph_df["master_perturbation"]=="gdf3")[0]

# gdf3 embryos, coloured by predicted stage.
fig = px.scatter(morph_df.iloc[gdf3_indices], x=umap_x_col, y=umap_y_col, color="predicted_stage_hpf", 
                 opacity=0.5, template="plotly")

# Faint blue background layer: the reference embryos selected earlier in
# `wik_indices`, plotted in the same embedding as the gdf3 points.
fig.add_trace(go.Scatter(x=morph_df[umap_x_col].iloc[wik_indices], y=morph_df[umap_y_col].iloc[wik_indices],
              mode="markers", marker=dict(color="blue", opacity=0.1))
             )

fig.show()