# Imports

In [None]:
from phenoseeker import EmbeddingManager
from pathlib import Path
import pandas as pd
import numpy as np

# Load chad image embeddings

In [None]:
# Folder holding the chad image-level embedding arrays (.npy) and their
# metadata tables (.parquet).
base_path = Path("/projects/imagesets4/temp_embeds/")

In [None]:

# Each embedding variant is stored as a pair of files under base_path:
# the array (*_embeds.npy) and its metadata table (*_dataframe.parquet).

# CLS-token embeddings
chad_cls_feats = base_path / "ctrls_images_chad_dinov2s_cls_embeds.npy"
chad_cls_metadata = base_path / "ctrls_images_chad_dinov2s_cls_dataframe.parquet"

# Random-token embeddings
chad_rand_feats = base_path / "ctrls_images_chad_dinov2s_rand_token_embeds.npy"
chad_rand_metadata = base_path / "ctrls_images_chad_dinov2s_rand_token_dataframe.parquet"

# CLS-token embeddings, "sm02" variant
chad_cls_sm02_feats = base_path / "ctrls_images_chad_dinov2s_cls_sm02_embeds.npy"
chad_cls_sm02_metadata = base_path / "ctrls_images_chad_dinov2s_cls_sm02_dataframe.parquet"

In [None]:
# Image-level embedding manager, built from the CLS metadata table.
chad_em_img = EmbeddingManager(chad_cls_metadata, entity="image")

In [None]:
# Register the three chad embedding sets on the image-level manager.
# The first call passes no metadata file; the other two pass the metadata
# table that belongs to the array being loaded.
chad_em_img.load("chad_cls", chad_cls_feats)
chad_em_img.load("chad_cls_sm02", chad_cls_sm02_feats, chad_cls_sm02_metadata)
# Pair the random-token embeddings with their own metadata table. The original
# call passed the CLS metadata here and left chad_rand_metadata unused.
# NOTE(review): confirm against phenoseeker's EmbeddingManager.load that the
# third argument is the metadata describing the rows of the loaded array.
chad_em_img.load("chad_rand", chad_rand_feats, chad_rand_metadata)


In [None]:
# Rebind the manager to the subset selected by Metadata_PlateType="COMPOUND"
# (presumably an equality filter on that metadata column — confirm in phenoseeker).
chad_em_img = chad_em_img.filter_and_instantiate(Metadata_PlateType="COMPOUND")

In [None]:
# Group the image-level embeddings by well and keep the listed metadata
# columns on the resulting well-level manager.
chad_em_well = chad_em_img.grouped_embeddings(
    group_by="well",
    cols_to_keep=[
        "Metadata_Batch",
        "Metadata_JCP2022",
        "Metadata_InChI",
        "Metadata_Well",
    ],
)

In [None]:
# Load the "ctrls_images_10by9sources" table of control images.
df = pd.read_parquet('/projects/cpjump1/jump/load_data/ctrls_images_10by9sources.parquet')

In [None]:
# Notebook display of the table held in df.
df

In [None]:
# Distinct Metadata_Plate values present in the well-level metadata table.
plates_with_ctrl = list(chad_em_well.df['Metadata_Plate'].unique())


In [None]:
# Exclude plate "Dest210823-174240" from the plate list.
# The list is rebuilt instead of calling list.remove(): remove() raises
# ValueError when the plate is already gone, which happens as soon as this
# cell is executed a second time.
# NOTE: the removal of "Dest210628-162003" was disabled in the original
# notebook and stays disabled here.
plates_with_ctrl = [
    plate for plate in plates_with_ctrl if plate != "Dest210823-174240"
]


In [None]:

# Exclude plate "Dest210823-174422" from the plate list.
# The list is rebuilt instead of calling list.remove(): remove() raises
# ValueError when the plate is already gone, which happens as soon as this
# cell is executed a second time.
plates_with_ctrl = [
    plate for plate in plates_with_ctrl if plate != "Dest210823-174422"
]

In [None]:
# Rebind the well-level manager to the plates listed in plates_with_ctrl
# (presumably a membership filter when a list is passed — confirm in phenoseeker).
chad_em_well = chad_em_well.filter_and_instantiate(Metadata_Plate=plates_with_ctrl)

In [None]:

# For every embedding currently registered, derive "<name>_sph" (spherizing
# transform, norm_embeddings=False) and then "<name>_sph_int" (inverse normal
# transform applied to the spherized result).
# The names are copied into a list before looping; presumably the transforms
# register their outputs in the same mapping — confirm in phenoseeker.
for model_name in list(chad_em_well.embeddings):
    sph_name = f"{model_name}_sph"
    chad_em_well.apply_spherizing_transform(
        embeddings_name=model_name,
        new_embeddings_name=sph_name,
        norm_embeddings=False,
    )
    chad_em_well.apply_inverse_normal_transform(
        embeddings_name=sph_name,
        new_embeddings_name=f"{sph_name}_int",
    )

# Persist the well-level manager, including the derived embeddings.
chad_em_well.save_to_folder(
    Path("/projects/synsight/data/jump_embeddings/wells_embeddings/chad/")
)

# Add the other models' well embeddings

In [None]:

# Load the well-level embeddings of the other models into the same manager.
# Each model has its own sub-folder containing metadata_<model>.parquet and
# embeddings_<model>.npy.
wells_root = Path("/projects/synsight/data/jump_embeddings/wells_embeddings")
for model_name in ["dinov2_s", "openphenom", "resnet50", "chada"]:
    # Loop-local names are used on purpose: the original rebound the
    # notebook-wide `base_path`, so re-running the cell that builds the chad
    # file paths afterwards produced wrong paths, and it labelled every
    # model's files "_dino".
    model_dir = wells_root / model_name
    metadata_path = model_dir / f"metadata_{model_name}.parquet"
    embeddings_path = model_dir / f"embeddings_{model_name}.npy"
    chad_em_well.load(model_name, embeddings_path, metadata_path)


In [None]:
# Inspect the distinct Metadata_JCP2022 values in the well-level table.
chad_em_well.df['Metadata_JCP2022'].unique()

In [None]:
# Draw up to 200 *distinct* plates.
# Sampling the raw column draws 200 rows, so a plate can be drawn several
# times and the result holds fewer than 200 different plates. It also raises
# ValueError when the table has fewer than 200 rows.
unique_plates = chad_em_well.df['Metadata_Plate'].drop_duplicates()
random_200_plates = unique_plates.sample(
    n=min(200, len(unique_plates)),
    random_state=0,  # fixed seed so the subset is the same on every run
).to_list()

In [None]:
# Notebook display of the sampled plate list.
random_200_plates

In [None]:
# Smaller manager limited to the sampled plates, used for the evaluations below.
small_chad_em_well = chad_em_well.filter_and_instantiate(Metadata_Plate=random_200_plates)

In [None]:

# Fit the spherizing + inverse-normal transforms on the plate subset, starting
# from the raw embeddings only.
# If filter_and_instantiate carries over the "<name>_sph" / "<name>_sph_int"
# entries created on the full set, looping over every registered name would
# transform them a second time and create "_sph_sph", "_sph_int_sph_int", ...
# variants, all of which contain "sph_int" and would end up in the evaluation.
derived_suffixes = ("_sph", "_sph_int")
raw_names = [
    name
    for name in small_chad_em_well.embeddings
    if not name.endswith(derived_suffixes)
]
for model_name in raw_names:
    small_chad_em_well.apply_spherizing_transform(
        embeddings_name=model_name,
        new_embeddings_name=f"{model_name}_sph",
        norm_embeddings=False,
    )
    small_chad_em_well.apply_inverse_normal_transform(
        embeddings_name=f"{model_name}_sph",
        new_embeddings_name=f"{model_name}_sph_int",
    )


In [None]:
# Subset of the small manager whose Metadata_JCP2022 matches JCP_ID_poscon.
# JCP_ID_poscon is read from the full manager (chad_em_well); presumably it
# lists the positive-control compound IDs — confirm in phenoseeker.
small_chad_em_well_poscon = small_chad_em_well.filter_and_instantiate(Metadata_JCP2022=chad_em_well.JCP_ID_poscon)

In [None]:
# Evaluate only the transformed embeddings: every registered name that
# contains "sph_int".
embeddings_to_test = [
    name
    for name in small_chad_em_well_poscon.embeddings
    if "sph_int" in name
]

In [None]:
# mAP with Metadata_Source as the label, computed on the whole sampled-plate
# subset (not only the poscon subset) for every embedding in embeddings_to_test.
maps_source = small_chad_em_well.compute_maps(
    labels_column="Metadata_Source",
    embeddings_names=embeddings_to_test,
    random_maps=False,
    plot=True,
)

In [None]:
# mAP with Metadata_JCP2022 as the label, computed on the poscon subset for
# every embedding in embeddings_to_test.
maps_jcp = small_chad_em_well_poscon.compute_maps(labels_column="Metadata_JCP2022", embeddings_names=embeddings_to_test, random_maps=False, plot=True)

In [None]:
# NOTE(review): this call is identical to the one in the previous cell and
# recomputes the same result into maps_jcp; it looks like a leftover duplicate.
maps_jcp = small_chad_em_well_poscon.compute_maps(labels_column="Metadata_JCP2022", embeddings_names=embeddings_to_test, random_maps=False, plot=True)

In [None]:
# Side-by-side view of the per-label mAP for three of the tested embeddings.
maps_jcp[['Label', 'mAP (chad_cls_sph_int)', 'mAP (chad_cls_sm02_sph_int)','mAP (resnet50_sph_int)']]

In [None]:
# LISI with Metadata_JCP2022 as the label on the poscon subset, evaluated for
# neighbourhood sizes 5, 20 and 40.
lisi_jcp = small_chad_em_well_poscon.compute_lisi(
    labels_column="Metadata_JCP2022",
    embeddings_names=embeddings_to_test,
    plot=True,
    n_neighbors_list=[5, 20, 40],
)

In [None]:
# Notebook display of the LISI result computed with n_neighbors_list=[5, 20, 40].
lisi_jcp

In [None]:
# Same LISI computation without n_neighbors_list, so compute_lisi falls back
# to its own default; this overwrites the previous lisi_jcp.
lisi_jcp = small_chad_em_well_poscon.compute_lisi(labels_column="Metadata_JCP2022", embeddings_names=embeddings_to_test, plot=True)

In [None]:
# Notebook display of the LISI result computed with the default neighbourhood setting.
lisi_jcp

In [None]:
# Load the "ctrls_images" table; this rebinds df, replacing the
# "10by9sources" table loaded earlier.
df = pd.read_parquet("/projects/cpjump1/jump/load_data/ctrls_images.parquet")

In [None]:
# Notebook display of the table held in df.
df

In [None]:
# Spot-check one entry: the FileName_OrigAGP value of the row at position 25.
df.iloc[25]['FileName_OrigAGP']

## UMAP

In [None]:
# Keep up to 10 *distinct* plates of the poscon subset for the UMAP plots.
# Sampling the raw column draws 10 rows, which can repeat a plate and leave
# fewer than 10 different plates; it also raises ValueError when the table has
# fewer than 10 rows.
poscon_plates = small_chad_em_well_poscon.df['Metadata_Plate'].drop_duplicates()
few_plates_em = small_chad_em_well_poscon.filter_and_instantiate(
    Metadata_Plate=poscon_plates.sample(
        n=min(10, len(poscon_plates)),
        random_state=0,  # fixed seed so the plots are comparable across runs
    ).to_list()
)

In [None]:
# Plot a UMAP projection of every tested embedding, coloured by Metadata_JCP2022.
red = "UMAP"  # reduction method handed to the plotting call
for emb in embeddings_to_test:
    print(emb)  # print the embedding name ahead of its plot in the cell output
    few_plates_em.plot_dimensionality_reduction(embedding_name=emb, reduction_method=red, color_by="Metadata_JCP2022")

In [None]:

# Plot a UMAP projection of every tested embedding, coloured by Metadata_Plate.
red = "UMAP"  # reduction method handed to the plotting call
for emb in embeddings_to_test:
    print(emb)  # print the embedding name ahead of its plot in the cell output
    few_plates_em.plot_dimensionality_reduction(embedding_name=emb, reduction_method=red, color_by="Metadata_Plate")

In [None]:
# Notebook display of the embedding names selected for evaluation.
embeddings_to_test

In [None]:
# mAP with Metadata_JCP2022 as the label on the poscon subset, this time for
# the embeddings listed below, whose names carry no "_sph" / "_sph_int" suffix.
# The result is stored in df, replacing whatever df held before.
df = small_chad_em_well_poscon.compute_maps(
    labels_column="Metadata_JCP2022",
    embeddings_names=[
        "chad_cls",
        "chad_cls_sm02",
        "dinov2_s",
        "resnet50",
        "chada",
    ],
    random_maps=False,
    plot=True,
)