# Imports

In [None]:
from phenoseeker import EmbeddingManager
from pathlib import Path
import pandas as pd
import numpy as np

# Load chad image embeddings

In [None]:
# Directory that holds the image-level embedding arrays (.npy) and their metadata tables (.parquet).
base_path = Path("/projects/imagesets4/temp_embeds/")

In [None]:
# IPython shell escape: list the embeddings directory (same location as the `base_path` literal).
! ls /projects/imagesets4/temp_embeds/

In [None]:

# One (embeddings, metadata) path pair per chad DINOv2-S variant, all under `base_path`.
# `Path / str` already yields a Path, so the file names are joined as plain strings.

# CLS token
chad_cls_feats = base_path / "ctrls_images_chad_dinov2s_cls_embeds.npy"
chad_cls_metadata = base_path / "ctrls_images_chad_dinov2s_cls_dataframe.parquet"

# CLS token, "sm02" variant
chad_cls_sm02_feats = base_path / "ctrls_images_chad_dinov2s_cls_sm02_embeds.npy"
chad_cls_sm02_metadata = base_path / "ctrls_images_chad_dinov2s_cls_sm02_dataframe.parquet"

# CLS token, "sm12x02" variant with registers
chad_cls_sm12x02_w_regs_feats = base_path / "ctrls_images_chad_dinov2s_cls_sm12x02_w_regs_embeds.npy"
chad_cls_sm12x02_w_regs_metadata = base_path / "ctrls_images_chad_dinov2s_cls_sm12x02_w_regs_dataframe.parquet"

# CLS token with registers
chad_cls_w_regs_feats = base_path / "ctrls_images_chad_dinov2s_cls_w_regs_embeds.npy"
chad_cls_w_regs_metadata = base_path / "ctrls_images_chad_dinov2s_cls_w_regs_dataframe.parquet"

# CLS token, "sm02" variant with registers
chad_cls_sm02_w_regs_feats = base_path / "ctrls_images_chad_dinov2s_cls_sm02_w_regs_embeds.npy"
chad_cls_sm02_w_regs_metadata = base_path / "ctrls_images_chad_dinov2s_cls_sm02_w_regs_dataframe.parquet"

In [None]:
# Image-level manager, initialised from the metadata table of the sm02_w_regs variant.
# NOTE(review): presumably this table serves as the reference metadata for all embeddings loaded below — confirm.
chad_em_img = EmbeddingManager(chad_cls_sm02_w_regs_metadata, entity="image")

In [None]:
# Register every chad embedding variant on the image-level manager under its own name,
# each with its matching `*_dataframe.parquet` metadata path.
chad_em_img.load("chad_cls", chad_cls_feats, chad_cls_metadata)
chad_em_img.load("chad_cls_sm02", chad_cls_sm02_feats, chad_cls_sm02_metadata)
# The metadata path is passed here too, as for every other variant; it was
# defined above but previously left out of this call.
chad_em_img.load("chad_cls_w_regs", chad_cls_w_regs_feats, chad_cls_w_regs_metadata)
chad_em_img.load("chad_cls_sm02_w_regs", chad_cls_sm02_w_regs_feats, chad_cls_sm02_w_regs_metadata)
chad_em_img.load("chad_cls_sm12x02_w_regs", chad_cls_sm12x02_w_regs_feats, chad_cls_sm12x02_w_regs_metadata)


In [None]:
# Build a well-level manager from the image-level one, keeping the listed metadata columns.
chad_em_well = chad_em_img.grouped_embeddings(
    group_by="well",
    cols_to_keep=[
        "Metadata_Batch",
        "Metadata_JCP2022",
        "Metadata_InChI",
        "Metadata_Well",
    ],
)

In [None]:

# For every embedding currently on the well-level manager, derive two more:
# "<name>_sph" (spherizing transform) and "<name>_sph_int" (inverse normal
# transform applied to the spherized one).
# The names are snapshotted up front so that embeddings registered inside the
# loop (presumably under the new names) are not themselves iterated.
for model_name in tuple(chad_em_well.embeddings):
    sph_name = f"{model_name}_sph"
    sph_int_name = f"{model_name}_sph_int"
    chad_em_well.apply_spherizing_transform(
        embeddings_name=model_name,
        new_embeddings_name=sph_name,
        norm_embeddings=False,
    )
    chad_em_well.apply_inverse_normal_transform(
        embeddings_name=sph_name,
        new_embeddings_name=sph_int_name,
    )

# Persist the well-level manager to the shared chad folder.
chad_em_well.save_to_folder(
    Path('/projects/synsight/data/jump_embeddings/wells_embeddings/chad/')
)

# Add well-level embeddings from other models

In [None]:

# Load the pre-computed well-level embeddings of the other models onto the same
# manager, then derive "<name>_sph" and "<name>_sph_int" for each of them.
# The per-model directory gets its own name so that the notebook-level
# `base_path` (used to build the chad image-embedding paths) is not overwritten;
# otherwise re-running that earlier cell would point at the wrong directory.
wells_root = Path('/projects/synsight/data/jump_embeddings/wells_embeddings')

for model_name in ['dinov2_s', 'openphenom', 'resnet50', 'chada']:
    model_dir = wells_root / model_name

    metadata_path = model_dir / f'metadata_{model_name}.parquet'
    embeddings_path = model_dir / f'embeddings_{model_name}.npy'
    chad_em_well.load(model_name, embeddings_path, metadata_path)

    chad_em_well.apply_spherizing_transform(
        embeddings_name=model_name,
        new_embeddings_name=f"{model_name}_sph",
        norm_embeddings=False,
    )
    chad_em_well.apply_inverse_normal_transform(
        embeddings_name=f"{model_name}_sph",
        new_embeddings_name=f"{model_name}_sph_int",
    )


In [None]:
# New manager restricted to wells whose Metadata_JCP2022 matches the manager's `JCP_ID_poscon`
# (presumably the positive-control compound IDs — confirm against EmbeddingManager).
chad_em_well_poscon = chad_em_well.filter_and_instantiate(Metadata_JCP2022=chad_em_well.JCP_ID_poscon)

In [None]:
# Evaluate only the fully post-processed embeddings, i.e. those whose name contains "sph_int".
embeddings_to_test = [
    name for name in chad_em_well_poscon.embeddings if "sph_int" in name
]

In [None]:
# Maps with Metadata_Source as the label, computed on the full well-level
# manager (not the poscon subset) for the selected embeddings.
maps_source = chad_em_well.compute_maps(
    labels_column="Metadata_Source",
    embeddings_names=embeddings_to_test,
    random_maps=False,
    plot=True,
)

In [None]:
# Maps with Metadata_JCP2022 as the label, computed on the poscon subset
# for the selected embeddings.
maps_jcp = chad_em_well_poscon.compute_maps(
    labels_column="Metadata_JCP2022",
    embeddings_names=embeddings_to_test,
    random_maps=False,
    plot=True,
)

In [None]:
# LISI with Metadata_JCP2022 as the label on the poscon subset, evaluated
# for several neighbourhood sizes.
lisi_jcp_2 = chad_em_well_poscon.compute_lisi(
    labels_column="Metadata_JCP2022",
    embeddings_names=embeddings_to_test,
    plot=True,
    n_neighbors_list=[5, 10, 20, 40],
)

In [None]:
# Alias (not a copy) of the LISI result, used by the plotting cells below.
df = lisi_jcp_2

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Plot one line per column of `df`, skipping the columns whose name starts
# with "Ideal mixing".
model_columns = [name for name in df.columns if not name.startswith('Ideal mixing')]

fig, ax = plt.subplots(figsize=(10, 6))
for column in model_columns:
    ax.plot(df.index, df[column], marker='o', label=column)

ax.set_xlabel("Index")
ax.set_ylabel("Values")
ax.set_title("Model Values")

# Anchor the legend outside the axes, to the right of the plot.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

# Re-fit the layout so the external legend is not clipped.
fig.tight_layout()
plt.show()



In [None]:
# Display the column names selected for plotting.
model_columns

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Same plot as before, restricted to a hand-picked subset of the
# "*_sph_int" columns of `df`.
model_columns = [
    'chad_cls_sph_int',
    'chad_cls_sm02_sph_int',
    'chad_cls_w_regs_sph_int',
    'chad_cls_sm02_w_regs_sph_int',
    'dinov2_s_sph_int',
    'chada_sph_int',
]

fig, ax = plt.subplots(figsize=(10, 6))
for column in model_columns:
    ax.plot(df.index, df[column], marker='o', label=column)

ax.set_xlabel("Index")
ax.set_ylabel("Values")
ax.set_title("Model Values")

# Anchor the legend outside the axes, to the right of the plot.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

# Re-fit the layout so the external legend is not clipped.
fig.tight_layout()
plt.show()



In [None]:
# Read the parquet data at this path into a DataFrame.
# Note: this rebinds `df`, which until here referred to the LISI result (`lisi_jcp_2`).
df = pd.read_parquet('/projects/cpjump1/jump/load_data/final')

In [None]:
# Inspect the columns of the loaded table.
df.columns

In [None]:
# Number of rows in the loaded table.
len(df)

In [None]:
# NOTE(review): DataFrame.merge requires a `right` DataFrame, so this call raises
# TypeError as written. The intended merge partner and join keys are not visible
# in this notebook — TODO: supply `right=` and `on=`/`how=`, or remove this cell.
df.merge()