In [None]:
from phenoseeker import EmbeddingManager
from pathlib import Path
# Input files for this notebook: the embeddings array (.npy) and the matching
# metadata table (.parquet) sit side by side in the same directory.
openphenom_dir = Path("/projects/synsight/data/jump_embeddings/wells_embeddings/openphenom")
embeddings_npy = openphenom_dir / "embeddings_openphenom_test_3_plates.npy"
Metadata_parquet = openphenom_dir / "metadata_openphenom_test_3_plates.parquet"

In [None]:
# Build the manager from the metadata parquet file. `em` is the shared object that
# every later cell in this notebook operates on.
# NOTE(review): entity="well" presumably means one metadata row per well — confirm
# against EmbeddingManager.
em = EmbeddingManager(Metadata_parquet, entity="well")

In [None]:
# Load the .npy file into the manager under the name 'Embeddings_mean'; later cells
# refer to these embeddings by that name.
em.load('Embeddings_mean', embeddings_npy)

# Done

In [None]:


# 1. Test saving metadata and embeddings to a folder
output_folder = Path("./tmp/test")
# parents=True is required: on a fresh checkout "./tmp" does not exist yet, and a
# plain mkdir() on the nested path would raise FileNotFoundError.
output_folder.mkdir(parents=True, exist_ok=True)
em.save_to_folder(folder_path=output_folder, embeddings_name="all")

print(f"Metadata and embeddings saved to {output_folder}")


In [None]:

# 2a. Test grouping embeddings by compound, aggregating each group with the mean.
# The variable keeps its historical name (`grouped_em_by_well`) so that any other
# cell referring to it still works, but the grouping key passed below is "compound".
grouped_em_by_well = em.grouped_embeddings(
    group_by="compound",
    embeddings_name="Embeddings_mean",
    aggregation="mean"
)
# The message now names the grouping key actually used (it previously said "by well").
print(f"Grouped embeddings by compound (mean aggregation). New instance created with {len(grouped_em_by_well.df)} rows.")


In [None]:

# 2b. Test grouping embeddings by compound, aggregating each group with the median.
# The variable keeps its historical name (`grouped_em_by_well`) so that any other
# cell referring to it still works, but the grouping key passed below is "compound".
grouped_em_by_well = em.grouped_embeddings(
    group_by="compound",
    embeddings_name="Embeddings_mean",
    aggregation="median"
)
# The message now names the grouping key actually used (it previously said "by well").
print(f"Grouped embeddings by compound (median aggregation). New instance created with {len(grouped_em_by_well.df)} rows.")


In [None]:

# 3. Test applying rescale: read 'Embeddings_mean' and request the rescaled output
# under 'Embeddings_rescaled' with scale="0-1".
em.apply_rescale(
    embeddings_name="Embeddings_mean",
    # Fixed typo: was "Embeddings_rescddaled", which contradicted the message below
    # and would have stored the result under an unintended name.
    new_embeddings_name="Embeddings_rescaled",
    scale="0-1",
    n_jobs=1
)
print("Rescaling applied to 'Embeddings_mean'. Stored as 'Embeddings_rescaled'.")


In [None]:

# 4. Inverse normal transform check: source is 'Embeddings_mean', the output is
# requested under 'Embeddings_INT', single job.
int_options = {
    "embeddings_name": "Embeddings_mean",
    "new_embeddings_name": "Embeddings_INT",
    "n_jobs": 1,
}
em.apply_inverse_normal_transform(**int_options)
print("Inverse normal transformation applied to 'Embeddings_mean'. Stored as 'Embeddings_INT'.")


In [None]:

# 5. Covariance / correlation plotting check on 'Embeddings_mean'.
# by_sample and both DMSO flags are switched on, and sort_by is set to the
# 'Metadata_Plate' column (the exact meaning of each flag is defined by
# EmbeddingManager and is not visible from this notebook).
cov_corr_options = {
    "embeddings_name": "Embeddings_mean",
    "by_sample": True,
    "use_dmso": True,
    "dmso_only": True,
    "sort_by": "Metadata_Plate",
}
em.plot_covariance_and_correlation(**cov_corr_options)
print("Covariance and correlation matrices plotted.")


In [None]:

# 6. Distance-matrix check: cosine distance on 'Embeddings_mean' with two jobs.
# Note that this method takes `embedding_name` (singular), unlike the transform cells.
em.compute_distance_matrix(embedding_name="Embeddings_mean", distance="cosine", n_jobs=2)
print("Cosine distance matrix computed for 'Embeddings_mean'.")


In [None]:

# 7. LISI check: score 'Embeddings_mean' against the 'Metadata_Source' labels for
# several neighbourhood sizes, with plotting enabled.
neighbor_counts = [10, 20, 30]
lisi_scores = em.compute_lisi(
    embeddings_names=["Embeddings_mean"],
    labels_column="Metadata_Source",
    n_neighbors_list=neighbor_counts,
    plot=True,
)
print("LISI scores computed:")
print(lisi_scores)


In [None]:

# 8. Feature-distribution check: test the features of 'Embeddings_mean' against the
# "norm" and "lognorm" candidates and print whatever the manager returns.
candidate_distributions = ["norm", "lognorm"]
feature_distribution_test = em.test_feature_distributions(
    continuous_distributions=candidate_distributions,
    embedding_name="Embeddings_mean",
)
print("Feature distribution testing results:")
print(feature_distribution_test)


In [None]:
# Apply median polish to 'Embeddings_mean' with n_jobs=2.
# NOTE(review): the compute_maps cells further down reference
# 'Embeddings_MedianPolish' — presumably the default output name of this call;
# confirm against EmbeddingManager.apply_median_polish.
em.apply_median_polish("Embeddings_mean", n_jobs=2)

In [None]:

# 9. Spherizing check: ZCA on 'Embeddings_mean', output requested under
# 'Embeddings_spherized' (a name the compute_maps cells rely on later).
spherize_options = dict(
    embeddings_name="Embeddings_mean",
    new_embeddings_name="Embeddings_spherized",
    method="ZCA",
    norm_embeddings=True,
    use_control=True,
    n_jobs=2,
)
em.apply_spherizing_transform(**spherize_options)
print("Spherizing applied to 'Embeddings_mean'. Stored as 'Embeddings_spherized'.")


In [None]:

# 10. Filtering check: request a new manager restricted to two Metadata_Source
# values and report how many rows its dataframe holds.
filtered_em = em.filter_and_instantiate(
    Metadata_Source=["Source_1", "Source_2"],
)
n_filtered_rows = len(filtered_em.df)
print(f"Filtered instance created with {n_filtered_rows} rows.")


In [None]:

# 11. Feature-removal check on 'Embeddings_mean' using the "iqrs" metric and a
# 0.01 threshold.
# NOTE(review): no new embedding name is passed, so this presumably modifies
# 'Embeddings_mean' itself — cells run after this one would then see fewer features.
em.remove_features(embedding_name="Embeddings_mean", metrics="iqrs", threshold=0.01)
print("Features with low variance removed from 'Embeddings_mean'.")


In [None]:

# 12. Dimensionality-reduction plot check: 2-component PCA of 'Embeddings_mean',
# coloured by the 'Metadata_Source' column.
pca_plot_options = dict(
    embedding_name="Embeddings_mean",
    reduction_method="PCA",
    n_components=2,
    color_by="Metadata_Source",
)
em.plot_dimensionality_reduction(**pca_plot_options)
print("PCA dimensionality reduction performed and plotted.")


# To do

In [None]:
# Display the keys of em.embeddings (presumably the names of every embedding set
# stored so far) to check which ones exist before the compute_maps cells below.
em.embeddings.keys()

In [None]:

# 13a. MAP check with 'Metadata_Source' as the label column: cosine distance over
# three embedding sets, plotting on, random maps off, three jobs.
embeddings_to_score = ['Embeddings_mean', 'Embeddings_spherized', 'Embeddings_MedianPolish']
maps = em.compute_maps(
    embeddings_names=embeddings_to_score,
    labels_column="Metadata_Source",
    distance="cosine",
    n_jobs=3,
    plot=True,
    random_maps=False,
)
print("Mean Average Precision (MAP) computed:")

# Trailing bare expression so the notebook renders the result with its rich repr.
maps


In [None]:

# 13b. MAP check with 'Metadata_JCP2022' as the label column: cosine distance over
# three embedding sets, plotting on, random maps off, three jobs.
embeddings_to_score = ['Embeddings_mean', 'Embeddings_spherized', 'Embeddings_MedianPolish']
maps = em.compute_maps(
    embeddings_names=embeddings_to_score,
    labels_column="Metadata_JCP2022",
    distance="cosine",
    n_jobs=3,
    plot=True,
    random_maps=False,
)
print("Mean Average Precision (MAP) computed:")

# Trailing bare expression so the notebook renders the result with its rich repr.
maps


In [None]:

# 13c. MAP check with Euclidean distance and random_maps=True, labelled by
# 'Metadata_Source'.
# NOTE(review): embeddings_names is a dict here, whereas the other compute_maps
# calls in this notebook pass a list — confirm that compute_maps accepts a mapping.
random_map_options = dict(
    labels_column="Metadata_Source",
    embeddings_names={"Embeddings_mean": "Embeddings_mean"},
    distance="euclidean",
    random_maps=True,
)
maps = em.compute_maps(**random_map_options)
print("Mean Average Precision (MAP) computed:")
print(maps)
