# Imports 

In [None]:
import pandas as pd
import numpy as np 
from pathlib import Path
from phenoseeker import BioproxyEvaluator, EmbeddingManager

In [None]:
# Input locations: the embeddings array (.npy) and its metadata table
# (.parquet) both live under the same data folder.
base_path = Path("/projects/synsight/data/website_data")
npy_file = base_path / "jump_compounds_embeddings.npy"
parquet_metadata = base_path / "jump_compounds_matrix_metadata.parquet"

# Screen sources passed to BioproxyEvaluator, keyed by source name.
# Only "ChEMBL" is active. The two entries below are disabled.
# NOTE(review): the cells that use source='Curie' presumably need the "Curie"
# entry re-enabled before they can run -- confirm.
#   "ChemBL": Path("/projects/synsight/repos/phenospace/bioproxy/screens_data_chembl"),
#   "Curie": Path("/projects/synsight/repos/phenospace/bioproxy/screens_data_curie"),
screens_folders = {
    "ChEMBL": Path("/projects/synsight/repos/phenoseeker/data/ChEMBL/assays_csv"),
}

In [None]:
# Build the evaluator from the metadata parquet file, the embeddings .npy file
# and the mapping of screen sources to folders.
# NOTE(review): the name `eval` shadows Python's builtin `eval`; it is left
# unchanged because the other cells refer to the evaluator by this name.
eval = BioproxyEvaluator(parquet_metadata, npy_file, screens_folders)

In [None]:
# Show the evaluator object as the cell output.
eval

In [None]:
# Show the evaluator's global embedding manager as the cell output.
eval.global_embedding_manager

In [None]:
# Shape of the object stored under the 'Embeddings' key of the global manager.
eval.global_embedding_manager.embeddings['Embeddings'].shape

# To do

In [None]:
# Look up the embedding manager of screen 'E033_3D_all-val' in source 'Curie'.
# NOTE(review): 'Curie' is commented out of `screens_folders`, so this lookup
# presumably fails until that entry is re-enabled -- confirm.
eval.screen_embedding_managers['Curie']['E033_3D_all-val']

In [None]:
# Enrichment factor for one Curie screen, evaluated at several thresholds.
# NOTE(review): 'seuil' is French for 'threshold'; what this mode selects is
# defined by BioproxyEvaluator and is not visible here.
eval.compute_enrichment_factor_for_screen(
    source='Curie',
    screen='E033_3D_all-val',
    embeddings_name='Embeddings',
    thresholds=[0, 0.3, 1, 1.5, 2],
    mode='seuil',
)

In [None]:
# Enrichment factors for the 'Curie' source, using the same thresholds and
# mode as the single-screen call.
results = eval.compute_enrichment_factors(
    source='Curie',
    embeddings_name='Embeddings',
    thresholds=[0, 0.3, 1, 1.5, 2],
    mode='seuil',
)

In [None]:
# Show only the result rows belonging to screen 'E033_3D_all-val'.
results[results['Screen'] == 'E033_3D_all-val']

In [None]:
# Plot the assays distribution for the 'ChEMBL' source.
eval.plot_assays_distribution('ChEMBL')

# Done

In [None]:
# Enrichment factors for the 'ChEMBL' source at four thresholds; no `mode`
# argument is passed, so the evaluator's default mode applies.
results = eval.compute_enrichment_factors(
    source='ChEMBL',
    embeddings_name='Embeddings',
    thresholds=[1, 3, 5, 10],
)

In [None]:
# Plot the assays distribution for the 'ChEMBL' source.
eval.plot_assays_distribution('ChEMBL')

In [None]:
# Show only the result rows computed at threshold 5.
results[results['Threshold'] == 5]

In [None]:
# Keep only the rows whose ("N Hits", "mean") value is greater than 4; the
# plotting cells work on this filtered table.
df = results[results[("N Hits", "mean")] > 4]

In [None]:
# Number of retained rows divided by 4.
# NOTE(review): the 4 presumably matches the four thresholds [1, 3, 5, 10]
# used for the ChEMBL run, making this an average row count per threshold --
# confirm, and keep it in sync if the threshold list changes.
len(df)/4

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Draws one grouped bar chart per threshold present in `df` (the filtered
# results table built in an earlier cell). For every screen it shows three
# bars: the mean random hit rate, the mean selected hit rate and the max
# selected hit rate, all expressed in percent.

unique_thresholds = sorted(df[("Threshold",)].unique())

for thr in unique_thresholds:
    # Rows for this threshold, ordered by increasing mean random hit rate.
    df_thr = df[df[("Threshold",)] == thr]
    df_thr_sorted = df_thr.sort_values(by=("Hit Rate Random", "mean"))

    # One group of bars per screen, placed at integer x positions.
    screens = df_thr_sorted[("Screen",)].tolist()
    x = np.arange(len(screens))
    width = 0.25  # width of a single bar

    # Scale the hit rates by 100 so they read as percentages.
    hit_rate_rand = df_thr_sorted[("Hit Rate Random", "mean")].values * 100
    hit_rate_sel_mean = df_thr_sorted[("Hit Rate Selected", "mean")].values * 100
    hit_rate_sel_max = df_thr_sorted[("Hit Rate Selected", "max")].values * 100

    fig, ax = plt.subplots(figsize=(10, 6))

    # Bars side by side, left to right: random, selected (mean), selected (max).
    ax.bar(x - width, hit_rate_rand, width, label='Hit Rate Random (%)')
    ax.bar(x, hit_rate_sel_mean, width, label='Hit Rate Selected (mean) (%)')
    ax.bar(x + width, hit_rate_sel_max, width, label='Hit Rate Selected (max) (%)')

    # Axis labels, title, screen names on the x axis, legend and grid.
    ax.set(
        xlabel="Screen",
        ylabel="Hit Rate (%)",
        title=f"Hit Rates pour Threshold = {thr}",
    )
    ax.set_xticks(x)
    ax.set_xticklabels(screens, rotation=45)
    ax.legend()
    ax.grid(True)

    fig.tight_layout()
    plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Draws one grouped bar chart per threshold present in `df` (the filtered
# results table built in an earlier cell). For every screen it shows the mean
# and the max enrichment factor (EF), with a dashed red reference line at
# EF = 1.

unique_thresholds = sorted(df[("Threshold",)].unique())

for thr in unique_thresholds:
    # Rows for this threshold, ordered by increasing mean random hit rate.
    # NOTE(review): the previous comment said the sort was by EF (mean), but
    # the sort key is ("Hit Rate Random", "mean"); the code is kept as is --
    # confirm which ordering is intended.
    df_thr = df[df[("Threshold",)] == thr]
    df_thr_sorted = df_thr.sort_values(by=("Hit Rate Random", "mean"))

    # One pair of bars per screen, placed at integer x positions.
    screens = df_thr_sorted[("Screen",)].tolist()
    x = np.arange(len(screens))
    width = 0.35  # width of a single bar

    ef_mean = df_thr_sorted[("EF", "mean")].values
    ef_max = df_thr_sorted[("EF", "max")].values

    fig, ax = plt.subplots(figsize=(10, 6))

    # Mean on the left, max on the right of each screen position.
    ax.bar(x - width/2, ef_mean, width, label='EF (mean)')
    ax.bar(x + width/2, ef_max, width, label='EF (max)')

    # Horizontal reference line at EF = 1.
    ax.axhline(y=1, color='red', linestyle='--', label='EF = 1')

    # Axis labels, title, screen names on the x axis, legend and grid.
    ax.set(
        xlabel="Screen",
        ylabel="Enrichment Factor",
        title=f"Enrichment Factor pour Threshold = {thr}",
    )
    ax.set_xticks(x)
    ax.set_xticklabels(screens, rotation=45)
    ax.legend()
    ax.grid(True)

    fig.tight_layout()
    plt.show()
