# Evaluate results from the predict_metabolites.py pipeline

In [40]:
import pandas as pd
from ipywidgets import interact, IntSlider
from rdkit.Chem import Draw
from rdkit.Chem import MolFromSmiles

In [None]:
from pandas.core.frame import DataFrame


loose_df = pd.read_csv("GLORYx_data/test/results_loose_priority_corrected/metabolite_predictions.csv")
strict_df: DataFrame = pd.read_csv("GLORYx_data/test/results_strict/metabolite_predictions.csv")

In [42]:
len(loose_df), len(strict_df)

(1752, 1752)

In [43]:
@interact(i = IntSlider(min=0, max=len(loose_df), step=1))
def f(i):
    row = loose_df.iloc[i]
    parent_name = row.parent_name
    parent_smiles = row.parent_smiles
    metabolite_smiles = row.metabolite_smiles
    reaction = row.reaction
    rule_subset = row.rule_subset
    som = row.som
    score = row.score

    parent_mol = MolFromSmiles(parent_smiles)
    metabolite_mol = MolFromSmiles(metabolite_smiles)

    print(parent_name)
    print(reaction)
    print(rule_subset)
    print(score)

    drawing = Draw.MolsToGridImage(
        mols=[parent_mol, metabolite_mol],
        subImgSize=(400, 400),
    )

    display(drawing)

interactive(children=(IntSlider(value=0, description='i', max=1752), Output()), _dom_classes=('widget-interact…

In [44]:
@interact(i = IntSlider(min=0, max=len(strict_df), step=1))
def f(i):
    row = strict_df.iloc[i]
    parent_name = row.parent_name
    parent_smiles = row.parent_smiles
    metabolite_smiles = row.metabolite_smiles
    reaction = row.reaction
    rule_subset = row.rule_subset
    som = row.som
    score = row.score

    parent_mol = MolFromSmiles(parent_smiles)
    metabolite_mol = MolFromSmiles(metabolite_smiles)

    print(parent_name)
    print(reaction)
    print(rule_subset)
    print(score)

    drawing = Draw.MolsToGridImage(
        mols=[parent_mol, metabolite_mol],
        subImgSize=(400, 400),
    )

    display(drawing)

interactive(children=(IntSlider(value=0, description='i', max=1752), Output()), _dom_classes=('widget-interact…

In [45]:
true_df = pd.read_csv("GLORYx_data/test/gloryx_test_dataset_metabolites_exploded.csv")

In [46]:
len(true_df)

136

In [47]:
list(true_df)

['parent_name',
 'parent_smiles',
 'metabolite_name',
 'metabolite_smiles',
 'generation',
 'doi']

In [48]:
@interact(i = IntSlider(min=0, max=len(true_df), step=1))
def f(i):
    row = true_df.iloc[i]
    parent_name = row.parent_name
    parent_smiles = row.parent_smiles
    metabolite_smiles = row.metabolite_smiles

    parent_mol = MolFromSmiles(parent_smiles)
    metabolite_mol = MolFromSmiles(metabolite_smiles)

    print(parent_name)
    
    drawing = Draw.MolsToGridImage(
        mols=[parent_mol, metabolite_mol],
        subImgSize=(400, 400),
    )

    display(drawing)

interactive(children=(IntSlider(value=0, description='i', max=136), Output()), _dom_classes=('widget-interact'…

In [49]:
def add_binary_labels(predicted_df, true_df):
    """
    Add a binary 'label' column to predicted_df indicating whether each prediction
    exists in the true_df based on parent_smiles and metabolite_smiles matching.
    And do the same for the true_df (add a 'label' column to the dataframe indicating
    whether each true metabolite exists in the predicted_df based on parent_smiles and 
    metabolite_smiles matching).
    
    Args:
        predicted_df: DataFrame with predictions (must have 'parent_smiles' and 'metabolite_smiles' columns)
        true_df: DataFrame with true metabolites (must have 'parent_smiles' and 'metabolite_smiles' columns)
    
    Returns:
        predicted_df with added 'label' column (1 for true positive, 0 for false positive)
        true_df with added 'label' column (1 for true positive, 0 for false positive)
    """
    # Create a set of tuples for fast lookup
    true_pairs = set(zip(true_df['parent_smiles'], true_df['metabolite_smiles']))
    predicted_pairs = set(zip(predicted_df['parent_smiles'], predicted_df['metabolite_smiles']))
    
    # Add binary label column to predicted_df
    predicted_df['label'] = predicted_df.apply(
        lambda row: 1 if (row['parent_smiles'], row['metabolite_smiles']) in true_pairs else 0, 
        axis=1
    )

    # Add binary label column to true_df
    true_df['label'] = true_df.apply(
        lambda row: 1 if (row['parent_smiles'], row['metabolite_smiles']) in predicted_pairs else 0, 
        axis=1
    )

    # The code below is to compute labels in case we do not consider parent/metabolite pairs
    # but only the metabolites (this is suboptimal but I suspect this is what they did in the paper)
    # It only improves the metrics minimally, though.

    # # Add binary label column to predicted_df
    # predicted_df['label'] = predicted_df.apply(
    #     lambda row: 1 if row['metabolite_smiles'] in true_df["metabolite_smiles"].to_list() else 0, 
    #     axis=1
    # )

    # # Add binary label column to true_df
    # true_df['label'] = true_df.apply(
    #     lambda row: 1 if row['metabolite_smiles'] in predicted_df["metabolite_smiles"].to_list() else 0, 
    #     axis=1
    # )
    
    return predicted_df, true_df

strict_df, true_df_strict_labels = add_binary_labels(predicted_df=strict_df, true_df=true_df)
loose_df, true_df_loose_labels = add_binary_labels(predicted_df=loose_df, true_df=true_df)

In [50]:
from sklearn.metrics import roc_auc_score

def compute_metrics(predicted_df, true_df):

    roc_auc = roc_auc_score(y_true=predicted_df.label, y_score=predicted_df.score)

    true_positives = len(predicted_df[predicted_df.label == 1])
    false_positives = len(predicted_df[predicted_df.label == 0])
    false_negatives = len(true_df[true_df.label == 0])

    print(f"Number of predicted metabolites: {len(predicted_df)}")
    print(f"Number of true positives: {true_positives}")
    print(f"Number of false positives: {false_positives}")
    print(f"Number of false negatives: {false_negatives}")

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)
    f1_score = 2 * (precision * recall) / (precision + recall)

    print(f"ROC AUC: {roc_auc}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 score: {f1_score}")

In [51]:
compute_metrics(predicted_df=loose_df, true_df=true_df_loose_labels)

Number of predicted metabolites: 1752
Number of true positives: 110
Number of false positives: 1642
Number of false negatives: 36
ROC AUC: 0.7335317240615656
Precision: 0.06278538812785388
Recall: 0.7534246575342466
F1 score: 0.11591148577449947


Results loose SOMs:

- Number of predicted metabolites: 1752
- Number of true positives: 110
- Number of false positives: 1642
- Number of false negatives: 36
- ROC AUC: 0.7335317240615656
- Precision: 0.06278538812785388
- Recall: 0.7534246575342466
- F1 score: 0.11591148577449947

Results loose SOMs with priority:

- Number of predicted metabolites: 1752
- Number of true positives: 110
- Number of false positives: 1642
- Number of false negatives: 36
- ROC AUC: 0.7714649540471707
- Precision: 0.06278538812785388
- Recall: 0.7534246575342466
- F1 score: 0.11591148577449947

Results loose SOMs with corrected priority:

In [52]:
compute_metrics(predicted_df=strict_df, true_df=true_df_strict_labels)

Number of predicted metabolites: 1752
Number of true positives: 110
Number of false positives: 1642
Number of false negatives: 36
ROC AUC: 0.6753211161554645
Precision: 0.06278538812785388
Recall: 0.7534246575342466
F1 score: 0.11591148577449947
