# Evaluate results from the predict_metabolites.py pipeline

In [1]:
import pandas as pd
from ipywidgets import interact, IntSlider
from rdkit.Chem import Draw
from rdkit.Chem import MolFromSmiles, MolToSmiles, MolToInchi

In [2]:
from pandas.core.frame import DataFrame


loose_df = pd.read_csv("/Users/roxanejacob/Desktop/GLORYxr/GLORYx_data/test/results_loose/metabolite_predictions.csv")
strict_df: DataFrame = pd.read_csv("/Users/roxanejacob/Desktop/GLORYxr/GLORYx_data/test/results_strict/metabolite_predictions.csv")

In [3]:
len(loose_df), len(strict_df)

(4856, 1905)

In [4]:
list(loose_df), list(strict_df)

(['parent_name',
  'parent_smiles',
  'metabolite_smiles',
  'reaction',
  'rule_subset',
  'som',
  'score'],
 ['parent_name',
  'parent_smiles',
  'metabolite_smiles',
  'reaction',
  'rule_subset',
  'som',
  'score'])

In [5]:
@interact(i = IntSlider(min=0, max=len(loose_df), step=1))
def f(i):
    row = loose_df.iloc[i]
    parent_name = row.parent_name
    parent_smiles = row.parent_smiles
    metabolite_smiles = row.metabolite_smiles
    reaction = row.reaction
    rule_subset = row.rule_subset
    som = row.som
    score = row.score

    parent_mol = MolFromSmiles(parent_smiles)
    metabolite_mol = MolFromSmiles(metabolite_smiles)

    print(parent_name)
    print(reaction)
    print(rule_subset)
    print(score)

    drawing = Draw.MolsToGridImage(
        mols=[parent_mol, metabolite_mol],
        subImgSize=(400, 400),
    )

    display(drawing)

interactive(children=(IntSlider(value=0, description='i', max=4856), Output()), _dom_classes=('widget-interact…

In [6]:
@interact(i = IntSlider(min=0, max=len(strict_df), step=1))
def f(i):
    row = strict_df.iloc[i]
    parent_name = row.parent_name
    parent_smiles = row.parent_smiles
    metabolite_smiles = row.metabolite_smiles
    reaction = row.reaction
    rule_subset = row.rule_subset
    som = row.som
    score = row.score

    parent_mol = MolFromSmiles(parent_smiles)
    metabolite_mol = MolFromSmiles(metabolite_smiles)

    print(parent_name)
    print(reaction)
    print(rule_subset)
    print(score)

    drawing = Draw.MolsToGridImage(
        mols=[parent_mol, metabolite_mol],
        subImgSize=(400, 400),
    )

    display(drawing)

interactive(children=(IntSlider(value=0, description='i', max=1905), Output()), _dom_classes=('widget-interact…

In [7]:
true_df = pd.read_csv("/Users/roxanejacob/Desktop/GLORYxr/GLORYx_data/test/gloryx_test_dataset_metabolites_exploded.csv")

In [8]:
len(true_df)

136

In [9]:
list(true_df)

['parent_name',
 'parent_smiles',
 'metabolite_name',
 'metabolite_smiles',
 'generation',
 'doi']

In [10]:
@interact(i = IntSlider(min=0, max=len(true_df), step=1))
def f(i):
    row = true_df.iloc[i]
    parent_name = row.parent_name
    parent_smiles = row.parent_smiles
    metabolite_smiles = row.metabolite_smiles

    parent_mol = MolFromSmiles(parent_smiles)
    metabolite_mol = MolFromSmiles(metabolite_smiles)

    print(parent_name)
    
    drawing = Draw.MolsToGridImage(
        mols=[parent_mol, metabolite_mol],
        subImgSize=(400, 400),
    )

    display(drawing)

interactive(children=(IntSlider(value=0, description='i', max=136), Output()), _dom_classes=('widget-interact'…

In [11]:
# Compute non-stereo inchis

true_df["parent_inchi"] = true_df["parent_smiles"].apply(lambda x: MolToInchi(MolFromSmiles(x), options='SNon'))
true_df["metabolite_inchi"] = true_df["metabolite_smiles"].apply(lambda x: MolToInchi(MolFromSmiles(x), options='SNon'))

loose_df["parent_inchi"] = loose_df["parent_smiles"].apply(lambda x: MolToInchi(MolFromSmiles(x), options='SNon'))
loose_df["metabolite_inchi"] = loose_df["metabolite_smiles"].apply(lambda x: MolToInchi(MolFromSmiles(x), options='SNon'))

strict_df["parent_inchi"] = strict_df["parent_smiles"].apply(lambda x: MolToInchi(MolFromSmiles(x), options='SNon'))
strict_df["metabolite_inchi"] = strict_df["metabolite_smiles"].apply(lambda x: MolToInchi(MolFromSmiles(x), options='SNon'))











































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































In [None]:
def add_binary_labels(predicted_df, true_df):
    """
    Add a binary 'label' column to predicted_df indicating whether each prediction
    exists in the true_df based on parent_inchi and metabolite_inchi matching.
    And do the same for the true_df (add a 'label' column to the dataframe indicating
    whether each true metabolite exists in the predicted_df based on parent_inchi and 
    metabolite_inchi matching).
    
    Args:
        predicted_df: DataFrame with predictions (must have 'parent_inchi' and 'metabolite_inchi' columns)
        true_df: DataFrame with true metabolites (must have 'parent_inchi' and 'metabolite_inchi' columns)
    
    Returns:
        predicted_df with added 'label' column (1 for true positive, 0 for false positive)
        true_df with added 'label' column (1 for true positive, 0 for false positive)
    """
    # Create a set of tuples for fast lookup
    true_pairs = set(zip(true_df['parent_inchi'], true_df['metabolite_inchi']))
    predicted_pairs = set(zip(predicted_df['parent_inchi'], predicted_df['metabolite_inchi']))
    
    # Add binary label column to predicted_df
    predicted_df['label'] = predicted_df.apply(
        lambda row: 1 if (row['parent_inchi'], row['metabolite_inchi']) in true_pairs else 0, 
        axis=1
    )

    # Add binary label column to true_df
    true_df['label'] = true_df.apply(
        lambda row: 1 if (row['parent_inchi'], row['metabolite_inchi']) in predicted_pairs else 0, 
        axis=1
    )
    
    return predicted_df, true_df

strict_df, true_df_strict_labels = add_binary_labels(predicted_df=strict_df, true_df=true_df)
loose_df, true_df_loose_labels = add_binary_labels(predicted_df=loose_df, true_df=true_df)

In [13]:
from sklearn.metrics import roc_auc_score

def compute_metrics(predicted_df, true_df):

    roc_auc = roc_auc_score(y_true=predicted_df.label, y_score=predicted_df.score)

    true_positives = len(predicted_df[predicted_df.label == 1])
    false_positives = len(predicted_df[predicted_df.label == 0])
    false_negatives = len(true_df[true_df.label == 0])

    print(f"Number of predicted metabolites: {len(predicted_df)}")
    print(f"Number of true positives: {true_positives}")
    print(f"Number of false positives: {false_positives}")
    print(f"Number of false negatives: {false_negatives}")

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)
    f1_score = 2 * (precision * recall) / (precision + recall)

    print(f"ROC AUC: {roc_auc}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 score: {f1_score}")

In [14]:
compute_metrics(predicted_df=loose_df, true_df=true_df_loose_labels)

Number of predicted metabolites: 4856
Number of true positives: 302
Number of false positives: 4554
Number of false negatives: 39
ROC AUC: 0.622228257234016
Precision: 0.06219110378912685
Recall: 0.8856304985337243
F1 score: 0.11622089667115643


In [15]:
compute_metrics(predicted_df=strict_df, true_df=true_df_strict_labels)

Number of predicted metabolites: 1905
Number of true positives: 132
Number of false positives: 1773
Number of false negatives: 39
ROC AUC: 0.7371942778034148
Precision: 0.06929133858267716
Recall: 0.7719298245614035
F1 score: 0.12716763005780346
