Annotating Metabolomics Features with ipaPy2

Install the ipaPy2 package directly from GitHub

pip install git+https://github.com/francescodc87/ipaPy2.git

Loading libraries and databases

In [None]:
# Load libraries
import pandas as pd
import numpy as np
import json

# Locations of the two reference tables, both under the local ipaPy2\DB folder.
# NOTE(review): these are absolute, machine-specific paths — adjust them before
# running the notebook on another machine.
adducts_csv = r"C:\Users\samri\Documents\101_bioinformatics\IPA\ipaPy2\DB\adducts.csv"
ms1_db_csv = r"C:\Users\samri\Documents\101_bioinformatics\IPA\ipaPy2\DB\IPA_MS1.csv"

# Adducts table: known mass shifts for common ionization adducts.
adducts = pd.read_csv(adducts_csv)

# Main database (DB): MS1-level reference information containing metabolite
# entries (m/z and RT).
DB = pd.read_csv(ms1_db_csv)


Load Metabolomics Data for Annotation

In [None]:
# Feature table to annotate. The active file is the negative-mode one ("_neg_" in
# its name); for the positive-mode run, use the sibling file
# ipa_input_allFeatures_pos_C_vs_B.csv from the same folder instead.
features_csv = r"C:\Users\samri\Documents\101_bioinformatics\DISS\Multi-Omics\metabolomics\ipa_input_allFeatures_neg_C_vs_B.csv"
df2 = pd.read_csv(features_csv)

# Show the first rows as the cell output to confirm the table loaded as expected.
df2.head()

Cluster features

In [None]:
# ipaPy2's `ipa` module provides both the clustering call here and the
# simpleIPA annotation call used later in the notebook.
from ipaPy2 import ipa
# Cluster the loaded feature table (df2); no optional arguments are passed, so
# ipaPy2's default clustering settings apply. The result is the input to the
# annotation step.
# NOTE(review): presumably this groups features that co-elute and correlate in
# intensity — confirm against the ipaPy2 documentation.
df_clustered = ipa.clusterFeatures(df2)


Run IPA feature annotation
Use the simpleIPA function:

Inputs:

Clustered feature data.

Ionisation mode: `ionisation=1` for positive mode, `ionisation=-1` for negative mode. This must match the polarity of the feature table loaded above.

Reference database (DB) and adducts list.

Mass accuracy window (ppm=3).

In [None]:
# Ionisation mode passed to simpleIPA: 1 = positive, -1 = negative.
# It must match the polarity of the feature table being annotated. The table
# loaded in this notebook is the negative-mode file (..._neg_C_vs_B.csv), so the
# mode is -1; switch back to 1 when annotating the positive-mode file.
IONISATION = -1

# Annotate the clustered features against the MS1 database using the adducts
# table, with a 3 ppm mass-accuracy window.
annotations = ipa.simpleIPA(df_clustered, ionisation=IONISATION, DB=DB, adductsAll=adducts, ppm=3)

Flatten and Format Annotations
The output of simpleIPA is a dictionary keyed by feature ID. Each value holds the candidate annotations for that feature (in ipaPy2, a pandas DataFrame with one row per candidate).

Flatten this into a clean list of records:

Each record = one annotation linked to one feature.

In [None]:
# Flatten the per-feature annotations into a list of records, one record per
# candidate annotation, each tagged with the feature it belongs to.
flattened_annotations = []

# Loop over each feature ID and the annotations attached to it
for feature_id, annotation_list in annotations.items():

    # simpleIPA stores each feature's candidates in a pandas DataFrame (see the
    # ipaPy2 docs). Iterating a DataFrame directly yields its column labels, not
    # its rows, so expand it into one dict per row before looping.
    if isinstance(annotation_list, pd.DataFrame):
        annotation_list = annotation_list.to_dict(orient="records")

    # Loop through each annotation entry for the current feature
    for annotation in annotation_list:

        if isinstance(annotation, dict):
            # Structured annotation: start from the Feature ID and copy in every
            # field of the annotation.
            record = {"Feature_ID": feature_id}
            record.update(annotation)
            flattened_annotations.append(record)

        else:
            # Anything else (e.g. a plain string or note) is kept as-is in a
            # single "Annotation" field.
            flattened_annotations.append({"Feature_ID": feature_id, "Annotation": annotation})


Save Flat Annotations to CSV
Convert flattened records to a DataFrame and export as CSV.


In [None]:
# One row per flattened record; columns are the union of the record keys.
annotations_df = pd.DataFrame(flattened_annotations)

# The file name carries the "_neg" suffix to match the negative-mode input table
# and the JSON export. The previous "_pos" suffix mislabelled this data and could
# overwrite the results of a positive-mode run. Rename the suffix when annotating
# the positive-mode file.
annotations_df.to_csv(r"C:\Users\samri\Documents\101_bioinformatics\IPA\annotations_output_flat_allFeatures_neg.csv", index=False)

Serialize and Clean Annotations for JSON Export
IPA outputs may contain NaN values, which are not valid JSON. They are replaced with None (written as `null`) before the annotations are exported.

In [None]:

# NumPy scalar families that have a direct built-in Python equivalent.
_NUMPY_SCALARS = (np.integer, np.floating, np.bool_)


def _to_builtin(value):
    """Return the built-in Python equivalent of a NumPy scalar, else ``value`` itself."""
    return value.item() if isinstance(value, _NUMPY_SCALARS) else value


def serialize_annotations(obj):
    """Recursively convert ``obj`` into a structure that ``json.dump`` can write.

    Conversions applied:
      * pandas DataFrame -> list of row dictionaries (one per row)
      * NumPy array      -> (nested) list
      * dict             -> dict with NumPy-scalar keys turned into built-ins
      * list / tuple     -> list
      * NumPy scalar     -> the matching built-in ``int`` / ``float`` / ``bool``
      * NaN, ``pd.NA``, ``pd.NaT`` -> ``None`` (written as ``null`` in JSON)

    Any other object is returned unchanged. The input is never modified.

    Parameters
    ----------
    obj : any
        The object to clean, typically the dictionary returned by simpleIPA.

    Returns
    -------
    any
        A cleaned copy of ``obj`` built from dicts, lists and scalars.
    """
    # Tabular annotations: one dict per row, then clean every cell with the same
    # rules as any other value (this avoids relying on DataFrame.replace).
    if isinstance(obj, pd.DataFrame):
        return [serialize_annotations(row) for row in obj.to_dict(orient="records")]

    # tolist() yields nested lists (or a bare scalar for 0-d arrays); recurse so
    # NaNs inside are cleaned too.
    if isinstance(obj, np.ndarray):
        return serialize_annotations(obj.tolist())

    # json.dump rejects NumPy integer keys, so normalise keys as well as values.
    if isinstance(obj, dict):
        return {_to_builtin(key): serialize_annotations(value) for key, value in obj.items()}

    # Tuples are written as JSON arrays, so they are cleaned like lists.
    if isinstance(obj, (list, tuple)):
        return [serialize_annotations(item) for item in obj]

    # From here on obj is treated as a scalar.
    obj = _to_builtin(obj)

    if obj is pd.NA or obj is pd.NaT:
        return None

    # NaN is not valid JSON; np.float64 is a float subclass and other NumPy
    # floats were converted above, so this check covers all of them.
    if isinstance(obj, float) and np.isnan(obj):
        return None

    return obj

# Build a JSON-ready copy of the annotations: serialize_annotations walks the
# nested structure and swaps NaN values for None. The result is bound to a new
# name, annotations_serializable, which the JSON export step writes to disk.
annotations_serializable = serialize_annotations(annotations)


Save Cleaned Annotations to JSON
Export the cleaned annotations dictionary as a readable JSON file.

In [None]:
# Destination file for the JSON export of the cleaned annotations.
output_path = r"C:\Users\samri\Documents\101_bioinformatics\IPA\ipaPy2\annotations_cleaned_allFeatures_neg.json"

# Write the cleaned structure; indent=4 keeps the file human-readable.
with open(output_path, mode="w") as json_file:
    json.dump(annotations_serializable, fp=json_file, indent=4)

# Confirm where the file was written.
print(f"Cleaned annotations saved to: {output_path}")
