In [1]:
# RaMP-DB API Client Tutorial

"""
This tutorial demonstrates how to use the RaMP-DB API client module
for metabolic pathway and biochemical data analysis.
"""

from typing import List, Dict
from ramp_client import RaMPClient, RaMPConfig, RaMPAPIError
import time

def _with_prefix(ids: List[str], prefix: str) -> List[str]:
    """Return a copy of *ids* in which every entry starts with *prefix*.

    Entries that already begin with *prefix* are kept unchanged. The check is
    case-sensitive, so ``"HMDB0000001"`` becomes ``"hmdb:HMDB0000001"``.
    """
    return [
        entry if entry.startswith(prefix) else f"{prefix}{entry}"
        for entry in ids
    ]


def _print_with_retry(request, heading: str, action: str,
                      max_retries: int = 3, delay: float = 2) -> None:
    """Call *request* and print its result, retrying on ``RaMPAPIError``.

    Args:
        request: Zero-argument callable that performs the API call.
        heading: Line printed before the result when the call succeeds.
        action: Verb phrase inserted into the failure message
            ("Failed to <action> after N attempts: ...").
        max_retries: Total number of attempts before giving up.
        delay: Seconds to sleep after a failed attempt that will be retried.

    Only ``RaMPAPIError`` is retried; any other exception propagates. After
    the final failed attempt the error is printed, not raised.
    """
    for attempt in range(1, max_retries + 1):
        try:
            result = request()
        except RaMPAPIError as e:
            if attempt == max_retries:
                print(f"Failed to {action} after {max_retries} attempts: {e}")
            else:
                time.sleep(delay)  # Wait before retry
        else:
            print(heading)
            print(result)
            return


def _print_pathway_stats(stats) -> None:
    """Print the per-analyte summary held in *stats*.

    *stats* is the mapping returned by ``client.analyze_pathway_stats``; each
    value must expose ``total_pathways``, ``pathways_by_source``,
    ``unique_pathway_names`` and ``pathway_sources``.
    """
    for analyte_id, pathway_stats in stats.items():
        print(f"\nPathway stats for {analyte_id}:")
        print(f"Total pathways: {pathway_stats.total_pathways}")
        print(f"Pathways by source: {pathway_stats.pathways_by_source}")
        print(f"Unique pathway names: {len(pathway_stats.unique_pathway_names)}")
        print(f"Pathway sources: {pathway_stats.pathway_sources}")


def main():
    """Run every tutorial example in order, printing the results.

    The examples call the RaMP API through ``RaMPClient``. Each example
    reports a ``RaMPAPIError`` by printing it, so one failing section does not
    stop the following sections from running.
    """
    # 1. Initialize the client
    # You can use default settings
    client = RaMPClient()

    # Or customize the configuration
    custom_config = RaMPConfig(
        base_url="https://rampdb.nih.gov/api",
        timeout=60  # Extended timeout for larger queries
    )
    # Shown only to illustrate custom configuration; the examples below all
    # use the default `client`.
    client_custom = RaMPClient(config=custom_config)

    # 2. Basic Database Information
    try:
        # Get source database versions
        versions = client.get_source_versions()
        print("Database versions:", versions)

        # Get valid ID types
        id_types = client.get_id_types()
        print("Valid ID types:", id_types)
    except RaMPAPIError as e:
        print(f"Error getting database info: {e}")

    # 3. Metabolite Analysis Example
    def analyze_metabolites(metabolite_ids: List[str]):
        """Print properties, classes and pathway statistics for metabolites."""
        try:
            # Add 'hmdb:' prefix if not present
            formatted_ids = _with_prefix(metabolite_ids, "hmdb:")

            # Get chemical properties
            properties = client.get_chemical_properties(formatted_ids)
            print("\nChemical properties:")
            print(properties)

            # Get chemical classes
            classes = client.get_chemical_classes(formatted_ids)
            print("\nChemical classes:")
            print(classes)

            # Get pathways
            pathways = client.get_pathways_from_analytes(formatted_ids)

            # Guard clause: nothing to analyze without pathway results
            if not ("result" in pathways and pathways["result"]):
                print("No pathway data found")
                return

            # Analyze pathway statistics
            _print_pathway_stats(client.analyze_pathway_stats(pathways))

            # Find pathway overlaps
            overlaps = client.find_pathway_overlaps(pathways)
            print("\nPathway overlaps:")
            for pathway, count in overlaps.items():
                if count > 1:  # Show only pathways shared by multiple metabolites
                    print(f"{pathway}: shared by {count} metabolites")

        except RaMPAPIError as e:
            print(f"Error in metabolite analysis: {e}")

    # 4. Protein Pathway Analysis Example
    def analyze_protein_pathways(protein_ids: List[str]):
        """Print the pathways and pathway statistics for the given proteins."""
        try:
            # Format protein IDs with uniprot prefix for pathway analysis
            formatted_protein_ids = _with_prefix(protein_ids, "uniprot:")

            # Get pathways associated with proteins
            protein_pathways = client.get_pathways_from_analytes(formatted_protein_ids)
            print("\nProtein pathways:")
            print(protein_pathways)

            # Analyze pathway statistics
            if "result" in protein_pathways and protein_pathways["result"]:
                _print_pathway_stats(client.analyze_pathway_stats(protein_pathways))
            else:
                print("No pathway data found")

        except RaMPAPIError as e:
            print(f"Error in protein pathway analysis: {e}")

    # 5. Combined Analysis Example
    def analyze_metabolites_and_proteins(
        metabolite_ids: List[str], protein_ids: List[str]
    ):
        """Print shared reaction analytes, reactions and reaction classes."""
        # Combine IDs with appropriate prefixes
        combined_analytes = (
            _with_prefix(metabolite_ids, "hmdb:")
            + _with_prefix(protein_ids, "uniprot:")
        )

        # Get common reaction analytes; a failure here skips the rest of the
        # combined analysis.
        try:
            common_reactions = client.get_common_reaction_analytes(combined_analytes)
        except RaMPAPIError as e:
            print(f"Error in combined analysis: {e}")
            return
        print("\nCommon reaction analytes:")
        print(common_reactions)

        # Attempt to get reactions with retry
        _print_with_retry(
            lambda: client.get_reactions_from_analytes(combined_analytes),
            heading="\nReactions:",
            action="get reactions",
        )

        # Attempt to get reaction classes with retry
        _print_with_retry(
            lambda: client.get_reaction_classes(combined_analytes),
            heading="\nReaction classes:",
            action="get reaction classes",
        )

    # 6. Ontology Analysis Example
    def analyze_ontologies(metabolite_ids: List[str], ontology_terms: List[str]):
        """Print ontologies for metabolites and metabolites for ontology terms."""
        # Format metabolite IDs
        formatted_ids = _with_prefix(metabolite_ids, "hmdb:")

        # Get ontologies from metabolites; a failure here skips the reverse
        # lookup below.
        try:
            ontologies = client.get_ontologies_from_metabolites(
                formatted_ids, names_or_ids="ids"
            )
        except RaMPAPIError as e:
            print(f"Error in ontology analysis: {e}")
            return
        print("\nOntologies for metabolites:")
        print(ontologies)

        # Attempt to get metabolites from ontologies with retry
        _print_with_retry(
            lambda: client.get_metabolites_from_ontologies(
                ontology_terms, output_format="json"
            ),
            heading="\nMetabolites for ontologies:",
            action="get metabolites from ontologies",
        )

    # 7. Chemical Enrichment Analysis Example
    def perform_enrichment_analysis(metabolite_ids: List[str]):
        """Print chemical enrichment results for the given metabolites."""
        # Format metabolite IDs
        formatted_ids = _with_prefix(metabolite_ids, "hmdb:")

        # Attempt enrichment analysis with retry. The retry helper already
        # reports every RaMPAPIError, so no outer handler is needed.
        _print_with_retry(
            lambda: client.perform_chemical_enrichment(formatted_ids),
            heading="\nChemical enrichment results:",
            action="perform enrichment analysis",
        )

    # Example usage with real metabolite and protein IDs
    metabolite_ids = ["HMDB0000001", "HMDB0000002"]  # Example metabolite IDs
    protein_ids = ["P31323", "P04637"]  # Example protein IDs
    ontology_terms = ["CHEBI:15903"]  # Example ontology terms

    print("\n=== Metabolite Analysis ===")
    analyze_metabolites(metabolite_ids)

    print("\n=== Protein Pathway Analysis ===")
    analyze_protein_pathways(protein_ids)

    print("\n=== Combined Analysis ===")
    analyze_metabolites_and_proteins(metabolite_ids, protein_ids)

    print("\n=== Ontology Analysis ===")
    analyze_ontologies(metabolite_ids, ontology_terms)

    print("\n=== Enrichment Analysis ===")
    perform_enrichment_analysis(metabolite_ids)

# Run the tutorial only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Database versions: {'data': [{'ramp_db_version': '2.5.2', 'db_mod_date': '2024-03-27 08:19:31.246126', 'status': 'current', 'data_source_id': 'hmdb', 'data_source_name': 'HMDB', 'data_source_url': 'https://hmdb.ca/', 'data_source_version': 'v5.0 (2021-11-17)'}, {'ramp_db_version': '2.5.2', 'db_mod_date': '2024-03-27 08:19:31.246126', 'status': 'current', 'data_source_id': 'reactome', 'data_source_name': 'Reactome', 'data_source_url': 'https://reactome.org/', 'data_source_version': 'v87 (Dec 2023)'}, {'ramp_db_version': '2.5.2', 'db_mod_date': '2024-03-27 08:19:31.246126', 'status': 'current', 'data_source_id': 'wiki', 'data_source_name': 'WikiPathways', 'data_source_url': 'https://www.wikipathways.org/index.php/WikiPathways', 'data_source_version': 'v20240210 (2024-02-10)'}, {'ramp_db_version': '2.5.2', 'db_mod_date': '2024-03-27 08:19:31.246126', 'status': 'current', 'data_source_id': 'kegg', 'data_source_name': 'KEGG', 'data_source_url': 'https://www.genome.jp/kegg/', 'data_source_ve