# Testing Expasy query helper

Testing our Expasy query helper by reusing the example queries that ran successfully, retrieved from `test_example_queries.ipynb`.

Potentially interesting questions to use for evaluation:
- Select the number of proteins for each of the subclasses of the EC (Enzyme Commission) class ec:1.1.1.- (from biosoda)
- 

In [1]:
import os

import requests
from dotenv import load_dotenv
from SPARQLWrapper import JSON, SPARQLWrapper

from sparql_llm.utils import extract_sparql_queries

# Load variables from a `.env` file, if one is found, into the process
# environment so that secrets do not have to be hard-coded in the notebook.
load_dotenv()
# Read the key from the CHAT_API_KEY environment variable; os.getenv returns
# None when the variable is unset.
# NOTE(review): presumably used to authenticate against the Expasy chat API —
# confirm at the call site, which is not visible in this part of the file.
expasy_api_key = os.getenv("CHAT_API_KEY")

example_queries = [
    #   {
    #     "question": "Was any UniProt entry integrated on the 9th of January 2013",
    #     "endpoint": "https://sparql.uniprot.org/sparql/",
    #     "query": "PREFIX up: <http://purl.uniprot.org/core/>\nASK\nWHERE\n{\n\t?protein a up:Protein .\n\t?protein up:created '2013-01-09'^^xsd:date\n}",
    #     "results": 1,
    #     "runtime": 0
    #   },
    {
        "question": "Select all taxa from the UniProt taxonomy",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nSELECT ?taxon\nFROM <http://sparql.uniprot.org/taxonomy>\nWHERE\n{\n    ?taxon a up:Taxon .\n}",
        "results": 2941742,
        "runtime": 47,
    },
    {
        "question": "How many distinct extinct organisms are represented in UniProtKB",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX keywords: <http://purl.uniprot.org/keywords/>\nSELECT \n  ?taxon\n  (SAMPLE(?name) AS ?anName)\n  (COUNT(DISTINCT ?protein) AS ?entriesPerExtinctTaxon)\nWHERE\n{\n  GRAPH<http://sparql.uniprot.org/taxonomy>{\n    ?taxon a up:Taxon ;\n           up:scientificName ?name .\n  }\n  ?protein up:organism ?taxon ;\n           up:classifiedWith keywords:952 .\n} GROUP BY ?taxon ORDER BY ?taxon",
        "results": 29,
        "runtime": 0,
    },
    {
        "question": "Select all human UniProt entries with a sequence variant that leads to a tyrosine to phenylalanine substitution",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX taxon: <http://purl.uniprot.org/taxonomy/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nPREFIX faldo: <http://biohackathon.org/resource/faldo#>\nSELECT ?protein ?annotation ?begin ?text\nWHERE\n{\n        ?protein a up:Protein ;\n            up:organism taxon:9606 ; \n            up:annotation ?annotation .\n        ?annotation a up:Natural_Variant_Annotation ;\n            rdfs:comment ?text ;\n            up:substitution ?substitution ;\n            up:range/faldo:begin\n                [ faldo:position ?begin ;\n                  faldo:reference ?sequence ] .\n        ?sequence rdf:value ?value .\n        BIND (substr(?value, ?begin, 1) as ?original) .\n        FILTER(?original = 'Y' && ?substitution = 'F') .\n} ",
        "results": 128,
        "runtime": 0,
    },
    {
        "question": "Select all UniProt entries that were integrated on the 30th of November 2010",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nSELECT ?protein\nWHERE\n{\n\t?protein a up:Protein . \n\t?protein up:created '2010-11-30'^^xsd:date\n} ",
        "results": 643989,
        "runtime": 107,
    },
    {
        "question": "Select the average number of cross-references to the PDB database of UniProt entries that have at least one cross-reference to the PDB database",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nSELECT (AVG(?linksToPdbPerEntry) AS ?avgLinksToPdbPerEntry)\nWHERE\n{\n\tSELECT ?protein (COUNT(DISTINCT ?db) AS ?linksToPdbPerEntry)\n\tWHERE\n\t{\n\t\t?protein a up:Protein .\n\t\t?protein rdfs:seeAlso ?db .\n\t\t?db up:database <http://purl.uniprot.org/database/PDB> .\n\t}\n\tGROUP BY ?protein ORDER BY DESC(?linksToPdbPerEntry)\n}",
        "results": 1,
        "runtime": 2,
    },
    {
        "question": "Select the number of UniProt entries for each of the EC (Enzyme Commission) top level categories",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX ec: <http://purl.uniprot.org/enzyme/>\nSELECT ?ecClass (COUNT(?protein) as ?size)\nWHERE\n{\n    VALUES (?ecClass) {(ec:1.-.-.-) (ec:2.-.-.-) (ec:3.-.-.-) (ec:4.-.-.-) (ec:5.-.-.-) (ec:6.-.-.-) (ec:7.-.-.-)} .\n    ?protein ( up:enzyme | up:domain/up:enzyme | up:component/up:enzyme ) ?enzyme .\n    # Enzyme subclasses are materialized, do not use rdfs:subClassOf+\n    ?enzyme rdfs:subClassOf ?ecClass .\n}\nGROUP BY ?ecClass ORDER BY ?ecClass\n",
        "results": 7,
        "runtime": 0,
    },
    {
        "question": "Find all Natural Variant Annotations if associated via an evidence tag to an article with a pubmed identifier",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": 'PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nSELECT \n    ?accession\n    ?annotation_acc \n    ?pubmed\nWHERE\n{\n        ?protein a up:Protein ;\n            up:annotation ?annotation .\n        ?annotation a up:Natural_Variant_Annotation .\n        ?linkToEvidence rdf:object ?annotation ;\n                        up:attribution ?attribution .\n        ?attribution up:source ?source .\n        ?source a up:Journal_Citation .\n  BIND(SUBSTR(STR(?protein),33) AS ?accession)\n  BIND(IF(CONTAINS(STR(?annotation), "#SIP"), SUBSTR(STR(?annotation),33), SUBSTR(STR(?annotation),36))AS?annotation_acc)\n  BIND(SUBSTR(STR(?source),35) AS ?pubmed)\n}\n',
        "results": 197124,
        "runtime": 9,
    },
    {
        "question": "Select all bacterial taxa and their scientific name from the UniProt taxonomy",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX taxon: <http://purl.uniprot.org/taxonomy/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nSELECT ?taxon ?name\nWHERE\n{\n    ?taxon a up:Taxon .\n    ?taxon up:scientificName ?name .\n    # Taxon subclasses are materialized, do not use rdfs:subClassOf+\n    ?taxon rdfs:subClassOf taxon:2 .\n}",
        "results": 556591,
        "runtime": 22,
    },
    {
        "question": "Find how often an article in pubmed was used in an evidence tag in a human protein (ordered by most used to least)",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX taxon: <http://purl.uniprot.org/taxonomy/>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nSELECT \n    ?source \n    (COUNT(?attribution) AS ?attribitions)\nWHERE\n{\n        ?protein a up:Protein ;\n            up:organism taxon:9606 ;\n            up:annotation ?annotation .\n        ?linkToEvidence rdf:object ?annotation ;\n                        up:attribution ?attribution .\n        ?attribution up:source ?source .\n        ?source a up:Journal_Citation .\n} GROUP BY ?source ORDER BY DESC(COUNT(?attribution))\n",
        "results": 43889,
        "runtime": 30,
    },
    {
        "question": "For two accessions find the GO term labels and group them into GO process,function and component",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": 'PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n\nSELECT \n    (CONCAT(SUBSTR(STR(?protein), 33)) AS ?uniprot)\n    (GROUP_CONCAT(?celtype; separator=";") AS ?celtypes)\n    (GROUP_CONCAT(?biotype; separator=";") AS ?biotypes)\n    (GROUP_CONCAT(?moltype; separator=";") AS ?moltypes)\nWHERE\n{\n    VALUES (?ac) {("Q6GZX4") ("Q96375")}\n    BIND (IRI(CONCAT("http://purl.uniprot.org/uniprot/",?ac)) AS ?protein)\n    ?protein a up:Protein .\n    ?protein up:classifiedWith ?goTerm .\n    #Determine if the type is biological_process\n    OPTIONAL {\n        ?goTerm rdfs:subClassOf <http://purl.obolibrary.org/obo/GO_0008150>.\n        ?goTerm rdfs:label ?biotype .\n    }\n    #Determine if the type is cellular_component\n    OPTIONAL {\n        ?goTerm rdfs:subClassOf <http://purl.obolibrary.org/obo/GO_0005575>.\n        ?goTerm rdfs:label ?celtype .\n    }\n    #Determine if the type is molecular_function\n    OPTIONAL {\n        ?goTerm rdfs:subClassOf <http://purl.obolibrary.org/obo/GO_0003674> .\n        ?goTerm rdfs:label ?moltype .\n    }\n    #Filter out the uniprot keywords\n    FILTER(bound(?biotype) || bound(?celtype) || bound(?moltype))\n} GROUP BY ?protein\n',
        "results": 2,
        "runtime": 0,
    },
    {
        "question": "Number of reviewed entries (Swiss-Prot) that are related to kinase activity",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX taxon: <http://purl.uniprot.org/taxonomy/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nPREFIX GO: <http://purl.obolibrary.org/obo/GO_>\nSELECT\n    (COUNT(DISTINCT(?protein)) AS ?pc)\nWHERE\n{   \n    ?protein rdf:type up:Protein ;\n        up:reviewed true  ;\n        up:organism taxon:9606 ;\n        up:classifiedWith|(up:classifiedWith/rdfs:subClassOf) GO:0016301 .\n}",
        "results": 1,
        "runtime": 3,
    },
    {
        "question": "Find all names associated with uniprot entry P05067, and if the name is associated with the entry it's domains or its components",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nSELECT\n  ?protein\n  ?anyKindOfName \n  ?names \n  ?partType\nWHERE\n{\n  BIND(<http://purl.uniprot.org/uniprot/P05067> AS ?protein)\n  ?protein a up:Protein .\n  {\n    ?protein (up:recommendedName|up:alternativeName) ?structuredName .\n  }\n    UNION\n  {\n    VALUES(?partType){(up:domain) (up:component)}\n    ?protein ?partType ?part .\n    ?part (up:recommendedName|up:alternativeName) ?structuredName .\n  }\n  ?structuredName ?anyKindOfName  ?names .\n  ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .\n}",
        "results": 50,
        "runtime": 90,
    },
    {
        "question": "Get the list of uniprot entries for the chromosome of proteome UP000000625",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nSELECT \n  ?protein\n  ?proteome \nWHERE\n{\n  ?protein a up:Protein ;\n           up:reviewed true ;\n           up:proteome ?proteome .\n  VALUES (?proteome) {(<http://purl.uniprot.org/proteomes/UP000000625#Chromosome>)}\n}",
        "results": 8802,
        "runtime": 3,
    },
    {
        "question": "Select all UniProt entries, and their organism and amino acid sequences (including isoforms), for E. coli K12 and all its strains",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX taxon: <http://purl.uniprot.org/taxonomy/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nSELECT ?protein ?organism ?isoform ?sequence\nWHERE\n{\n    ?protein a up:Protein .\n    ?protein up:organism ?organism .\n    # Taxon subclasses are materialized, do not use rdfs:subClassOf+\n    ?organism rdfs:subClassOf taxon:83333 .\n    ?protein up:sequence ?isoform .\n    ?isoform rdf:value ?sequence .\n}",
        "results": 3746,
        "runtime": 0,
    },
    {
        "question": "Find UniProt entries with merged loci in Bordetella avium",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX taxon: <http://purl.uniprot.org/taxonomy/>\nSELECT\n  ?protein \n  (GROUP_CONCAT(?locusName; separator=',') AS ?locusNames)\nWHERE \n{ \n  ?protein a up:Protein ;\n    up:organism taxon:360910 ;\n    up:encodedBy ?gene .\n  ?gene up:locusName ?locusName .\n} \nGROUP BY ?protein \nHAVING (COUNT(?locusName) > 1)\n",
        "results": 3349,
        "runtime": 2,
    },
    {
        "question": "Find UniProtKB entries with more than 1 Topological domain annotation",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": 'PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nSELECT \n    ?protein \n    (GROUP_CONCAT(?comment; separator=", ") AS ?comments)\nWHERE\n{\n    ?protein a up:Protein ;\n            up:annotation ?annotation . \n    ?annotation rdf:type up:Topological_Domain_Annotation ;\n            rdfs:comment ?comment .\n} \nGROUP BY ?protein \nHAVING (COUNT(?annotation) > 1)\n',
        "results": 175067,
        "runtime": 10,
    },
    {
        "question": "Find longest comment text associated with a UniProtKB Natural Variant Annotation",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nSELECT \n    ?annotation ?comment\nWHERE {\n    ?annotation a up:Natural_Variant_Annotation ;\n        rdfs:comment ?comment . \n} \nORDER BY DESC(STRLEN(?comment))\n",
        "results": 66010,
        "runtime": 2,
    },
    {
        "question": "Find the co-occurence count of Topological Domain comment text in UniProtKB entries",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nSELECT \n    ?comment1 \n    ?comment2 \n    (COUNT(?comment1) AS ?count1)\nWHERE\n{\n    ?protein a up:Protein ;\n               up:annotation ?annotation1 , \n                             ?annotation2 . \n    ?annotation1 rdf:type up:Topological_Domain_Annotation ;\n        rdfs:comment ?rawComment1 .\n    ?annotation2 rdf:type up:Topological_Domain_Annotation ;\n        rdfs:comment ?rawComment2 . \n    BIND(IF(contains(?rawComment1, ';'), \n            STRBEFORE(?rawComment1,';'), \n            ?rawComment1) AS ?comment1)\n    BIND(IF(contains(?rawComment2, ';'), \n            STRBEFORE(?rawComment2,';'), \n            ?rawComment2) AS ?comment2)\n    FILTER(?annotation1 != ?annotation2)\n} \nGROUP BY ?comment1 ?comment2 \nORDER BY DESC(COUNT(?comment1))\n",
        "results": 118,
        "runtime": 21,
    },
    {
        "question": "Find the similar proteins for UniProtKB entry P05607 sorted by UniRef cluser identity",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>\nSELECT \n    ?similar ?identity\nFROM <http://sparql.uniprot.org/uniref>\nFROM <http://sparql.uniprot.org/uniprot>\nWHERE\n{\n    BIND (uniprotkb:P05607 AS ?protein)\n    ?cluster up:member ?member ;\n             up:member/up:sequenceFor ?protein;\n             up:identity ?identity .\n    ?member up:sequenceFor ?similar .\n    FILTER(!sameTerm(?similar, ?protein))\n} \nORDER BY DESC(?identity)\n",
        "results": 79,
        "runtime": 0,
    },
    {
        "question": "Find the human protein which contains an Epitope VSTQ, where T is a phosporylated threonine",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": 'PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX taxon: <http://purl.uniprot.org/taxonomy/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nPREFIX faldo: <http://biohackathon.org/resource/faldo#>\nSELECT \n  ?protein \n  ?comment\n  ?begin\n  ?end \nWHERE\n{\n  ?protein a up:Protein ;\n    up:organism taxon:9606 ; \n    up:sequence ?sequence ;\n    up:annotation ?annotation .\n  ?annotation a up:Modified_Residue_Annotation ;\n    rdfs:comment ?comment ;\n    up:range ?range .\n  ?range \n    faldo:begin [ faldo:position ?begin ; faldo:reference ?sequence ] ;\n    faldo:end [ faldo:position ?end ; faldo:reference ?sequence ] .\n  ?sequence rdf:value ?aaSequence .\n  FILTER (SUBSTR(?aaSequence, ?begin -2 , 4) = "VSTQ")     \n  FILTER (CONTAINS(?comment, "Phosphothreonine"))\n}\n',
        "results": 2,
        "runtime": 13,
    },
    {
        "question": "For the human entry P05067 (Amyloid-beta precursor protein) find the gene start ends in WikiData",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": 'PREFIX wdt: <http://www.wikidata.org/prop/direct/>\nPREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX ps: <http://www.wikidata.org/prop/statement/>\nPREFIX pq: <http://www.wikidata.org/prop/qualifier/>\nPREFIX p: <http://www.wikidata.org/prop/>\n\nSELECT \n\t?protein \n\t?begin\n\t?end\n\t?chromosome\n\t?assembly\nWHERE {\n    {\n        BIND(uniprotkb:P05067 AS ?proteinIRI)\n        BIND (SUBSTR(STR(?proteinIRI), STRLEN(STR(uniprotkb:))+1) AS ?protein)\n    }\n    SERVICE <https://query.wikidata.org/sparql> {\n        ?wp wdt:P352 ?protein ;\n            wdt:P702 ?wg . \n        ?wg p:P644   ?wgss .\n        ?wgss ps:P644        ?begin ;\n          pq:P1057/wdt:P1813 ?chromosome ;\n          pq:P659/rdfs:label ?assembly .\n        ?wg p:P645 ?wgse .\n        ?wgse ps:P645        ?end ;\n          pq:P1057/wdt:P1813 ?chromosome ;\n          pq:P659/rdfs:label ?assembly .\n        FILTER(lang(?assembly) = "en")\n  } \n}\n',
        "results": 2,
        "runtime": 0,
    },
    {
        "question": "Retrieve entries and Catalytic activities in the reviewed (Swiss-Prot) section that have experimental evidences,",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nSELECT  \n  ?protein\n  ?rhea \nWHERE {\n  # ECO 269 is experimental evidence\n  BIND (<http://purl.obolibrary.org/obo/ECO_0000269> as ?evidence)\n  GRAPH <http://sparql.uniprot.org/uniprot> {\n    ?protein up:reviewed true ;\n      up:annotation ?a ;\n      up:attribution ?attribution  .\n\n    ?a a up:Catalytic_Activity_Annotation ;\n      up:catalyticActivity ?ca .\n    ?ca up:catalyzedReaction ?rhea .\n  \n    [] rdf:subject ?a ;\n      rdf:predicate up:catalyticActivity ;\n      rdf:object ?ca ;\n      up:attribution ?attribution .\n\n    ?attribution up:evidence ?evidence .\n  }\n}\n",
        "results": 47936,
        "runtime": 4,
    },
    {
        "question": "Select the UniProt entry with the mnemonic 'A4_HUMAN'",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nSELECT ?protein\nWHERE\n{\n    ?protein a up:Protein .\n    ?protein up:mnemonic 'A4_HUMAN'\n}",
        "results": 1,
        "runtime": 0,
    },
    {
        "question": "Connect patents cited in UniProtKB with those in the patent database at EPO via publication number.",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX skos: <http://www.w3.org/2004/02/skos/core#>\nPREFIX patent: <http://data.epo.org/linked-data/def/patent/>\n\nSELECT ?citation ?patent ?application ?applicationNo\nWHERE\n{\n  ?citation a up:Patent_Citation ;\n    skos:exactMatch ?patent .\n  FILTER(CONTAINS(STR(?patent), 'EP'))\n  BIND(SUBSTR(STR(?patent), 35) AS ?applicationNo)\n  SERVICE<https://data.epo.org/linked-data/query>{\n    ?application patent:publicationNumber ?applicationNo\n  }\n}",
        "results": 66,
        "runtime": 2,
    },
    {
        "question": "Connect patents cited in UniProtKB with those in the patent database at EPO via publication number, whose grant date is more than twenty years in the past.",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX skos: <http://www.w3.org/2004/02/skos/core#>\nPREFIX patent: <http://data.epo.org/linked-data/def/patent/>\n\nSELECT ?grantDate ?patent ?application ?applicationNo\nWHERE\n{\n    ?citation a up:Patent_Citation ;\n  skos:exactMatch ?patent .\n  BIND(SUBSTR(STR(?patent), 35) AS ?applicationNo)\n  BIND(SUBSTR(STR(?patent), 33, 2) AS ?countryCode)\n  SERVICE<https://data.epo.org/linked-data/query>{\n    ?publication patent:publicationNumber ?applicationNo ;\n                 patent:application ?application . \n    ?application patent:grantDate ?grantDate .\n  }\n  BIND((year(now()) - 20) AS ?thisYearMinusTwenty)\n  BIND(year(?grantDate) AS ?grantYear)\n  FILTER(?grantYear < ?thisYearMinusTwenty)\n} ORDER BY ?grantYear",
        "results": 71,
        "runtime": 12,
    },
    {
        "question": "Select a mapping of UniProt to PDB entries using the UniProt cross-references to the PDB database",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nSELECT ?protein ?db\nWHERE\n{\n    ?protein a up:Protein .\n    ?protein rdfs:seeAlso ?db .\n    ?db up:database <http://purl.uniprot.org/database/PDB>\n}",
        "results": 377848,
        "runtime": 36,
    },
    {
        "question": "Find mouse homologs in OMABrowser of human enzymes that catalyze reactions involving Sterols (CHEBI:15889). Federating with Rhea-DB and OMABrowser.",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX taxon: <http://purl.uniprot.org/taxonomy/>\nPREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nPREFIX lscr: <http://purl.org/lscr#>\nPREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>\nSELECT\n  DISTINCT\n    ?chebi\n    ?reaction\n    ?humanProtein\n    ?mouseProtein\n    ?cluster \nWHERE {\n  SERVICE <https://sparql.rhea-db.org/sparql> {\n    ?reaction rdfs:subClassOf rh:Reaction .\n    ?reaction rh:side/rh:contains/rh:compound ?compound .\n    ?compound rh:chebi ?chebi .\n    ?chebi rdfs:subClassOf* CHEBI:15889\n  }\n\n  ?humanProtein up:organism taxon:9606 .\n  ?humanProtein up:annotation ?a .\n  ?a a up:Catalytic_Activity_Annotation .\n  ?a up:catalyticActivity ?ca .\n  ?ca up:catalyzedReaction ?reaction .\n\n  SERVICE  <https://sparql.omabrowser.org/sparql> {\n    ?cluster a orth:ParalogsCluster .\n    ?cluster orth:hasHomologousMember ?node1 , ?node2 .\n    ?node1 orth:hasHomologousMember* ?orthoProtein1 .\n    ?node2 orth:hasHomologousMember* ?orthoProtein2 .\n    ?orthoProtein1 lscr:xrefUniprot ?mouseProtein .\n    ?orthoProtein2 lscr:xrefUniprot ?humanProtein .\n    # inTaxon mouse\n    ?orthoProtein1 orth:organism/<http://purl.obolibrary.org/obo/RO_0002162> taxon:10090 . \n  }\n}",
        "results": 31052,
        "runtime": 320,
    },
    {
        "question": "Proteins with annotated binding sites for ligands similar to heme",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": 'PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX sachem: <http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nSELECT \n  ?protein\n  ?mnemonic\n  ?proteinName\n  ?ligandSimilarityScore\n  ?ligand\nWHERE {\n  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/chebi> {\n    ?ssc sachem:compound ?ligand; \n      sachem:score ?ligandSimilarityScore ;\n      sachem:similaritySearch ?sss .\n        # Smiles of Heme\n    ?sss    sachem:query "CC1=C(CCC([O-])=O)C2=[N+]3C1=Cc1c(C)c(C=C)c4C=C5C(C)=C(C=C)C6=[N+]5[Fe-]3(n14)n1c(=C6)c(C)c(CCC([O-])=O)c1=C2";\n      sachem:cutoff "8e-1"^^xsd:double ;\n      sachem:aromaticityMode sachem:aromaticityDetect ;\n      sachem:similarityRadius 1 ;\n      sachem:tautomerMode sachem:ignoreTautomers .\n  }\n  ?protein up:mnemonic ?mnemonic ;\n    up:recommendedName/up:fullName ?proteinName ;\n    up:annotation ?annotation .\n  ?annotation a up:Binding_Site_Annotation ;\n      up:ligand/rdfs:subClassOf ?ligand .\n}\nORDER BY DESC(?ligandSimilarityScore)',
        "results": 967082,
        "runtime": 181,
    },
    {
        "question": "Number of proteins with annotated binding sites for metals or metal sulfur clusters (and experimental evidence for the binding)",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nPREFIX obo: <http://purl.obolibrary.org/obo/>\nPREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>\nSELECT\n  ?ligand\n  ?ligandName \n  (COUNT(DISTINCT ?protein) as ?entries)\nWHERE {\n   ?protein up:annotation ?annotation .\n   \n   VALUES ?evs { obo:ECO_0000269 obo:ECO_0007744 } .\n   VALUES ?chebids { CHEBI:25213 CHEBI:25214 } .\n   ?st rdf:subject ?protein ; \n       rdf:predicate up:annotation ; \n       rdf:object ?annotation ;\n       up:attribution/up:evidence ?evs .\n\n   ?annotation up:ligand/rdfs:subClassOf ?ligand .\n   ?ligand rdfs:subClassOf+ ?chebids ;\n     rdfs:label ?ligandName .\n}\nGROUP BY ?ligand ?ligandName\nORDER BY DESC(?entries)",
        "results": 23,
        "runtime": 16,
    },
    {
        "question": "Select Enzymes that have Ligands that have a known allosteric effect",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": 'PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nSELECT\n  ?protein\n  ?ligandName \n  ?ligandNote\n  ?chebi\nWHERE {\n   ?protein up:annotation ?annotation .\n   ?annotation a up:Binding_Site_Annotation . \n   ?annotation up:ligand ?ligand .\n   ?ligand rdfs:comment ?ligandNote ;\n     rdfs:subClassOf ?chebi ;\n     rdfs:label ?ligandName .\n   FILTER(REGEX(?ligandNote, "allosteric", "i"))\n}\n',
        "results": 730,
        "runtime": 22,
    },
    {
        "question": "Map PDB identifiers plus chains to UniProt",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nSELECT\n  ?pdbId ?chain ?pdbChain ?uniprot\nWHERE\n{\n  # A space separated list of pairs of PDB identifiers and the chain code.\n  VALUES(?pdbId ?pdbChain) { ('6VXC' 'A') ('1BG3' 'B') }\n\n  # Make an IRI out of the pdbId\n  BIND(iri(concat('http://rdf.wwpdb.org/pdb/', ?pdbId)) AS ?pdb)\n\n  # Map to UniProt entries\n  ?uniprot rdfs:seeAlso ?pdb .\n  ?pdb up:database <http://purl.uniprot.org/database/PDB> ;\n       up:chainSequenceMapping ?chainSm .\n  ?chainSm up:chain ?chainsPlusRange .\n\n  # Extract the list of chains from the text representation.\n  BIND(STRBEFORE(?chainsPlusRange, '=') AS ?chain)\n\n  # Filter those that match.\n  FILTER(CONTAINS(?chain, ?pdbChain))\n}",
        "results": 5,
        "runtime": 0,
    },
    {
        "question": "Map UniProt to HGNC identifiers and Symbols",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nSELECT\n  ?uniprot\n  ?hgnc\n  ?hgncSymbol\nWHERE\n{\n  # A space separated list of UniProt primary accessions.\n  VALUES (?acc) {('P05067') ('P00750')}\n  BIND(iri(concat(str(uniprotkb:), ?acc)) AS ?uniprot)\n  ?uniprot rdfs:seeAlso ?hgnc .\n  ?hgnc up:database <http://purl.uniprot.org/database/HGNC> ;\n       rdfs:comment ?hgncSymbol .\n}",
        "results": 2,
        "runtime": 0,
    },
    {
        "question": "Select all cross-references to external databases of the category '3D structure databases' of UniProt entries that are classified with the keyword 'Acetoin biosynthesis (KW-0005)'",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX keywords: <http://purl.uniprot.org/keywords/>\nSELECT DISTINCT ?link\nWHERE\n{\n    ?protein a up:Protein . \n    ?protein up:classifiedWith keywords:5 .\n    ?protein rdfs:seeAlso ?link .\n    ?link up:database ?db .\n    ?db up:category '3D structure databases'\n}",
        "results": 4303,
        "runtime": 2,
    },
    {
        "question": "Find Human proteins that catalyze reactions where substrates or product have a Cholestane skeleton",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": 'PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX taxon: <http://purl.uniprot.org/taxonomy/>\nPREFIX sachem: <http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#>\nPREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nSELECT\n  DISTINCT\n    ?protein\n    ?disease\n    ?rhea\n    ?chebi\n    ?omim\nWHERE {\n    # Find complete ChEBIs with a Cholestane skeleton, via the Czech Elixir node IDSM Sachem chemical substructure search.\n    SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/chebi> {\n      ?chebi sachem:substructureSearch [\n        sachem:query\n"[C@]12(CCC3CCCC[C@]3(C)[C@@]1([H])CC[C@]1(C)[C@@]([H])([C@@](C)([H])CCCC(C)C)CC[C@@]21[H])[H]"\n].\n   }\n   # Use the fact that UniProt catalytic activities are annotated using Rhea \n   # Mapping the found ChEBIs to Rhea reactions\n   SERVICE <https://sparql.rhea-db.org/sparql>{\n     ?rhea rh:side/rh:contains/rh:compound/rdfs:subClassOf ?chebi .\n   }\n   # Match the found Rhea reactions with human UniProtKB proteins\n   ?protein up:annotation/up:catalyticActivity/up:catalyzedReaction ?rhea .\n   ?protein up:organism taxon:9606 .\n   # Find only those human entries that have an annotated related disease, and optionaly map these to OMIM\n   ?protein up:annotation/up:disease ?disease .\n   OPTIONAL {\n     ?disease rdfs:seeAlso ?omim .\n     ?omim up:database <http://purl.uniprot.org/database/MIM>\n   }\n}',
        "results": 27,
        "runtime": 19,
    },
    {
        "question": "Select the Gene Protein Reaction sets for Human (Ensembl Gene, Human UniProtKB, Catalyzed Rhea reactions)",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX taxon: <http://purl.uniprot.org/taxonomy/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nSELECT\n?ensemblGene ?protein ?rhea\nWHERE {\n  ?protein up:reviewed true ;\n           up:organism taxon:9606 .\n  ?protein up:annotation ?caa ;\n           rdfs:seeAlso ?ensemblTranscript .\n  ?ensemblTranscript up:database <http://purl.uniprot.org/database/Ensembl> .\n\t?caa up:catalyticActivity ?ca .\n  ?ca up:catalyzedReaction ?rhea .\n  ?ensemblTranscript up:transcribedFrom ?ensemblGene \n}",
        "results": 122340,
        "runtime": 15,
    },
    {
        "question": "Select reviewed UniProt entries (Swiss-Prot), and their recommended protein name, that have a preferred gene name that contains the text 'DNA'",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX skos: <http://www.w3.org/2004/02/skos/core#>\nSELECT ?protein ?name\nWHERE\n{\n        ?protein a up:Protein .\n        ?protein up:reviewed true .\n        ?protein up:recommendedName ?recommended .\n        ?recommended up:fullName ?name .\n        ?protein up:encodedBy ?gene .\n        ?gene skos:prefLabel ?text .\n        FILTER CONTAINS(?text, 'DNA')\n}\n",
        "results": 876,
        "runtime": 98,
    },
    {
        "question": "Select the preferred gene name and disease annotation of all human UniProt entries that are known to be involved in a disease",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX taxon: <http://purl.uniprot.org/taxonomy/>\nPREFIX skos: <http://www.w3.org/2004/02/skos/core#>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nSELECT ?name ?text\nWHERE\n{\n        ?protein a up:Protein . \n        ?protein up:organism taxon:9606 .\n        ?protein up:encodedBy ?gene . \n        ?gene skos:prefLabel ?name .\n        ?protein up:annotation ?annotation .\n        ?annotation a up:Disease_Annotation .\n        ?annotation rdfs:comment ?text\n}",
        "results": 31652,
        "runtime": 2,
    },
    {
        "question": "Find all proteins linked to arachidonate (CHEBI:32395)",
        "endpoint": "https://sparql.uniprot.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX skos: <http://www.w3.org/2004/02/skos/core#>\nPREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>\n\nSELECT \n    DISTINCT\n        ?uniprot\n        ?uniprotID\n        ?recname\n        ?gene\n        ?chebi\n        ?uniprotName\nWHERE {\n  SERVICE <https://sparql.rhea-db.org/sparql> {\n     VALUES (?chebi) { (CHEBI:32395) }\n     ?rhea rh:side/rh:contains/rh:compound ?compound .\n     ?compound rh:chebi ?chebi .\n     ?chebi up:name ?uniprotName .\n  }\n  ?uniprot up:annotation/up:catalyticActivity/up:catalyzedReaction ?rhea .\n  ?uniprot up:mnemonic ?uniprotID .\n  ?uniprot up:recommendedName/up:fullName ?recname .\n  OPTIONAL {?uniprot up:encodedBy/skos:prefLabel ?gene .}\n}",
        "results": 23346,
        "runtime": 30,
    },
    {
        "question": "What are the species present in Bgee?",
        "endpoint": "https://www.bgee.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\n\nSELECT ?species WHERE {\n    ?species a up:Taxon .\n}\n",
        "results": 52,
        "runtime": 0,
    },
    {
        "question": "What are the anatomical entities where the P02654 gene is expressed? Note that P02654 is a UniProtKB identifier of the APOC1 human gene.",
        "endpoint": "https://www.bgee.org/sparql/",
        "query": "PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nPREFIX lscr: <http://purl.org/lscr#>\nPREFIX genex: <http://purl.org/genex#>\n\nSELECT DISTINCT ?anat ?anatName WHERE {\n    ?seq a orth:Gene ;\n        genex:isExpressedIn ?anat ;\n        lscr:xrefUniprot uniprotkb:P02654 .\n    ?anat a genex:AnatomicalEntity ;\n        rdfs:label ?anatName .\n}\n",
        "results": 526,
        "runtime": 0,
    },
    {
        "question": "What is all the metadata related to the ENSG00000130208 gene, where ENSG00000130208 is the identifier of the APOC1 human gene.",
        "endpoint": "https://www.bgee.org/sparql/",
        "query": 'PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nPREFIX lscr: <http://purl.org/lscr#>\nPREFIX dcterms: <http://purl.org/dc/terms/>\n\nSELECT DISTINCT ?symbol ?description ?id\n?links ?organism ?uniprot ?ensembl ?ncbi WHERE {\n    ?seq a orth:Gene ;\n        rdfs:label ?symbol ;\n        rdfs:seeAlso ?links ;\n        dcterms:description ?description ;\n        dcterms:identifier ?id ;\n        orth:organism ?organism .\n    OPTIONAL{?seq lscr:xrefUniprot ?uniprot .}\n    OPTIONAL{?seq lscr:xrefEnsemblGene ?ensembl .}\n    OPTIONAL{?seq lscr:xrefNCBIGene ?ncbi .}\n    FILTER (?id = "ENSG00000130208")\n}\n',
        "results": 16,
        "runtime": 1,
    },
    {
        "question": "What are the anatomical entities where the APOC1 Homo sapiens gene is not expressed, that is where is APOC1 absent?",
        "endpoint": "https://www.bgee.org/sparql/",
        "query": 'PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nPREFIX obo: <http://purl.obolibrary.org/obo/>\nPREFIX genex: <http://purl.org/genex#>\n\nSELECT DISTINCT ?anat ?anatName WHERE {\n    ?seq a orth:Gene ;\n        rdfs:label "APOC1" ;\n        genex:isAbsentIn ?anat ;\n        orth:organism ?organism .\n    ?anat a genex:AnatomicalEntity ;\n        rdfs:label ?anatName .\n    ?organism obo:RO_0002162  ?species .\n    ?species a up:Taxon ;\n        up:scientificName "Homo sapiens" .\n}\n',
        "results": 6,
        "runtime": 0,
    },
    {
        "question": "What are the species present in Bgee and their scientific and common names?",
        "endpoint": "https://www.bgee.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\n\nSELECT ?species ?sci_name ?common_name WHERE {\n    ?species a up:Taxon ;\n        up:scientificName ?sci_name ;\n        up:rank up:Species .\n    OPTIONAL { ?species up:commonName ?common_name . }\n}\n",
        "results": 52,
        "runtime": 0,
    },
    {
        "question": "What are the anatomical entities where the APOC1 gene is expressed?",
        "endpoint": "https://www.bgee.org/sparql/",
        "query": 'PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nPREFIX genex: <http://purl.org/genex#>\n\nSELECT DISTINCT ?anat ?anatName WHERE {\n    ?seq a orth:Gene ;\n        genex:isExpressedIn ?anat ;\n        rdfs:label "APOC1" .\n    ?anat a genex:AnatomicalEntity ;\n        rdfs:label ?anatName .\n}\n',
        "results": 584,
        "runtime": 0,
    },
    {
        "question": "What are the anatomical entities where the APOC1 Homo sapiens gene is expressed?",
        "endpoint": "https://www.bgee.org/sparql/",
        "query": 'PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nPREFIX obo: <http://purl.obolibrary.org/obo/>\nPREFIX genex: <http://purl.org/genex#>\n\nSELECT DISTINCT ?anat ?anatName WHERE {\n    ?seq a orth:Gene ;\n        genex:isExpressedIn ?anat ;\n        rdfs:label "APOC1" .\n    ?anat a genex:AnatomicalEntity ;\n        rdfs:label ?anatName .\n    ?seq orth:organism ?organism .\n    ?organism obo:RO_0002162  ?species .\n    ?species a up:Taxon ;\n        up:scientificName "Homo sapiens" .\n}\n',
        "results": 526,
        "runtime": 0,
    },
    {
        "question": "What are the anatomical entities where the human gene APOC1 is expressed in the post-juvenile stage along with its expression score independently of the strain, sex, and cell type?",
        "endpoint": "https://www.bgee.org/sparql/",
        "query": 'PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nPREFIX obo: <http://purl.obolibrary.org/obo/>\nPREFIX genex: <http://purl.org/genex#>\n\nSELECT DISTINCT ?anat ?anatName ?score ?stage WHERE {\n    ?seq a orth:Gene ;\n        rdfs:label "APOC1" ;\n        orth:organism ?organism .\n    ?expression a genex:Expression ;\n        genex:hasExpressionCondition ?condition ;\n        genex:hasExpressionLevel ?score ;\n        genex:hasSequenceUnit ?seq .\n    ?condition a genex:ExpressionCondition ;\n        genex:hasAnatomicalEntity ?anat ;\n        genex:hasAnatomicalEntity obo:GO_0005575 ;\n        genex:hasDevelopmentalStage ?stage ;\n        genex:hasSex "any" ;\n        genex:hasStrain ?strain .\n    ?anat a genex:AnatomicalEntity ;\n        rdfs:label ?anatName .\n    ?stage rdfs:label "post-juvenile" .\n    ?strain rdfs:label "wild-type" .\n    ?organism obo:RO_0002162  ?species .\n    ?species a up:Taxon ;\n        up:commonName "human" .\nFILTER (?anat !=  obo:GO_0005575)\n} ORDER BY DESC(?score)\n',
        "results": 456,
        "runtime": 60,
    },
    {
        "question": "Find all Rattus norvegicus' proteins present in OMA RDF database.",
        "endpoint": "https://sparql.omabrowser.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nSELECT ?protein ?OMA_link\nWHERE\n{\n    ?protein a orth:Protein.\n    ?protein  orth:organism ?organism.\n    ?inTaxon rdfs:label 'in taxon'@en.\n    ?organism ?inTaxon ?taxon.\n    ?taxon  up:scientificName 'Rattus norvegicus'.\n    ?protein rdfs:seeAlso ?OMA_link.\n}",
        "results": 22232,
        "runtime": 3,
    },
    {
        "question": "Retrieve all proteins belongong to the Hierarchical Orthologous Group (HOG) at the level 'Vertebrata' to which humans' CDIN1 gene belong, together with their gene name symbol if available.",
        "endpoint": "https://sparql.omabrowser.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nPREFIX obo: <http://purl.obolibrary.org/obo/>\nselect distinct ?HOG ?MEMBER ?GENE_LABEL\nwhere {\n    ?HOG a orth:OrthologsCluster ;\n      orth:hasHomologousMember ?node1 ;\n      orth:hasTaxonomicRange ?taxRange .\n    ?taxRange orth:taxRange 'Vertebrata' .\n    ?node1 orth:hasHomologousMember* ?query ;\n      orth:hasHomologousMember* ?MEMBER .\n    ?MEMBER a orth:Protein .\n    OPTIONAL {\n        ?MEMBER rdfs:label ?GENE_LABEL .\n    }\n    ?query a orth:Protein ;\n      orth:organism/obo:RO_0002162/up:scientificName 'Homo sapiens';\n      rdfs:label 'CDIN1'.\n}",
        "results": 135,
        "runtime": 0,
    },
    {
        "question": "Which species are available on OMA database and their scientific names?",
        "endpoint": "https://sparql.omabrowser.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nSELECT ?species ?sciname WHERE\n{\n    ?species a up:Taxon.\n    ?species up:scientificName ?sciname.\n    ?species up:rank up:Species.\n}",
        "results": 2851,
        "runtime": 0,
    },
    {
        "question": "Retrieve all proteins in OMA that is encoded by the INS gene and their mnemonics and evidence types from Uniprot database (federated query).",
        "endpoint": "https://sparql.omabrowser.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nPREFIX obo: <http://purl.obolibrary.org/obo/>\nPREFIX lscr: <http://purl.org/lscr#>\nSELECT DISTINCT ?proteinOMA ?species  ?mnemonic  ?evidenceType ?UniProt_URI\nWHERE {\n    ?proteinOMA a orth:Protein;\n    orth:organism/obo:RO_0002162/up:scientificName ?species;\n    rdfs:label 'INS'.\n    ?proteinOMA lscr:xrefUniprot ?UniProt_URI.\n    #Search the INS gene mnemonics and evidence types from Uniprot database.\n    service <http://sparql.uniprot.org/sparql> { \n    ?UniProt_URI up:mnemonic ?mnemonic;\n    up:existence/rdfs:label ?evidenceType. }\n}",
        "results": 76,
        "runtime": 12,
    },
    {
        "question": "Retrieve all genes that are orthologous to ENSLACG00000002497 Ensembl gene (identifier)",
        "endpoint": "https://sparql.omabrowser.org/sparql/",
        "query": "PREFIX sio: <http://semanticscience.org/resource/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nPREFIX lscr: <http://purl.org/lscr#>\nPREFIX ensembl: <http://rdf.ebi.ac.uk/resource/ensembl/>\nselect ?protein2 ?OMA_LINK \nwhere {\n    #The three that contains Orthologs. The leafs are proteins.\n    #This graph pattern defines the relationship protein1 is Orthologs to protein2\n    ?cluster a orth:OrthologsCluster.\n    ?cluster orth:hasHomologousMember ?node1.\n    ?cluster orth:hasHomologousMember ?node2. \n    ?node2 orth:hasHomologousMember* ?protein2. \n    ?node1 orth:hasHomologousMember* ?protein1.\n    ########\n     \n    #Specify the protein to look for its orthologs\n    ?protein1 sio:SIO_010079/lscr:xrefEnsemblGene  ensembl:ENSLACG00000002497.\n    ########\n     \n    #The OMA link to the second protein\n    ?protein2 rdfs:seeAlso ?OMA_LINK. \n    ########\n     \n    filter(?node1 != ?node2) \n}",
        "results": 42,
        "runtime": 0,
    },
    {
        "question": "Retrieve all genes that are paralogous to ENSG00000244734 Ensembl gene (identifier).",
        "endpoint": "https://sparql.omabrowser.org/sparql/",
        "query": "PREFIX sio: <http://semanticscience.org/resource/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nPREFIX lscr: <http://purl.org/lscr#>\nPREFIX ensembl: <http://rdf.ebi.ac.uk/resource/ensembl/>\nselect ?protein2 ?OMA_LINK \nwhere {\n    #The three that contains paralogs. The leafs are proteins.\n    #This graph pattern defines the relationship protein1 is paralogous to protein2\n    ?cluster a orth:ParalogsCluster.\n    ?cluster orth:hasHomologousMember ?node1.\n    ?cluster orth:hasHomologousMember ?node2.\n    ?node2 orth:hasHomologousMember* ?protein2.\n    ?node1 orth:hasHomologousMember* ?protein1.\n    ########\n\n    #Specify the protein to look for its paralogs\n    ?protein1 sio:SIO_010079/lscr:xrefEnsemblGene  ensembl:ENSG00000244734.\n    ########\n\n    #The OMA link to the second protein\n    ?protein2 rdfs:seeAlso ?OMA_LINK. \n    ########\n     \n    filter(?node1 != ?node2) \n}",
        "results": 473,
        "runtime": 2,
    },
    {
        "question": "Retrieve all genes that are paralogous to HUMAN00529 OMA protein (identifier) and their cross-reference links to OMA and Uniprot.",
        "endpoint": "https://sparql.omabrowser.org/sparql/",
        "query": "PREFIX orth: <http://purl.org/net/orth#>\nPREFIX lscr: <http://purl.org/lscr#>\nPREFIX dc: <http://purl.org/dc/terms/>\nselect ?protein2 ?Uniprot_link\nwhere {\n    ?cluster a orth:ParalogsCluster.\n    ?cluster orth:hasHomologousMember ?node1.\n    ?cluster orth:hasHomologousMember ?node2.\n    ?node2 orth:hasHomologousMember* ?protein2.\n    ?node1 orth:hasHomologousMember* ?protein1.\n    ?protein1 a orth:Protein.\n    ?protein1 dc:identifier 'HUMAN00529'.\n    ?protein2 a orth:Protein. \n    ?protein2  lscr:xrefUniprot ?Uniprot_link. \n    filter(?node1 != ?node2)\n}",
        "results": 979,
        "runtime": 0,
    },
    {
        "question": "Retrieve all genes that are orthologous to HUMAN22169 OMA protein (identifier) and their cross-reference links to OMA and Uniprot.",
        "endpoint": "https://sparql.omabrowser.org/sparql/",
        "query": "PREFIX orth: <http://purl.org/net/orth#>\nPREFIX lscr: <http://purl.org/lscr#>\nPREFIX dc: <http://purl.org/dc/terms/>\nselect ?protein2 ?Uniprot_link\nwhere {\n    ?cluster a orth:OrthologsCluster.\n    ?cluster orth:hasHomologousMember ?node1.\n    ?cluster orth:hasHomologousMember ?node2.\n    ?node2 orth:hasHomologousMember* ?protein2.\n    ?node1 orth:hasHomologousMember* ?protein1.\n    ?protein1 a orth:Protein.\n    ?protein1 dc:identifier 'HUMAN22169'.\n    ?protein2 a orth:Protein. \n    ?protein2  lscr:xrefUniprot ?Uniprot_link. \n    filter(?node1 != ?node2)\n}",
        "results": 161,
        "runtime": 0,
    },
    {
        "question": "Retrieve all genes per species that are orthologous to Rabbit's APOCI or APOC1 gene and their cross-reference links to OMA and Uniprot including the corresponding Ensembl gene identifier.",
        "endpoint": "https://sparql.omabrowser.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX sio: <http://semanticscience.org/resource/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nPREFIX obo: <http://purl.obolibrary.org/obo/>\nPREFIX lscr: <http://purl.org/lscr#>\nPREFIX dc: <http://purl.org/dc/terms/>\nselect ?protein1    ?protein2  ?geneName2  ?species2 ?Prot2_uniprot ?prot2_ensemblGeneId  \nwhere {\n    ?cluster a orth:OrthologsCluster.\n    ?cluster orth:hasHomologousMember ?node1.\n    ?cluster orth:hasHomologousMember ?node2. \n    ?node2 orth:hasHomologousMember* ?protein2. \n    ?node1 orth:hasHomologousMember* ?protein1.\n    ?protein1 a orth:Protein;\n    orth:organism/obo:RO_0002162/up:scientificName 'Oryctolagus cuniculus';\n    rdfs:label 'APOCI'.\n    ?protein2 a orth:Protein; \n    lscr:xrefUniprot ?Prot2_uniprot;\n    sio:SIO_010079/lscr:xrefEnsemblGene/dc:identifier  ?prot2_ensemblGeneId;\n    rdfs:label ?geneName2;\n    orth:organism/obo:RO_0002162/up:scientificName ?species2.\n    filter(?node1 != ?node2)\n}",
        "results": 56,
        "runtime": 1,
    },
    {
        "question": "Retrieve all Rabbit's proteins encoded by genes that are orthologous to Mouses's hemoglobin Y gene and their cross-reference links to Uniprot.",
        "endpoint": "https://sparql.omabrowser.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX orth: <http://purl.org/net/orth#>\nPREFIX obo: <http://purl.obolibrary.org/obo/>\nPREFIX lscr: <http://purl.org/lscr#>\nselect distinct ?MOUSE_PROTEIN ?RABIT_PROTEIN ?MOUSE_UNIPROT_XREF ?RABIT_UNIPROT_XREF \nwhere {\n    ?cluster a orth:OrthologsCluster.\n    ?cluster orth:hasHomologousMember ?node1.\n    ?cluster orth:hasHomologousMember ?node2. \n    ?node2 orth:hasHomologousMember* ?RABIT_PROTEIN. \n    ?node1 orth:hasHomologousMember* ?MOUSE_PROTEIN.\n    ?MOUSE_PROTEIN a orth:Protein.\n    ?MOUSE_PROTEIN  orth:organism/obo:RO_0002162/up:scientificName 'Mus musculus';\n    rdfs:label 'HBB-Y';\n    lscr:xrefUniprot ?MOUSE_UNIPROT_XREF.\n    ?RABIT_PROTEIN a orth:Protein.\n    ?RABIT_PROTEIN orth:organism/obo:RO_0002162/up:scientificName 'Oryctolagus cuniculus' .\n    ?RABIT_PROTEIN lscr:xrefUniprot ?RABIT_UNIPROT_XREF.\n    filter(?node1 != ?node2)\n}",
        "results": 24,
        "runtime": 0,
    },
    {
        "question": "Select all Rhea reactions that have a given ChEBI ID as reaction participant",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>\n# Query 14\n# Select all Rhea reactions that have CHEBI:29985 (L-glutamate) as reaction participant\n# \n# This query corresponds to the Rhea website query:\n# https://www.rhea-db.org/rhea?query=chebi:29985\nSELECT distinct ?chebi ?rhea ?equation\nWHERE {\n  ?rhea rdfs:subClassOf rh:Reaction .\n  ?rhea rh:equation ?equation .\n  ?rhea rh:side/rh:contains/rh:compound ?compound .\n  #\n  # the ChEBI can be used either as a small molecule, the reactive part of a macromolecule or as a polymer.\n  #\n  ?compound (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .\n  VALUES (?chebi) { (CHEBI:29985) }\n}",
        "results": 244,
        "runtime": 0,
    },
    {
        "question": "Select all Rhea reactions mapped to KEGG reactions",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n# Query 5 \n# Select all Rhea reactions mapped to KEGG reactions\n# KEGG reactions are mapped to Rhea bidirectional reactions\n# Rhea web query: https://www.rhea-db.org/rhea?query=kegg:*\n#\nSELECT ?rhea ?kegg ?rheaDir \nWHERE {\n  ?rhea rdfs:subClassOf rh:Reaction .\n  ?rhea rh:bidirectionalReaction ?rheaDir .\n  ?rheaDir rdfs:seeAlso ?kegg .\n  FILTER (regex(str(?kegg),'kegg'))\n}",
        "results": 6898,
        "runtime": 0,
    },
    {
        "question": "Select all ChEBI compounds used in Rhea as reaction participant",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n# Query 15\n# Select all ChEBI compounds used in Rhea as reaction participant\n# \n# This query can not be expressed in the Rhea website\nSELECT ?chebi ?name (count(?rhea) as ?countRhea)\nWHERE {\n  ?rhea rdfs:subClassOf rh:Reaction .\n  ?rhea rh:side/rh:contains/rh:compound ?compound .\n  #\n  # the ChEBI can be used either as a small molecule, the reactive part of a macromolecule or as a polymer.\n  #\n  ?compound (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .\n  ?chebi up:name ?name .\n}\nGROUP BY ?chebi ?name\nORDER BY DESC(?countRhea)",
        "results": 12594,
        "runtime": 6,
    },
    {
        "question": "Select all Rhea reactions mapped to MetaCyc reactions",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": 'PREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n# Query 6 \n# Select all Rhea reactions mapped to MetaCyc reactions\n# MetaCyc reactions are mapped to either undirected, left-to-right, right-to-left or bidirectional reactions\n# Rhea web query: https://www.rhea-db.org/rhea?query=metacyc:*\n#\nSELECT distinct ?rhea ?rheaDir ?metacyc\nWHERE {\n  ?rhea rdfs:subClassOf rh:Reaction .\n  {\n    ?rhea rdfs:seeAlso ?metacyc .\n    FILTER CONTAINS(str(?metacyc), "METACYC") \n    BIND(?rhea as ?rheaDir)\n  }\n  UNION\n  {\n    ?rhea rh:directionalReaction ?directionalReaction .\n    ?directionalReaction rdfs:seeAlso ?metacyc . \n    FILTER CONTAINS(str(?metacyc), "METACYC") \n    BIND(?directionalReaction as ?rheaDir  )\n  }\n  UNION\n  {\n    ?rhea rh:bidirectionalReaction ?bidirectionalReaction .\n    ?bidirectionalReaction rdfs:seeAlso ?metacyc . \n    FILTER CONTAINS(str(?metacyc), "METACYC") \n    BIND(?bidirectionalReaction as ?rheaDir  )\n  }\n}',
        "results": 9293,
        "runtime": 3,
    },
    {
        "question": "Generate a draft human metabolome",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX taxon: <http://purl.uniprot.org/taxonomy/>\nPREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX chebislash: <http://purl.obolibrary.org/obo/chebi/>\nSELECT\n  ?uniprot ?mnemonic ?rhea ?chebi ?smiles ?inchiKey\nWHERE\n{\n  ?rhea rh:side/rh:contains/rh:compound ?compound .\n  ?compound (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .\n  ?chebi chebislash:smiles ?smiles ;\n          chebislash:inchikey ?inchiKey .\n  SERVICE <https://sparql.uniprot.org/sparql/> {\n     ?uniprot up:annotation/up:catalyticActivity/up:catalyzedReaction ?rhea ;\n                                             up:organism taxon:9606 ;\n                                             up:mnemonic ?mnemonic .\n\n   }\n}",
        "results": 279926,
        "runtime": 131,
    },
    {
        "question": "Select all Rhea reactions linked to an enzyme classification (sub)-class",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX ec: <http://purl.uniprot.org/enzyme/>\n# Query 10\n# Select all Rhea reactions mapped to \n# \n# This query corresponds to the Rhea website query:\n# https://www.rhea-db.org/rhea?query=ec:1.*\n# https://www.rhea-db.org/rhea?query=ec:1.1.*\n# https://www.rhea-db.org/rhea?query=ec:1.1.1.*\n#\nSELECT ?ec ?ecNumber ?rhea ?accession ?equation\nWHERE {\n  ?rhea rdfs:subClassOf rh:Reaction .\n  ?rhea rh:accession ?accession .\n  ?rhea rh:ec ?ec .\n  BIND(strafter(str(?ec),str(ec:)) as ?ecNumber)\n  # class (e.g EC 1.-.-.-)\n  FILTER (regex(?ecNumber,'^1\\\\.')) \n  # sub-class  (e.g EC 1.1.-.-)\n  #FILTER (regex(?ecNumber,'^1\\\\.1\\\\.')) \n  # sub-sub-class (e.g EC 1.1.1.-)\n  #FILTER (regex(?ecNumber,'^1\\\\.1\\\\.1\\\\.')) \n  ?rhea rh:equation ?equation .\n}",
        "results": 2658,
        "runtime": 0,
    },
    {
        "question": "Select all Rhea reactions",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n# Query 1\n# Select all Rhea reactions (unspecified direction) \n# and return identifier (id), accession, boolean attributes (isChemicallyBalanced, isTransport) and chemical equation.\n#\nSELECT ?rhea ?id ?accession ?isChemicallyBalanced ?isTransport  ?equation \nWHERE {\n  ?rhea rdfs:subClassOf rh:Reaction .\n  ?rhea rh:id ?id .\n  ?rhea rh:accession ?accession .\n  ?rhea rh:equation ?equation .\n  ?rhea rh:isTransport ?isTransport .\n  ?rhea rh:isChemicallyBalanced ?isChemicallyBalanced .\n}",
        "results": 16609,
        "runtime": 2,
    },
    {
        "question": "Select all Rhea reactions that have a pair of ChEBI IDs as reaction participant and in opposite side",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>\n# Query 16\n# Select all Rhea reactions that have a pair of ChEBI IDs as reaction participant and in opposite side\n# Return Rhea reactions that have CHEBI:29985 (L-glutamate) as reaction participant in one side\n# and CHEBI:58359 (L-glutamine) in the other side\n#\n# This query cannot be expressed in the Rhea website\nSELECT ?chebi1 ?name1 ?chebi2 ?name2 ?rhea ?equation\nWHERE {\n  VALUES (?chebi1) { (CHEBI:29985) }\n  ?chebi1 up:name ?name1 .\n  ?rhea rh:side ?reactionSide1 .\n  ?reactionSide1  rh:contains / rh:compound / rh:chebi ?chebi1 .\n\n  VALUES (?chebi2) { (CHEBI:58359) }\n  ?chebi2 up:name ?name2 .\n\n  ?rhea rh:side ?reactionSide2 .\n  ?reactionSide2  rh:contains / rh:compound / rh:chebi ?chebi2 .\n  \n  ?reactionSide1 rh:transformableTo ?reactionSide2 .\n  \n  ?rhea rh:equation ?equation .\n}",
        "results": 32,
        "runtime": 1,
    },
    {
        "question": "Select all Rhea reactions mapped to Reactome reactions",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": 'PREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n# Query 7 \n# Select all Rhea reactions mapped to Reactome reactions\n# MetaCyc reactions are mapped to either undirected, left-to-right or right-to-left reactions\n# Rhea web query: https://www.rhea-db.org/rhea?query=reactome:*\n#    \nSELECT distinct ?rhea ?rheaDir ?reactome  \nWHERE {\n  ?rhea rdfs:subClassOf rh:Reaction .\n  {\n    ?rhea rdfs:seeAlso ?reactome .\n    FILTER CONTAINS(str(?reactome), "reactome") \n    BIND(?rhea as ?rheaDir)\n  }\n  UNION\n  {\n    ?rhea rh:directionalReaction ?directionalReaction .\n    ?directionalReaction rdfs:seeAlso ?reactome . \n    FILTER CONTAINS(str(?reactome), "reactome") \n    BIND(?directionalReaction as ?rheaDir  )\n  }\n}',
        "results": 1502,
        "runtime": 0,
    },
    {
        "question": "Distribution of reactions according to the first class of the enzyme classification (federated query)",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\nPREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX ec: <http://purl.uniprot.org/enzyme/>\n# Query 11\n# Retrieve the count of reactions mapped to each level (main class) of the enzyme classification\n# \n# This query mimics the Filter section of the Rhea website (Browse all reactions)\n# https://www.rhea-db.org/rhea?query=\nSELECT ?ecClass (STR(?ecName) AS ?ecClassName) (COUNT(?rhea) AS ?rheaCount)\nWHERE {\n  SERVICE <https://sparql.uniprot.org/sparql> {\n    VALUES (?ecClass) { (ec:1.-.-.-)(ec:2.-.-.-)(ec:3.-.-.-)(ec:4.-.-.-)(ec:5.-.-.-) (ec:6.-.-.-) (ec:7.-.-.-)}\n    ?ecNumber rdfs:subClassOf ?ecClass .\n    ?ecClass skos:prefLabel ?ecName .\n  }\n  ?rhea rdfs:subClassOf rh:Reaction .\n  ?rhea rh:ec ?ecNumber .\n} GROUP BY ?ecClass ?ecName",
        "results": 7,
        "runtime": 4,
    },
    {
        "question": "Select all Rhea reactions annotated with a given Pubmed ID",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX pubmed: <http://rdf.ncbi.nlm.nih.gov/pubmed/>\n# Query 2 \n# Select all Rhea reactions annotated with a given Pubmed identifier (PMID = 29867142)\n#\nSELECT ?pubmed ?rhea ?accession ?isTransport  ?equation \nWHERE {\n  ?rhea rdfs:subClassOf rh:Reaction .\n  ?rhea rh:accession ?accession .\n  ?rhea rh:citation ?pubmed .\n  VALUES (?pubmed) { (pubmed:29867142) }\n  ?rhea rh:isTransport ?isTransport .\n  ?rhea rh:equation ?equation .\n} ORDER BY ?rhea",
        "results": 14,
        "runtime": 0,
    },
    {
        "question": "Select all Rhea reactions that involve a lipid, i.e. children of CHEBI:18059 in the ChEBI hierarchy.",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX owl: <http://www.w3.org/2002/07/owl#>\nPREFIX chebihash: <http://purl.obolibrary.org/obo/chebi#>\nPREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>\n# Query 17\n# Select all Rhea reactions that involve a lipid, i.e. children of CHEBI:18059 in the ChEBI hierarchy.\n# \n# This query corresponds to the Rhea website query:\n# https://www.rhea-db.org/rhea?query=chebi:18059\n#\nSELECT distinct ?chebi ?name ?rhea ?equation\nWHERE {\n  ?rhea rdfs:subClassOf rh:Reaction .\n  ?rhea rh:equation ?equation .\n  ?rhea rh:side/rh:contains/rh:compound ?compound .\n  #\n  # the ChEBI can be used either as a small molecule, the reactive part of a macromolecule or as a polymer.\n  #\n  { \n    ?chebi rdfs:subClassOf* CHEBI:18059 . # lipid\n    ?compound (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .\n  }\n  UNION \n  { # add non-pH 7.3 species\n    ?not7_3 rdfs:subClassOf* CHEBI:18059 . # lipid\n    ?not7_3 rdfs:subClassOf ?chebiRestriction .\n    ?chebiRestriction a owl:Restriction .\n    ?chebiRestriction owl:onProperty chebihash:has_major_microspecies_at_pH_7_3 .\n    ?chebiRestriction owl:someValuesFrom ?chebi .\n    ?compound (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .\n  }\n  ?chebi up:name ?name .\n}",
        "results": 12834,
        "runtime": 6,
    },
    {
        "question": "Select all Rhea reactions mapped to GO molecular functions",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": 'PREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n# Query 8\n# Select all Rhea reactions mapped to GO molecular function\n# \n# This query corresponds to the Rhea website query:\n# https://www.rhea-db.org/rhea?query=go:*\n#\nSELECT ?go ?rhea ?equation \nWHERE {\n  ?rhea rdfs:subClassOf rh:Reaction .\n  ?rhea rdfs:seeAlso ?go .\n  FILTER CONTAINS(str(?go), "GO_")   \n  ?rhea rh:equation ?equation .\n}',
        "results": 4432,
        "runtime": 0,
    },
    {
        "question": "Select all Rhea reactions linked to protein sequences (enzymes and transporters) in UniProtKB",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX up: <http://purl.uniprot.org/core/>\nPREFIX rh: <http://rdf.rhea-db.org/>\n# Query 12\n# Select all Rhea reactions used to annotate enzyme sequences in UniProtKB\n# return the number of UniProtKB entries\n# Federated query using a service to UniProt SPARQL endpoint\n#\n# This query corresponds to the Rhea website query:\n# https://www.rhea-db.org/rhea?query=uniprot:*\n#\nSELECT ?uniprotCount ?rhea ?accession ?equation \nWHERE {\n  SERVICE <https://sparql.uniprot.org/sparql> { \n  \tSELECT ?rhea (count(?uniprot) as ?uniprotCount) {\n      ?uniprot up:annotation/up:catalyticActivity/up:catalyzedReaction ?rhea . \n  \t}\n  \tGROUP BY ?rhea\n  }\n  ?rhea rh:accession ?accession .\n  ?rhea rh:equation ?equation .\n}",
        "results": 12402,
        "runtime": 3,
    },
    {
        "question": "Select the specific form of RHEA:11628, a reaction that deals with general chemical classes (i.e. select the equivalent reactions involving instances of the chemical classes in RHEA:11628",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n# Query 3\n# Select the specific form of RHEA:11628.\n# This query mimics the Related reactions sections of\n# https://www.rhea-db.org/rhea/11628\n#\nSELECT ?rhea  ?equation ?childrenRhea ?childrenEquation \nWHERE {\n  VALUES (?rhea) {(rh:11628)}\n  ?rhea rh:equation ?equation .\n  ?childrenRhea rdfs:subClassOf+ ?rhea .\n  ?childrenRhea rh:equation ?childrenEquation .\n}",
        "results": 12,
        "runtime": 0,
    },
    {
        "question": "Use IDSM Sachem to find ChEBIs with a a Cholestane skeleton (in SMILES). Then match returned ChEBIs to Rhea undirected reactions.",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": 'PREFIX sachem: <http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#>\nPREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n\nSELECT \n  ?rhea \n  ?chebi\nWHERE {\n  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/chebi> {\n    ?chebi sachem:substructureSearch [\n        sachem:query "[C@]12(CCC3CCCC[C@]3(C)[C@@]1([H])CC[C@]1(C)[C@@]([H])([C@@](C)([H])CCCC(C)C)CC[C@@]21[H])[H]" ].\n  }\n  ?rhea rh:side/rh:contains/rh:compound/rdfs:subClassOf ?chebi .\n}',
        "results": 135,
        "runtime": 0,
    },
    {
        "question": "Select all Rhea reactions mapped to enzyme classification (EC numbers)",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nPREFIX ec: <http://purl.uniprot.org/enzyme/>\n# Query 9\n# Select all Rhea reactions mapped to EC numbers (enzyme classification)\n# \n# This query corresponds to the Rhea website query:\n# https://www.rhea-db.org/rhea?query=ec:*\n#\nSELECT ?ec ?ecNumber ?rhea ?accession ?equation \nWHERE {\n  ?rhea rdfs:subClassOf rh:Reaction .\n  ?rhea rh:accession ?accession .\n  ?rhea rh:ec ?ec .\n  BIND(strafter(str(?ec),str(ec:)) as ?ecNumber)\n  ?rhea rh:isTransport ?isTransport .\n  ?rhea rh:equation ?equation .\n}",
        "results": 7675,
        "runtime": 0,
    },
    {
        "question": "Select all cross-references for a given reaction",
        "endpoint": "https://sparql.rhea-db.org/sparql/",
        "query": "PREFIX rh: <http://rdf.rhea-db.org/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n# Query 4\n# Select all cross-references mapped to RHEA:21016\n# \n# This query mimics the Cross-references section of \n# https://www.rhea-db.org/rhea/21016\n#\nSELECT distinct ?rhea ?rheaDir ?xref \nWHERE {\n  ?rhea rdfs:subClassOf rh:Reaction .\n  VALUES (?rhea) {(rh:21016)}\n  {\n    ?rhea rdfs:seeAlso ?xref .\n    BIND(?rhea as ?rheaDir)\n  }\n  UNION\n  {\n    ?rhea rh:directionalReaction ?directionalReaction .\n    ?directionalReaction rdfs:seeAlso ?xref . \n    BIND(?directionalReaction as ?rheaDir  )\n  }\n  UNION\n  {\n    ?rhea rh:bidirectionalReaction ?bidirectionalReaction .\n    ?bidirectionalReaction rdfs:seeAlso ?xref . \n    BIND(?bidirectionalReaction as ?rheaDir  )\n  }\n}",
        "results": 6,
        "runtime": 0,
    },
]

# Counters for the evaluation run.
# - success:  generated query matched the reference textually or by row count.
# - fail:     every query that ended in the error handler below, *including*
#             row-count mismatches.
# - mismatch: subset of `fail` where the generated query still returned at
#             least one row, just not the expected number.
success = 0
mismatch = 0
fail = 0

print(f"🧪 Testing {len(example_queries)} queries")

# For each example: ask the chat API the question, take the last SPARQL query
# extracted from its markdown answer, and compare it with the reference query,
# first textually, then by the number of result rows it returns.
for i, query in enumerate(example_queries):
    # Tip: to resume an interrupted run, skip already-tested queries here,
    # e.g. `if i < 49: continue`.

    # Reset per iteration so the error handler never reports a stale query left
    # over from a previous question, and never hits an undefined name when the
    # failure happens before a query could be extracted.
    generated_sparql = None
    generated_query = None
    try:
        # The request lives inside the `try` so that a timeout or HTTP error on
        # one question is counted as a failure instead of aborting the run.
        # NOTE(review): max_tokens=50 looks low for a full SPARQL answer;
        # confirm how the /chat endpoint interprets it.
        resp = requests.post(
            "http://localhost:8000/chat",
            json={
                "messages": [{"role": "user", "content": query["question"]}],
                "model": "gpt-4o",
                "max_tokens": 50,
                "stream": False,
                "api_key": expasy_api_key,
            },
            timeout=60,
        )
        resp.raise_for_status()
        chat_resp_md = resp.json()["choices"][0]["message"]["content"]

        extracted_queries = extract_sparql_queries(chat_resp_md)
        if not extracted_queries:
            raise ValueError("no SPARQL query found in the chat response")
        generated_sparql = extracted_queries[-1]
        generated_query = generated_sparql["query"]

        if generated_query.strip() == query["query"].strip():
            print(f"✅ {query['question']}. EXACT MATCH")
            success += 1
            # The running tally at the end of the loop body is skipped for
            # exact matches.
            continue

        # Execute the generated query against the endpoint attached to the
        # extracted query (not the reference query["endpoint"]).
        sparql_endpoint = SPARQLWrapper(generated_sparql["endpoint"])
        sparql_endpoint.setReturnFormat(JSON)
        sparql_endpoint.setQuery(generated_query)
        sparql_endpoint.setTimeout(200)
        results = sparql_endpoint.query().convert()
        res_from_generated = len(results["results"]["bindings"])

        if res_from_generated != query["results"]:
            if res_from_generated > 0:
                mismatch += 1
            # Routed through the handler below so the generated and reference
            # queries are printed side by side.
            raise ValueError(f"{query['results']} != {res_from_generated}")

        print(f"✅ {query['question']} {query['results']} == {res_from_generated}")
        success += 1
    except Exception as e:
        # Broad on purpose: this is the per-query boundary of an evaluation
        # run; any failure is reported and the loop moves on.
        fail += 1
        print(f"❌ {query['question']} {e}. Generated query:")
        if generated_query is not None:
            print(generated_query)
        else:
            print("(no generated query available)")
        print("Correct query:")
        print(query["query"])
        print("")

    # `fail` includes mismatches, so subtract them to report hard failures only.
    print(f"⚖️ Success: {success}, Mismatch: {mismatch}, Fail: {fail - mismatch}")

  from .autonotebook import tqdm as notebook_tqdm


🧪 Testing 74 queries
