# Parsing a SPARQL query to extract basic graph patterns

Parse a SPARQL query and extract the basic graph patterns (BGPs) it contains, so that they can be loaded into an RDFLib graph. Federated queries (`SERVICE` clauses) are supported.

In [2]:
import json
from typing import Tuple
from unittest import result
from SPARQLWrapper import SPARQLWrapper

from rdflib import Namespace, URIRef, Variable, ConjunctiveGraph
from rdflib.plugins.sparql.parser import parseQuery
from rdflib.plugins.sparql.algebra import translateQuery
from rdflib.plugins.sparql.sparql import Query
from rdflib.paths import Path, SequencePath, MulPath
from expasy_chat.utils import validate_sparql_with_void


# sparql_query = """
# PREFIX up: <http://purl.uniprot.org/core/>
# PREFIX rh: <http://rdf.rhea-db.org/>
# SELECT ?uniprotCount ?rhea ?accession ?equation
# WHERE {
#   SERVICE <https://sparql.uniprot.org/sparql> {
#       SELECT ?rhea (count(?uniprot) as ?uniprotCount) {
#           ?uniprot up:annotation ?rhea .
#           ?uniprot up:toast ?toooast .
#       }
#       GROUP BY ?rhea
#   }
#   ?rhea rh:accession ?accession .
#   ?rhea rh:equation ?equation .
# }
# """

# "Hard" test query: a federated SELECT spanning three SERVICE endpoints
# (UniProt, OMA Browser and Bgee). It combines nested sub-SELECTs, sequence
# property paths (`a/b`), zero-or-more paths (`p*`) and FILTER clauses.
#
# The `#` fragments inside the string are SPARQL comments and are part of the
# query text handed to the validator; they are not Python comments.
#
# NOTE(review): `up:scientificNam` (missing trailing "e") and
# `sio:SIO_010078` (annotated "# 79" in the query) look like deliberate
# mistakes meant to be caught by the validation step -- confirm before
# "fixing" them.
sparql_query = """PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up:<http://purl.uniprot.org/core/>
PREFIX taxon:<http://purl.uniprot.org/taxonomy/>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX orth:<http://purl.org/net/orth#>
PREFIX dcterms:<http://purl.org/dc/terms/>
PREFIX obo:<http://purl.obolibrary.org/obo/>
PREFIX lscr:<http://purl.org/lscr#>
PREFIX genex:<http://purl.org/genex#>
PREFIX sio: <http://semanticscience.org/resource/>
SELECT DISTINCT ?diseaseLabel ?humanProtein ?hgncSymbol ?orthologRatProtein ?orthologRatGene
WHERE {
    SERVICE <https://sparql.uniprot.org/sparql> {
        SELECT DISTINCT * WHERE {
            ?humanProtein a up:Protein ;
                up:organism/up:scientificName 'Homo sapiens' ;
                up:annotation ?annotation ;
                rdfs:seeAlso ?hgnc .
            ?hgnc up:database <http://purl.uniprot.org/database/HGNC> ;
                rdfs:label ?hgncSymbol . # comment
            ?annotation a up:Disease_Annotation ;
                up:disease ?disease .
            ?disease a up:Disease ;
                rdfs:label ?diseaseLabel . # skos:prefLabel
            FILTER CONTAINS(?diseaseLabel, "cancer")
        }
    }
    SERVICE <https://sparql.omabrowser.org/sparql/> {
        SELECT ?humanProtein ?orthologRatProtein ?orthologRatGene WHERE {
            ?humanProteinOma a orth:Protein ;
                lscr:xrefUniprot ?humanProtein .
            ?orthologRatProtein a orth:Protein ;
                sio:SIO_010078 ?orthologRatGene ; # 79
                orth:organism/obo:RO_0002162/up:scientificNam 'Rattus norvegicus' .
            ?cluster a orth:OrthologsCluster .
            ?cluster orth:hasHomologousMember ?node1 .
            ?cluster orth:hasHomologousMember ?node2 .
            ?node1 orth:hasHomologousMember* ?humanProteinOma .
            ?node2 orth:hasHomologousMember* ?orthologRatProtein .
            FILTER(?node1 != ?node2)
        }
    }
    SERVICE <https://www.bgee.org/sparql/> {
        ?orthologRatGene genex:isExpressedIn ?anatEntity ;
            orth:organism ?ratOrganism .
        ?anatEntity rdfs:label 'brain' .
        ?ratOrganism obo:RO_0002162 taxon:10116 .
    }
}
"""

# Run the validator on the query above, passing the UniProt endpoint URL.
# Any exception is printed rather than propagated, so the notebook cell keeps
# running and simply displays the validation message.
try:
    validate_sparql_with_void(
        sparql_query,
        "https://sparql.uniprot.org/sparql",
    )
except Exception as validation_error:
    print(validation_error)


# sparql_query = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
# PREFIX up: <http://purl.uniprot.org/core/>
# PREFIX genex: <http://purl.org/genex#>
# PREFIX obo: <http://purl.obolibrary.org/obo/>
# PREFIX orth: <http://purl.org/net/orth#>
# PREFIX dcterms: <http://purl.org/dc/terms/>
# PREFIX sio: <http://semanticscience.org/resource/>

# SELECT DISTINCT ?flyEnsemblGene ?orthologTaxon ?orthologEnsemblGene ?orthologOmaLink WHERE {
# 	{
#         SELECT DISTINCT ?gene ?flyEnsemblGene {
#         ?gene a orth:Gene ;
#             genex:isExpressedIn/rdfs:label 'eye' ;
#             orth:organism/obo:RO_0002162 ?taxon ;
#             dcterms:identifier ?flyEnsemblGene .
#         ?taxon up:commonName 'fruit fly' .
#         } LIMIT 100
#     }
#     SERVICE <https://sparql.omabrowser.org/sparql> {
#         ?protein2 a orth:Protein .
#         ?protein1 a orth:Protein .
#         ?clusterPrimates a orth:OrthologsCluster .
#         ?cluster a orth:OrthologsCluster ;
#             orth:hasHomologousMember ?node1 ;
#             orth:hasHomologousMember ?node2 .
#         ?node1 orth:hasHomologousMember* ?protein1 .
#         ?node2 orth:hasHomologousMember* ?clusterPrimates .
#         ?clusterPrimates orth:hasHomologousMember* ?protein2 .
#         ?protein1 sio:SIO_010079 ?gene . # is encoded by
#         ?protein2 rdfs:seeAlso ?orthologOmaLink ;
#             orth:organism/obo:RO_0002162 ?orthologTaxonUri ;
#             sio:SIO_010079 ?orthologGene . # is encoded by
#         ?clusterPrimates orth:hasTaxonomicRange ?taxRange .
#         ?taxRange orth:taxRange 'Primates' .
#         FILTER ( ?node1 != ?node2 )
#     }
#     ?orthologTaxonUri up:commonName ?orthologTaxon .
#     ?orthologGene dcterms:identifier ?orthologEnsemblGene .
# }"""

# try:
#     validate_sparql_with_void(sparql_query, "https://www.bgee.org/sparql")
# except Exception as e:
#     print(e)


# # Failing query:
# sparql_query = """PREFIX up: <http://purl.uniprot.org/core/>
# PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
# PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
# SELECT ?protein ?sequence
# WHERE {
#     ?protein a up:Protein .
#     ?protein up:reviewed true .
#     ?protein up:organism taxon:10090 .
#     ?protein up:matureProtein ?matureProtein .
#     ?matureProtein rdf:value ?sequence .
#     FILTER (STRSTARTS(?sequence, "G")) .
# }"""


# print("\n".join(error_msgs))
# print(len(error_msgs))
# qres = g.query("""SELECT ?s ?p ?o WHERE {
#     ?s ?p ?o
# } LIMIT 10""")
# for row in qres:
#     print(f"{row.s} knows {row.o}")


Subject ?pathVar6 in endpoint https://sparql.omabrowser.org/sparql does not support the predicate http://purl.obolibrary.org/obo/RO_0002163 according to the VOID description. Correct predicate might be one of the following: http://purl.obolibrary.org/obo/RO_0002162 (we inferred this variable might be of the type http://purl.org/net/orth#Organism)
