# Running example queries

This notebook tries to run the example SPARQL queries using `SPARQLWrapper`. It is a painful experience: the wrapper does not work well, even with a basic Virtuoso SPARQL endpoint.

## Questions to test

Select all human UniProt entries with a sequence variant that leads to a tyrosine to phenylalanine substitution



In [2]:
import time
import json
import re

from qdrant_client.models import FieldCondition, Filter, MatchValue, ScoredPoint
from rdflib.plugins.sparql import prepareQuery, prepareUpdate
from rdflib.plugins.sparql.algebra import translateQuery
from SPARQLWrapper import JSON, TURTLE, XML, SPARQLWrapper

from expasy_api.embed import QUERIES_COLLECTION, get_vectordb

# Vector database handle obtained from the project helper, pointed at "localhost".
vectordb = get_vectordb("localhost")

# Restrict the scroll to points whose payload field "doc_type" equals "sparql".
sparql_docs_only = Filter(
    must=[FieldCondition(key="doc_type", match=MatchValue(value="sparql"))]
)

# Fetch payloads only (no vectors), asking for up to 1000 records; the second
# element of the returned pair is not needed here and is discarded.
all_queries, _ = vectordb.scroll(
    collection_name=QUERIES_COLLECTION,
    scroll_filter=sparql_docs_only,
    with_payload=True,
    with_vectors=False,
    limit=1000,
)

# Trailing LIMIT clause, optionally followed by an OFFSET clause. SPARQL accepts
# both "LIMIT n OFFSET m" and "OFFSET m LIMIT n"; the latter already ends in
# "LIMIT n", so a single optional OFFSET suffix covers both orders.
_TRAILING_LIMIT = re.compile(r"\bLIMIT\s+\d+(?:\s+OFFSET\s+\d+)?\s*$", re.IGNORECASE)


def ensure_limit(query: str) -> str:
    """Return *query* with a ``LIMIT 1`` appended if it has no trailing LIMIT.

    A query that already ends with a LIMIT clause (in any letter case, with or
    without an accompanying OFFSET clause, ignoring trailing whitespace) is
    returned unchanged. Otherwise trailing whitespace is stripped and
    ``" LIMIT 1"`` is appended.

    Args:
        query: SPARQL query text.

    Returns:
        The original query, or the stripped query followed by ``" LIMIT 1"``.
    """
    stripped = query.rstrip()
    if _TRAILING_LIMIT.search(stripped) is None:
        # Without the OFFSET-aware pattern, "LIMIT 10 OFFSET 5" would get a
        # second LIMIT appended here and become invalid SPARQL.
        return stripped + " LIMIT 1"
    return query

# Accumulates one record per query that ran and returned at least one result.
questions_results = []

# rdflib algebra names of query forms that are skipped below (see the NOTE in the loop).
queries_returning_rdf = ["DescribeQuery", "ConstructQuery"]
# Positions in all_queries that are skipped to avoid endpoint timeouts.
skip_queries = [7, 27, 32, 37, 38, 42, 43, 47, 62, 63]
total_queries = len(all_queries)
print(total_queries)
for i, query_record in enumerate(all_queries):
    start_time = time.time()
    try:
        # The question is the text after the first ":" of the payload comment;
        # a comment without ":" raises IndexError and is reported by the handler below.
        question = query_record.payload["comment"].split(":", 1)[1].strip()
        endpoint = query_record.payload["endpoint"]
        query = query_record.payload["example"]
        if i in skip_queries:
            print(f"⏩️ {i}/{total_queries} Skipping to avoid timeout: {question} <{endpoint}>")
            continue

        print(f"[{i}/{total_queries}] {question} <{endpoint}>")

        # Parse locally with rdflib to learn the query form before sending it.
        query_type = prepareQuery(query).algebra.name
        if query_type in queries_returning_rdf:
            # NOTE: for some reason CONSTRUCT queries are failing directly with
            # every format, so RDF-returning queries are not executed at all.
            print(f"⏩️ Skipping {query_type}: RDF-returning queries are not executed")
            continue

        sparql_endpoint = SPARQLWrapper(endpoint)
        sparql_endpoint.setReturnFormat(JSON)
        sparql_endpoint.setQuery(query)
        sparql_endpoint.setTimeout(200)
        results = sparql_endpoint.query().convert()

        if query_type == "AskQuery":
            # ASK results carry a single boolean instead of bindings.
            res_count = 1 if results["boolean"] else 0
        else:
            res_count = len(results["results"]["bindings"])

        if res_count > 0:
            string_resp = "✅"
            questions_results.append(
                {
                    "question": question,
                    "endpoint": endpoint,
                    "query": query,
                    "results": res_count,
                    "runtime": int(time.time() - start_time),
                }
            )
            # Dump progress once per 10 collected results. Checking only right
            # after an append avoids re-printing the same list (or an empty
            # one) on every iteration that adds nothing.
            if len(questions_results) % 10 == 0:
                print(json.dumps(questions_results, indent=4))
        else:
            string_resp = "❌"
    except Exception as e:
        # Per-query boundary: report the failure and carry on with the next query.
        res_count = str(e)
        string_resp = "💣️"

    print(f"{string_resp} results {res_count} in {time.time() - start_time:.2f}s")

print(json.dumps(questions_results, indent=4))

119
⏩️ 63/119 Skipping to avoid timeout: Find UniProt entries with a transmembrane region, with an Alanine in the 15 Aminoacid region preceding the transmembrane <https://sparql.uniprot.org/sparql/>
[64/119] Retrieve glycosylation sites and glycans on human enzymes (federated with glyconnect) <https://sparql.uniprot.org/sparql/>
💣️ results EndPointInternalError: The endpoint returned the HTTP status code 500. 

Response:
b'<!DOCTYPE html SYSTEM "about:legacy-compat">\n<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head><title>Error</title><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"/><link href="/" rel="home"/><link href="/base.css" type="text/css" rel="stylesheet"/><link type="image/vnd.microsoft.icon" href="/uniprot-favicon.ico" rel="shortcut icon"/><link href="/uniprot-sparql.css" type="text/css" rel="stylesheet"/><link href="http://creativecommons.org/licenses/by/4.0/" rel="license"/><script src="https://cdn.jsdelivr.net/npm/yasgui-yasqe@2