In [1]:
import re
import sqlite3


def parse_sparql_query(sparql: str):
    """
    Parse a minimal SPARQL SELECT query into its components.

    SPARQL query syntax (keywords are matched case-insensitively):
    SELECT ?x ?y WHERE {
    ?x pred_1 some_obj .
    ?y pred_2 ?z
    } ORDER BY ASC(?x) LIMIT 10

    Triples are separated by "."; a trailing "." after the last triple is
    accepted. The ORDER BY and LIMIT clauses are optional.

    Returns a 4-tuple (variables, triples, order_by, limit):
    - variables: list of the selected variable names, e.g. ["?x", "?y"]
    - triples: list of (subject, predicate, object) string tuples
    - order_by: (variable, ascending) tuple, or None without ORDER BY
    - limit: the LIMIT value as an int, or None without LIMIT

    Raises ValueError for a triple with fewer than three terms or a
    non-integer LIMIT value, and AssertionError for a malformed ORDER BY
    clause or one that orders by a variable that is not selected.
    """
    # format the SPARQL query into a single line for parsing
    sparql = " ".join(line.strip() for line in sparql.splitlines())

    # lower-cased copy used only to locate the case-insensitive keywords;
    # values are always sliced from the original so their case is preserved.
    sparql_lower = sparql.lower()

    # find all variables in the SPARQL between the SELECT and WHERE clause.
    select_start = sparql_lower.find("select ") + 7
    select_end = sparql_lower.find(" where", select_start)
    variables = sparql[select_start:select_end].split()

    # find all triples between "WHERE {" and "}"
    where_start = sparql_lower.find("{", select_end) + 1
    where_end = sparql_lower.rfind("}", where_start)
    triples = []
    for triple_text in sparql[where_start:where_end].split("."):
        triple_text = triple_text.strip()
        if not triple_text:
            # empty segment, e.g. produced by a trailing "." after the
            # last triple
            continue
        # split on any whitespace run so repeated spaces or tabs between
        # terms do not yield empty terms; the object keeps inner spaces
        parts = triple_text.split(None, 2)
        if len(parts) != 3:
            raise ValueError(
                f"expected 'subject predicate object', got {triple_text!r}"
            )
        triples.append(tuple(parts))

    # find the (optional) ORDER BY clause
    order_by_start = sparql_lower.find(" order by ", where_end)
    if order_by_start != -1:
        # match against the original text so the variable keeps its case
        search = sparql[order_by_start + 10:]
        match = re.match(r"(asc|desc)\((\?[^\s]+)\)", search, re.IGNORECASE)
        # AssertionError is raised explicitly (not via `assert`) so the
        # validation survives `python -O` while keeping the exception type
        # existing callers may rely on.
        if match is None:
            raise AssertionError(
                f"could not find order by direction or variable in {search}"
            )
        order_by = (match.group(2), match.group(1).lower() == "asc")
        if order_by[0] not in variables:
            raise AssertionError(
                f"cannot order by, {order_by[0]} not in variables"
            )
        order_by_end = order_by_start + 10 + len(match.group(0))
    else:
        order_by = None
        order_by_end = where_end

    # find the (optional) LIMIT clause
    limit_start = sparql_lower.find(" limit ", order_by_end)
    if limit_start != -1:
        limit = int(sparql[limit_start + 7:].split()[0])
    else:
        limit = None

    return variables, triples, order_by, limit

In [2]:
# Example: parse a query with two triples and no ORDER BY / LIMIT clause.
parse_sparql_query(
    sparql="SELECT ?x ?y WHERE { ?x pred_1 some_obj . ?y pred_2 ?z }"
)

(['?x', '?y'],
 [('?x', 'pred_1', 'some_obj'), ('?y', 'pred_2', '?z')],
 None,
 None)