# Test for GraphSparqlQAChain

This notebook follows the LangChain tutorial at <https://python.langchain.com/docs/integrations/graphs/rdflib_sparql/>.

In [7]:
#from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from langchain_ollama.llms import OllamaLLM
from langchain.prompts import PromptTemplate
from langchain.chains import GraphSparqlQAChain
from langchain_community.graphs import RdfGraph
from langchain_core.runnables import Runnable, RunnableSequence

## Initialize the Graph

In [8]:
# Build the RdfGraph wrapper from the Turtle ("ttl") serialization on disk.
GRAPH_SOURCE_FILE = "/Users/sjhuskey/Python/coptic_metadata_viewer/streamlit_app/graph/graph.ttl"
graph = RdfGraph(
    source_file=GRAPH_SOURCE_FILE,
    serialization="ttl",
    standard="rdf",
)

In [9]:
# Get the schema from the graph.
# `load_schema()` is called only for its side effect on `graph`; its return
# value is not the schema (per the RdfGraph API it returns nothing), so it is
# not bound to a name. The schema text is then read from the `get_schema`
# attribute, which is accessed without parentheses.
graph.load_schema()
schema = graph.get_schema

In [27]:
# Create the template for constructing the SPARQL select query.
# The template takes two variables: `schema` (the graph schema text) and
# `prompt` (the user's question). Everything else is fixed instruction text:
# the allowed prefixes, the class/property inventory, and output-format rules.
sparql_prompt = PromptTemplate(
    input_variables=["prompt", "schema"],
    template="""
    Task: Generate a SPARQL SELECT statement for querying a graph database.
    You are an expert in SPARQL queries and RDF graph structures. You know NEVER to use backticks anywhere in your output. In fact, you don't even know what the backtick key is. If one were to exist, you wouldn't be able to use it. If you ever saw one, you would shrink in horror at its grotesque appearance. Shun the backtick with all your might. I'm not kidding around here.
    You also know that you must always use `a` or `rdf:type` to state class membership.
    Under no circumstances are you to write something like '?work frbr:Work ;' because that is invalid.
    Instructions:
    Use only the node types and properties provided in the schema.
    Do not use any node types and properties that are not explicitly provided.
    Include all necessary prefixes.

    These are the only prefixes you can use:
    @prefix coptic: <http://www.semanticweb.org/sjhuskey/ontologies/2025/7/coptic-metadata-viewer/> .
    @prefix dcmitype: <http://purl.org/dc/dcmitype/> .
    @prefix dcterms: <http://purl.org/dc/terms/> .
    @prefix foaf: <http://xmlns.com/foaf/0.1/> .
    @prefix frbr: <http://purl.org/vocab/frbr/core#> .
    @prefix lawd: <http://lawd.info/ontology/> .
    @prefix ns1: <http://lexinfo.net/ontology/2.0/lexinfo#> .
    @prefix owl: <http://www.w3.org/2002/07/owl#> .
    @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
    @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
    @prefix schema1: <http://schema.org/> .
    @prefix skos: <http://www.w3.org/2004/02/skos/core#> .
    @prefix time: <http://www.w3.org/2006/time#> .
    These are the classes and their properties:
    - coptic:Author:
      - dcterms:creator
      - dcterms:description
      - dcterms:identifier
      - foaf:name
      - owl:sameAs
      - rdf:type
      - schema1:title

    - coptic:Colophon:
      - dcterms:description
      - dcterms:identifier
      - dcterms:isPartOf
      - dcterms:references
      - dcterms:type
      - ns1:translation
      - rdf:type
      - time:hasBeginning
      - time:hasEnd

    - coptic:Manuscript:
      - dcterms:bibliographicCitation
      - dcterms:description
      - dcterms:hasPart
      - dcterms:identifier
      - dcterms:isPartOf
      - dcterms:medium
      - rdf:type
      - time:hasBeginning
      - time:hasEnd

    - coptic:Title:
      - dcterms:description
      - dcterms:identifier
      - dcterms:isPartOf
      - dcterms:references
      - dcterms:type
      - rdf:type

    - dcmitype:Collection:
      - dcterms:hasPart
      - dcterms:identifier
      - dcterms:spatial
      - rdf:type

    - foaf:Person:
      - dcterms:identifier
      - dcterms:isReferencedBy
      - foaf:name
      - ns1:transliteration
      - rdf:type
      - rdfs:label
      - schema1:birthPlace
      - schema1:gender
      - schema1:roleName
      - time:hasBeginning
      - time:hasEnd

    - frbr:Work:
      - dcterms:creator
      - dcterms:description
      - dcterms:identifier
      - dcterms:isPartOf
      - dcterms:isReferencedBy
      - dcterms:temporal
      - dcterms:title
      - rdf:type
      - rdfs:label

    - lawd:Place:
      - lawd:primaryForm
      - rdf:type
      - rdfs:label
      - skos:exactMatch
    
    Schema:
    {schema}
    
    Note: Be as concise as possible.
    Do not include any explanations or apologies in your responses.
    Do not respond to any questions that ask for anything else than for you to construct a SPARQL query.
    Do not include any text except the SPARQL query generated.
    Do not use backticks.
    Do not wrap the SPARQL query in any additional formatting.
    Do not use triple backticks.
    Do not use ```sparql.

    The question is:
    {prompt}
    """,
)

# Prompt for the answer-writing step. It is filled with `context` (the
# structured data returned by the query) and `prompt` (the user's question).
QA_TEMPLATE = """
    You are a friendly and knowledgeable digital research assistant specializing in Coptic and Greek texts.

    Given the structured data below, answer the user’s question in clear, natural language.
    Use complete sentences. If multilingual forms are included, identify which language each form belongs to.

    Structured data:
    {context}

    User question:
    {prompt}

    Answer:
"""

qa_prompt = PromptTemplate(
    template=QA_TEMPLATE,
    input_variables=["context", "prompt"],
)

In [28]:
class DualLLMSparqlChain(Runnable):
    """Answer a question over an RDF graph using two separate LLMs.

    The first model turns the question into a SPARQL query. The query is run
    against the graph, and the second model phrases the returned rows as a
    natural-language answer.
    """

    def __init__(self, sparql_llm, qa_llm, sparql_prompt, qa_prompt, graph):
        """Compose the two prompt-to-model pipelines and keep the graph handle.

        Args:
            sparql_llm: Model that generates the SPARQL query.
            qa_llm: Model that writes the final answer.
            sparql_prompt: Prompt piped into ``sparql_llm``; it is invoked
                with the keys ``prompt`` and ``schema``.
            qa_prompt: Prompt piped into ``qa_llm``; it is invoked with the
                keys ``prompt`` and ``context``.
            graph: Object exposing a ``get_schema`` attribute and a
                ``query(sparql)`` method.
        """
        self.sparql_llm_chain = sparql_prompt | sparql_llm
        self.qa_llm_chain = qa_prompt | qa_llm
        self.graph = graph

    def invoke(self, user_question: str, config=None, **kwargs) -> dict:
        """Generate a SPARQL query, run it, and summarize the rows.

        Args:
            user_question: The natural-language question.
            config: Accepted for compatibility with the ``Runnable.invoke``
                signature; not used by this implementation.
            **kwargs: Accepted for the same reason; ignored.

        Returns:
            On success, a dict with ``result`` (the answer, or
            ``"No results found."`` when the query returned no rows),
            ``sparql_query`` and ``rows``. If running the query raises, a
            dict with ``result`` set to ``None``, ``error`` (the exception
            text), ``sparql_query`` and an empty ``rows`` list.
        """
        # 1. Generate SPARQL using the coding-focused LLM.
        raw_query = self.sparql_llm_chain.invoke({
            "prompt": user_question,
            "schema": self.graph.get_schema,
        })

        # 2. Normalize (coerce to text first so message-style output works too).
        sparql_query = self._normalize_sparql(self._to_text(raw_query))

        # 3. Try running the query. Any failure is reported in the returned
        #    dict rather than raised, so the caller can inspect the bad query.
        try:
            results = self.graph.query(sparql_query)
        except Exception as e:
            return {
                "result": None,
                "error": str(e),
                "sparql_query": sparql_query,
                "rows": []
            }

        # 4. Format results: one line per row, values separated by " | ".
        rows = [" | ".join(str(v) for v in row) for row in results]
        context = "\n".join(rows)

        # 5. Use the second LLM for the natural-language answer; skip the
        #    model call entirely when there is nothing to summarize.
        if rows:
            answer = self.qa_llm_chain.invoke({
                "prompt": user_question,
                "context": context
            })
        else:
            answer = "No results found."

        return {
            "result": answer,
            "sparql_query": sparql_query,
            "rows": rows
        }

    @staticmethod
    def _to_text(output) -> str:
        """Return model output as a plain string.

        Strings pass through unchanged. For any other object the ``content``
        attribute is used when present (presumably how chat-style models
        return their text; verify against the model in use), otherwise the
        object itself is stringified.
        """
        if isinstance(output, str):
            return output
        return str(getattr(output, "content", output))

    def _normalize_sparql(self, query: str) -> str:
        """Strip Markdown fences and backticks from a generated query.

        Fences are removed before trimming so that the newline left behind
        by a leading fence does not hide a bare ``WHERE`` clause from the
        check below. A query that starts with ``WHERE`` is given a
        ``SELECT *`` head.
        """
        query = (
            query.replace("```sparql", "")
            .replace("```", "")
            .replace("`", "")
            .strip()
        )
        if query.upper().startswith("WHERE"):
            return "SELECT * " + query
        return query


In [32]:
from langchain_ollama.llms import OllamaLLM

# Two Ollama models: one writes the SPARQL, the other writes the answer.
sparql_llm, qa_llm = (
    OllamaLLM(model="codestral:22b"),
    OllamaLLM(model="mistral-small3.2:latest"),
)

# Wire both models, both prompts and the graph into the two-stage chain.
dual_chain = DualLLMSparqlChain(
    graph=graph,
    qa_prompt=qa_prompt,
    sparql_prompt=sparql_prompt,
    qa_llm=qa_llm,
    sparql_llm=sparql_llm,
)

# Run a single question and show the type of the returned object.
question = "Who wrote 'Sermo asceticus'?"
result = dual_chain.invoke(question)
print(type(result))

<class 'dict'>


In [33]:
# Print every entry of the result dict, one "key: value" line per entry.
for key, value in result.items():
    print(f"{key}: {value}")

result: The text titled "Sermo asceticus" is attributed to Stephen of Thebes. His name appears in Coptic as ⲥⲧⲉⲫⲁⲛⲟⲥ and in Greek as Στέφανος.
sparql_query: PREFIX coptic: <http://www.semanticweb.org/sjhuskey/ontologies/2025/7/coptic-metadata-viewer/>
      PREFIX dcterms: <http://purl.org/dc/terms/>
      PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
      PREFIX frbr: <http://purl.org/vocab/frbr/core#>

      SELECT ?authorName WHERE {
        ?work a frbr:Work ;
               dcterms:title "Sermo asceticus" ;
               dcterms:creator ?author .
        ?author rdf:type coptic:Author ;
                 foaf:name ?authorName .
      }
rows: ['Stephen of Thebes', 'ⲥⲧⲉⲫⲁⲛⲟⲥ', 'Στέφανος']


In [None]:
# Initialize the GraphSparqlQAChain with the LLM, prompts, and graph.
# The SELECT-generation prompt is the `sparql_prompt` template defined earlier
# in this notebook; it is passed under the keyword `sparql_select_prompt`.
chain = GraphSparqlQAChain.from_llm(
    #ChatOpenAI(model="gpt-4o-mini", temperature=0),
    OllamaLLM(model="mistral-small3.2:latest", temperature=0),
    sparql_select_prompt=sparql_prompt,
    qa_prompt=qa_prompt,
    graph=graph,
    verbose=True,
    return_intermediate_steps=True,
    # NOTE(review): explicit opt-in flag of the chain; confirm the graph is
    # safe to expose to model-generated queries before using elsewhere.
    allow_dangerous_requests=True
)

In [None]:
# Example query to test the chain.
# The question mark sits outside the quoted title so the title text handed to
# the model is exactly 'Sermo asceticus'.
output = chain.invoke("Who is the creator of the work with the title 'Sermo asceticus'?")
print(output['result'])  # Print the answer

In [None]:
# Example query: the work title is fully quoted and the question is terminated.
output = chain.invoke("What is known about the 'Encomium of Theodore Archbishop of Antioch'?")
print(output['result'])  # Print the answer

In [None]:
# Example query: open-ended lookup by name.
question = "What is in the graph about Theodore the Anatolian?"
output = chain.invoke(question)
print(output["result"])  # Print the answer

In [None]:
# Example query: ask for any text containing a given string.
question = "Please find any text in the graph with the string 'Theodore the Anatolian'."
output = chain.invoke(question)
print(output["result"])  # Print the answer

In [None]:
# Example query: refer to a title by bare number.
question = "Tell me more about title 307"
output = chain.invoke(question)
print(output["result"])  # Print the answer

In [None]:
# Example query: same title, this time named explicitly by its identifier.
question = "Tell me more about the title with identifier 307"
output = chain.invoke(question)
print(output["result"])  # Print the answer

In [None]:
# Example query: question phrased with a property name from the schema.
question = "Which manuscript dcterms:hasPart the title with id 307?"
output = chain.invoke(question)
print(output["result"])  # Print the answer