In [1]:
from langchain_openai import ChatOpenAI
from langchain.chains import GraphSparqlQAChain
from langchain.prompts import PromptTemplate
from langchain_community.graphs import RdfGraph
from dotenv import load_dotenv
# Load key=value pairs from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key used by ChatOpenAI
# comes from — confirm against the project's .env file.
load_dotenv()

True

In [2]:
# Build the RDF graph wrapper from the local Turtle file.
# The options are collected in one mapping so they are easy to inspect or tweak.
rdf_graph_options = {
    "source_file": "data/graph.ttl",
    "serialization": "ttl",
    "standard": "rdf",
}
graph = RdfGraph(**rdf_graph_options)

In [3]:
# RdfGraph loads its schema while it is being constructed and exposes the
# resulting description through the `get_schema` property. `load_schema()` only
# refreshes that internal state and returns None, so assigning its return value
# would leave `schema` empty and re-run the schema queries for nothing.
schema = graph.get_schema

In [4]:
# Instruction text for the SPARQL-generation step. It lists the classes and
# properties the model may use and has two placeholders: {prompt} (the user's
# question) and {schema} (the schema description supplied at run time).
sparql_select_template = """
    You are an expert in SPARQL queries and RDF graph structures. Your task is to generate a SPARQL query based on the provided schema and the user's question.
    Use only the node types and properties provided in the schema.
    Do not use any node types or properties not explicitly listed.
    Include all necessary PREFIX declarations.
    Return only the SPARQL query. Do not include any text before or after the query. Do not use triple backticks or any markdown formatting.
    Your RDF graph has these classes and properties:
    - dcmitype:Collection
        - dcterms:hasPart
        - dcterms:identifier
        - dcterms:spatial
        - rdf:type

    - dcmitype:Text
        - dcterms:description
        - dcterms:identifier
        - dcterms:references
        - dcterms:type
        - ns1:translation
        - rdf:type
        - rdf:value
        - time:hasBeginning
        - time:hasEnd

    - dcterms:Agent
        - dcterms:creator
        - dcterms:description
        - foaf:name
        - owl:sameAs
        - rdf:type
        - schema1:title

    - dcterms:PhysicalResource
        - dcterms:bibliographicCitation
        - dcterms:description
        - dcterms:hasPart
        - dcterms:identifier
        - dcterms:isPartOf
        - dcterms:medium
        - rdf:type
        - time:hasBeginning
        - time:hasEnd

    - foaf:Person
        - dcterms:isReferencedBy
        - foaf:name
        - ns1:transliteration
        - rdf:type
        - rdfs:label
        - schema1:birthPlace
        - schema1:gender
        - schema1:roleName
        - time:hasBeginning
        - time:hasEnd

    - frbr:Work
        - dcterms:creator
        - dcterms:description
        - dcterms:hasPart
        - dcterms:identifier
        - dcterms:isReferencedBy
        - dcterms:temporal
        - dcterms:title
        - rdf:type
        - rdfs:label

    - lawd:Place
        - lawd:primaryForm
        - rdf:type
        - rdfs:label
        - skos:exactMatch
    Do not use any node types and properties that are not explicitly provided.
    Include all necessary prefixes.
    Always return a human-readable label instead of a URI when possible.
    \n\nThe question is:\n{prompt}
    Schema: {schema}
    """

# Prompt object handed to the chain for generating SELECT queries.
sparql_select_prompt = PromptTemplate(
    template=sparql_select_template,
    input_variables=["prompt", "schema"],
)

# Instruction text for the answer-writing step: {context} receives the query
# results and {prompt} the user's original question.
qa_template = """
You are a friendly and knowledgeable digital research assistant specializing in Coptic and Greek texts.

Given the structured data below, answer the user’s question in clear, natural language.
Use complete sentences. If multilingual forms are included, identify which language each form belongs to.

Structured data:
{context}

User question:
{prompt}

Answer:
"""

# Prompt object handed to the chain for phrasing the final answer.
qa_prompt = PromptTemplate(
    template=qa_template,
    input_variables=["context", "prompt"],
)

In [5]:
# Chat model used by the chain; temperature 0 is requested for the model calls.
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")

# Combine the model, the RDF graph and the two custom prompts into one
# question-answering chain.
chain = GraphSparqlQAChain.from_llm(
    llm,
    graph=graph,
    sparql_select_prompt=sparql_select_prompt,
    qa_prompt=qa_prompt,
    # Print each stage and also keep the intermediate steps in the result.
    verbose=True,
    return_intermediate_steps=True,
    # Explicit opt-in flag passed to the chain, which runs model-generated
    # SPARQL against the graph.
    allow_dangerous_requests=True,
)

In [8]:
# Example query. The question mark belongs to the sentence, not to the title:
# keeping it outside the quotes ensures the title handed to the model is exactly
# 'Sermo asceticus', so a literal match in the generated SPARQL can succeed.
question = "Who is the author of the work with the title 'Sermo asceticus'?"
output = chain.invoke(question)
print(output['result'])  # Print the natural-language answer



[1m> Entering new GraphSparqlQAChain chain...[0m
Identified intent:
[32;1m[1;3mSELECT[0m
Generated SPARQL:
[32;1m[1;3mPREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX frbr: <http://purl.org/vocab/frbr/core#>

SELECT ?authorName WHERE {
    ?work a frbr:Work .
    ?work dcterms:title "Sermo asceticus" .
    ?work dcterms:creator ?creator .
    ?creator foaf:name ?authorName .
}[0m
Full Context:
[32;1m[1;3m[(rdflib.term.Literal('Stephen of Thebes'),), (rdflib.term.Literal('ⲥⲧⲉⲫⲁⲛⲟⲥ', lang='cop'),), (rdflib.term.Literal('Στέφανος', lang='el'),)][0m

[1m> Finished chain.[0m
The author of the work titled 'Sermo asceticus' is Stephen of Thebes. In Coptic, his name is written as ⲥⲧⲉⲫⲁⲛⲟⲥ, and in Greek, it is Στέφανος.
