In [34]:
from langchain_openai import ChatOpenAI
from langchain.chains import GraphSparqlQAChain
from langchain_community.graphs import RdfGraph
from dotenv import load_dotenv
load_dotenv()

True

In [35]:
# Initialize the RDF graph
graph = RdfGraph(
    standard="rdf",
    serialization="ttl",
    source_file="data/graph.ttl"
)

In [14]:
graph.load_schema()
schema = graph.schema.split("\n")
schema

['In the following, each IRI is followed by the local name and optionally its description in parentheses. ',
 'The RDF graph supports the following node types:',
 '<http://lawd.info/ontology/Place> (Place, None), <http://purl.org/dc/terms/Agent> (Agent, None), <http://purl.org/dc/dcmitype/Collection> (Collection, None), <http://www.cidoc-crm.org/cidoc-crm/E84_Information_Carrier> (E84_Information_Carrier, None), <http://xmlns.com/foaf/0.1/Person> (Person, None), <http://purl.org/vocab/frbr/core#Work> (Work, None), <http://www.cidoc-crm.org/cidoc-crm/E34_Inscription> (E34_Inscription, None), <http://www.cidoc-crm.org/cidoc-crm/E35_Title> (E35_Title, None), <http://www.cidoc-crm.org/cidoc-crm/Identifier> (Identifier, None)',
 'The RDF graph supports the following relationships:',
 '<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by> (P1_is_identified_by, None), <http://www.cidoc-crm.org/cidoc-crm/P94_has_created> (P94_has_created, None), <http://www.cidoc-crm.org/cidoc-crm/P128_carr

In [20]:
classes = """<http://lawd.info/ontology/Place>, <http://purl.org/dc/terms/Agent>, <http://purl.org/dc/dcmitype/Collection>, <http://www.cidoc-crm.org/cidoc-crm/E84_Information_Carrier>, <http://xmlns.com/foaf/0.1/Person>, <http://purl.org/vocab/frbr/core#Work>, <http://www.cidoc-crm.org/cidoc-crm/E34_Inscription>, <http://www.cidoc-crm.org/cidoc-crm/E35_Title>, <http://www.cidoc-crm.org/cidoc-crm/Identifier>"""
properties = """<http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by>, <http://www.cidoc-crm.org/cidoc-crm/P94_has_created>, <http://www.cidoc-crm.org/cidoc-crm/P128_carries>, <http://purl.org/dc/terms/hasPart>, <http://www.cidoc-crm.org/cidoc-crm/P128_is_carried_by>, <http://purl.org/dc/terms/isPartOf>, <http://www.w3.org/1999/02/22-rdf-syntax-ns#value>, <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, <http://lexinfo.net/ontology/2.0/lexinfo#transliteration>, <http://purl.org/dc/terms/type>, <http://www.cidoc-crm.org/cidoc-crm/P48_has_preferred_identifier>, <http://purl.org/dc/terms/description>, <http://purl.org/dc/terms/issued>, <http://purl.org/dc/terms/temporal>, <http://www.cidoc-crm.org/cidoc-crm/P1_identifies>, <http://purl.org/dc/terms/title>, <http://www.w3.org/2006/time#hasBeginning>, <http://www.cidoc-crm.org/cidoc-crm/P2_has_type>, <http://purl.org/dc/terms/creator>, <http://xmlns.com/foaf/0.1/name>, <http://purl.org/dc/terms/identifier>, <http://purl.org/dc/terms/extent>, <http://purl.org/dc/terms/bibliographicCitation>, <http://www.w3.org/2006/time#hasEnd>, <http://www.w3.org/2004/02/skos/core#exactMatch>, <http://www.w3.org/2000/01/rdf-schema#label>, <http://www.cidoc-crm.org/cidoc-crm/P55_has_current_location>, <http://www.cidoc-crm.org/cidoc-crm/P94_was_created_by>, <http://lawd.info/ontology/primaryForm>, <http://purl.org/dc/terms/references>, <http://purl.org/dc/terms/isReferencedBy>, <http://www.cidoc-crm.org/cidoc-crm/P4_has_time_span>, <http://lexinfo.net/ontology/2.0/lexinfo#translation>, <http://schema.org/gender>, <http://schema.org/roleName>, <http://www.w3.org/2002/07/owl#sameAs>, <http://purl.org/dc/terms/spatial>, <http://schema.org/title>, <http://www.cidoc-crm.org/cidoc-crm/P98_brought_into_life>"""

class_list = classes.split(", ")
property_list = properties.split(", ")
for i in class_list:
    i_list = i.split("/")
    i = i_list[-1].strip(">")
    print(i)
for i in property_list:
    i = i.split("/")[-1].strip(">")
    print(i)

Place
Agent
Collection
E84_Information_Carrier
Person
core#Work
E34_Inscription
E35_Title
Identifier
P1_is_identified_by
P94_has_created
P128_carries
hasPart
P128_is_carried_by
isPartOf
22-rdf-syntax-ns#value
22-rdf-syntax-ns#type
lexinfo#transliteration
type
P48_has_preferred_identifier
description
issued
temporal
P1_identifies
title
time#hasBeginning
P2_has_type
creator
name
identifier
extent
bibliographicCitation
time#hasEnd
core#exactMatch
rdf-schema#label
P55_has_current_location
P94_was_created_by
primaryForm
references
isReferencedBy
P4_has_time_span
lexinfo#translation
gender
roleName
owl#sameAs
spatial
title
P98_brought_into_life


In [25]:
chain = GraphSparqlQAChain.from_llm(
    ChatOpenAI(model="gpt-4o-mini", temperature=0),
    graph=graph, 
    verbose=True,
    return_intermediate_steps=True,
    allow_dangerous_requests=True
)

In [26]:
chain.run("Who is the author of the work with the title 'Sermo asceticus?'")  # Example query

  chain.run("Who is the author of the work with the title 'Sermo asceticus?'")  # Example query




[1m> Entering new GraphSparqlQAChain chain...[0m
Identified intent:
[32;1m[1;3mSELECT[0m
Generated SPARQL:
[32;1m[1;3m```
PREFIX dc: <http://purl.org/dc/terms/>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX frbr: <http://purl.org/vocab/frbr/core#>

SELECT ?author
WHERE {
    ?work frbr:title "Sermo asceticus" .
    ?work crm:P94_was_created_by ?author .
}
```[0m


ParseException: Expected {SelectQuery | ConstructQuery | DescribeQuery | AskQuery}, found '`'  (at char 0), (line:1, col:1)

In [36]:
# Manual approach: Clean the SPARQL and run it directly
def clean_sparql_query(query_text):
    """Remove markdown formatting from SPARQL query"""
    # Remove triple backticks and any language identifier
    query_text = query_text.strip()
    
    # Remove opening ```sparql or ```
    if query_text.startswith('```'):
        lines = query_text.split('\n')
        # Remove the first line with ```
        lines = lines[1:]
        query_text = '\n'.join(lines)
    
    # Remove closing ```
    if query_text.endswith('```'):
        query_text = query_text[:-3]
    
    return query_text.strip()

In [37]:
# Create a working wrapper function
def ask_question(question, verbose=True):
    """
    Ask a question about the RDF graph, handling the markdown formatting issue
    """
    try:
        # Use the original chain to generate SPARQL, but catch the error
        if verbose:
            print(f"Question: {question}")
            print("Generating SPARQL query...")
        
        # Get the LLM to generate SPARQL
        llm = ChatOpenAI(model="gpt-4o",temperature=0)


        
        # Create a prompt for SPARQL generation
        prompt = f"""You are an expert in RDF data and SPARQL queries.

        Your RDF graph has these classes and properties:
            - dcmitype:Collection
                - dcterms:hasPart
                - dcterms:identifier
                - dcterms:spatial
                - rdf:type

            - dcmitype:Text
                - dcterms:description
                - dcterms:identifier
                - dcterms:references
                - dcterms:type
                - ns1:translation
                - rdf:type
                - rdf:value
                - time:hasBeginning
                - time:hasEnd

            - dcterms:Agent
                - dcterms:creator
                - dcterms:description
                - foaf:name
                - owl:sameAs
                - rdf:type
                - schema1:title

            - dcterms:PhysicalResource
                - dcterms:bibliographicCitation
                - dcterms:description
                - dcterms:hasPart
                - dcterms:identifier
                - dcterms:isPartOf
                - dcterms:medium
                - rdf:type
                - time:hasBeginning
                - time:hasEnd

            - foaf:Person
                - dcterms:isReferencedBy
                - foaf:name
                - ns1:transliteration
                - rdf:type
                - rdfs:label
                - schema1:birthPlace
                - schema1:gender
                - schema1:roleName
                - time:hasBeginning
                - time:hasEnd

            - frbr:Work
                - dcterms:creator
                - dcterms:description
                - dcterms:hasPart
                - dcterms:identifier
                - dcterms:isReferencedBy
                - dcterms:temporal
                - dcterms:title
                - rdf:type
                - rdfs:label

            - lawd:Place
                - lawd:primaryForm
                - rdf:type
                - rdfs:label
                - skos:exactMatch

        DO NOT include any other classes or properties.

        Question: {question}

Generate a SPARQL query to answer this question. Return only the SPARQL query.

SPARQL Query:"""
        
        generated_sparql = llm.invoke(prompt).content
        
        if verbose:
            print("Generated SPARQL (raw):")
            print(repr(generated_sparql))        
        
        if verbose:
            print("Cleaned SPARQL:")
            print(clean_sparql_query(generated_sparql))

        # Execute the generated query
        results = graph.query(clean_sparql_query(generated_sparql))

        # Format results
        result_list = []
        for row in results:
            result_list.append(str(row[0]) if len(row) == 1 else [str(item) for item in row])
        
        if verbose:
            print(f"Found {len(result_list)} results:")
            for i, result in enumerate(result_list, 1):
                print(f"  {i}. {result}")
        
        return {
            "query": question,
            "sparql": generated_sparql,
            "results": result_list
        }
        
    except Exception as e:
        print(f"Error: {e}")
        return {
            "query": question,
            "error": str(e)
        }

# Test the wrapper function
result = ask_question("Who is the author of the work with the title 'Sermo asceticus'?")
print("\\nFinal result:", result)

Question: Who is the author of the work with the title 'Sermo asceticus'?
Generating SPARQL query...
Generated SPARQL (raw):
'```sparql\nPREFIX dcterms: <http://purl.org/dc/terms/>\nPREFIX frbr: <http://purl.org/vocab/frbr/core#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n\nSELECT ?author\nWHERE {\n  ?work rdf:type frbr:Work ;\n        dcterms:title "Sermo asceticus" ;\n        dcterms:creator ?author .\n}\n```'
Cleaned SPARQL:
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX frbr: <http://purl.org/vocab/frbr/core#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?author
WHERE {
  ?work rdf:type frbr:Work ;
        dcterms:title "Sermo asceticus" ;
        dcterms:creator ?author .
}
Found 1 results:
  1. https://paths.uniroma1.it/atlas/authors/11
\nFinal result: {'query': "Who is the author of the work with the title 'Sermo asceticus'?", 'sparql': '```sparql\nPREFIX dcterms: <http://purl.org/dc/terms/>\nPREFIX frbr: <http://purl.org/vocab/frbr/core