In [17]:
from rdflib import Graph, Namespace, RDF, RDFS, OWL

def extract_ontology_description(turtle_file):
    g = Graph()
    g.parse(turtle_file, format="turtle")

    # Define namespaces
    OWL_NS = Namespace("http://www.w3.org/2002/07/owl#")
    XSD_NS = Namespace("http://www.w3.org/2001/XMLSchema#")

    # Extract Classes
    classes = set(g.subjects(RDF.type, OWL_NS.Class))

    # Extract Object and Datatype Properties
    object_properties = set(g.subjects(RDF.type, OWL_NS.ObjectProperty))
    datatype_properties = set(g.subjects(RDF.type, OWL_NS.DatatypeProperty))

    # Extract example triples
    example_triples = []
    for s, p, o in g:
        example_triples.append(f"{s.n3(g.namespace_manager)} {p.n3(g.namespace_manager)} {o.n3(g.namespace_manager)}.")

    # Create a human-readable description
    description = []

    # Classes
    description.append("Classes:")
    if classes:
        for cls in classes:
            label = g.value(cls, RDFS.label) or g.value(cls, Namespace("http://www.w3.org/2004/02/skos/core#prefLabel"))
            description.append(f"- {cls.n3(g.namespace_manager)} ({label if label else 'No label'})")
    else:
        description.append("- None found")

    # Properties
    description.append("\nObject Properties:")
    if object_properties:
        for prop in object_properties:
            domain = list(g.objects(prop, RDFS.domain))
            range_ = list(g.objects(prop, RDFS.range))
            domain_str = ", ".join(d.n3(g.namespace_manager) for d in domain) if domain else "Unknown"
            range_str = ", ".join(r.n3(g.namespace_manager) for r in range_) if range_ else "Unknown"
            label = g.value(prop, Namespace("http://www.w3.org/2004/02/skos/core#prefLabel"))
            description.append(f"- {prop.n3(g.namespace_manager)} (label: {label if label else 'No label'}, domain: {domain_str}, range: {range_str})")
    else:
        description.append("- None found")

    description.append("\nDatatype Properties:")
    if datatype_properties:
        for prop in datatype_properties:
            domain = list(g.objects(prop, RDFS.domain))
            range_ = list(g.objects(prop, RDFS.range))
            domain_str = ", ".join(d.n3(g.namespace_manager) for d in domain) if domain else "Unknown"
            range_str = ", ".join(r.n3(g.namespace_manager) for r in range_) if range_ else "Unknown"
            label = g.value(prop, Namespace("http://www.w3.org/2004/02/skos/core#prefLabel"))
            description.append(f"- {prop.n3(g.namespace_manager)} (label: {label if label else 'No label'}, domain: {domain_str}, range: {range_str})")
    else:
        description.append("- None found")

    # Example Triples
    description.append("\nExample Triples:")
    if example_triples:
        description.extend(f"- {triple}" for triple in example_triples[:10])  # Limit to 10 triples for brevity
    else:
        description.append("- None found")

    return "\n".join(description)


In [18]:
turtle_file = "../data/to_test.ttl"  # Replace with the path to your .ttl file
ontology_description = extract_ontology_description(turtle_file)
print(ontology_description)

Classes:
- <https://test.org/dsp_groupf1/def#SP_Building_F_LightingSystem> (No label)
- <https://test.org/dsp_groupf1/def#Floor1_E> (No label)
- <https://test.org/dsp_groupf1/def#SP_Building_A> (No label)
- <https://test.org/dsp_groupf1/def#Floor2_A> (No label)
- <https://test.org/dsp_groupf1/def#Floor1_F> (No label)
- <https://test.org/dsp_groupf1/def#Floor3_B> (No label)
- <https://test.org/dsp_groupf1/def#SP904> (No label)
- <https://test.org/dsp_groupf1/def#Floor2_E> (No label)
- <https://test.org/dsp_groupf1/def#SP_Building_G_LightingSystem> (No label)
- <https://test.org/dsp_groupf1/def#Floor2_B> (No label)
- <https://test.org/dsp_groupf1/def#SP_Building_B> (No label)
- <https://test.org/dsp_groupf1/def#SP_Building_C> (No label)
- <https://test.org/dsp_groupf1/def#SP_Building_D> (No label)
- <https://test.org/dsp_groupf1/def#SP_Building_E> (No label)
- <https://test.org/dsp_groupf1/def#Floor3_A> (No label)
- <https://test.org/dsp_groupf1/def#Floor1_B> (No label)
- <https://test.o

In [23]:
from openai import OpenAI
import os
from difflib import get_close_matches
import json
import re
from data import ENTITY_TYPES, RELATIONSHIP_TYPES

api_key = os.getenv('OPENAI_API_KEY')
organisation = os.getenv('ORGANISATION')
client = OpenAI(organization=organisation, api_key=api_key)

In [28]:
def construct_sparql_query(user_query):
    task = """
        Task:
        You are an assistant trained to generate SPARQL queries based on the given ontology and user prompts. Use the ontology description to understand the structure and generate accurate queries. Assume the ontology is loaded in a database.
        only return the sparql query.
        """
    
    

    prompt = f"""
    Ontology Description:
    {ontology_description}

    {task}

    User Query:
    {user_query}
    """

    response = client.chat.completions.create(
        model="o1-mini",
        messages=[
            # {"role":"system", "content": "You are an expert in ontology-based SPARQL query generation."},
            {"role": "user", "content": prompt}
        ]
    )

    choice_str = response.choices[0].message.content
    
    return choice_str

In [45]:
user_query = "find me all buildings with light bulbs"

query = construct_sparql_query(user_query)

In [52]:
query[10:-3]

'PREFIX def: <https://test.org/dsp_groupf1/def#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n\nSELECT DISTINCT ?building\nWHERE {\n  ?building a ?buildingClass .\n  VALUES ?buildingClass {\n    def:SP_Building_A\n    def:SP_Building_B\n    def:SP_Building_C\n    def:SP_Building_D\n    def:SP_Building_E\n    def:SP_Building_F\n    def:SP_Building_G\n  }\n  ?lightingSystem a ?lightingSystemClass .\n  VALUES ?lightingSystemClass {\n    def:SP_Building_F_LightingSystem\n    def:SP_Building_G_LightingSystem\n  }\n  ?lightingSystem def:hasPart ?lightbulb .\n  ?lightbulb a def:Lightbulb .\n  # Assuming the lighting system is part of the building\n  FILTER EXISTS {\n    ?building def:hasPart ?lightingSystem .\n  }\n}\n'

In [54]:
from rdflib.plugins.sparql import prepareQuery
def execute_query(query):
    query_mod = query[10:-3]
    g = Graph()
    g.parse(turtle_file, format="turtle")
    q = prepareQuery(query_mod)
    results = g.query(q)
    for row in results:
        print(row)
    return results

In [34]:
from rdflib.plugins.sparql import prepareQuery



# Define the SPARQL query
sparql_query = 'PREFIX def: <https://test.org/dsp_groupf1/def#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n\nSELECT DISTINCT ?building WHERE {\n  ?lightingSystem rdfs:subClassOf ?building .\n  ?lightingSystem def:hasPart ?lightbulb .\n  ?lightbulb rdf:type def:Lightbulb .\n}\n'

# Parse the turtle file
g = Graph()
g.parse(turtle_file, format="turtle")

# Prepare and execute the query
q = prepareQuery(sparql_query)
results = g.query(q)

# Print the results
for row in results:
    print(row)

(rdflib.term.URIRef('https://test.org/dsp_groupf1/def#SP_Building_F'),)
(rdflib.term.URIRef('https://test.org/dsp_groupf1/def#SP_Building_G'),)


In [None]:
from tests.tests import TESTS
import pandas as pd

In [40]:
tests_df = pd.DataFrame(TESTS)

In [70]:
tests_df.iloc[6]['expected_output']

[['class', 'label'],
 ['https://test.org/dsp_groupf1/def#Object', 'Object'],
 ['https://test.org/dsp_groupf1/def#SP904', 'Science Park 904 campus'],
 ['https://test.org/dsp_groupf1/def#SP_Building_A', 'Building A'],
 ['https://test.org/dsp_groupf1/def#SP_Building_B', 'Building B'],
 ['https://test.org/dsp_groupf1/def#SP_Building_C', 'Building C'],
 ['https://test.org/dsp_groupf1/def#SP_Building_D', 'Building D'],
 ['https://test.org/dsp_groupf1/def#SP_Building_E', 'Building E'],
 ['https://test.org/dsp_groupf1/def#SP_Building_F', 'Building F'],
 ['https://test.org/dsp_groupf1/def#SP_Building_G', 'Building G']]

In [65]:
for query in tests_df['query']:
    print(query)
    query = construct_sparql_query(query)
    print(execute_query(query))
    print()

Retrieve the model of the lightbulb in the lighting system of Building G.
(rdflib.term.Literal('Phillips 123A'),)
<rdflib.plugins.sparql.processor.SPARQLResult object at 0x16a5d7ad0>

Retrieve the wattage of the lightbulb in the lighting system of Building F.
(rdflib.term.Literal('12.5', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#decimal')),)
<rdflib.plugins.sparql.processor.SPARQLResult object at 0x16a64b140>

Retrieve all lightbulbs and their models.
(rdflib.term.URIRef('https://test.org/dsp_groupf1/def#Lightbulb_building_F'), rdflib.term.Literal('IKEA NOT GOOD'))
(rdflib.term.URIRef('https://test.org/dsp_groupf1/def#Lightbulb_building_G'), rdflib.term.Literal('Phillips 123A'))
<rdflib.plugins.sparql.processor.SPARQLResult object at 0x16a626a50>

Retrieve all buildings and their lighting systems.
(rdflib.term.URIRef('https://test.org/dsp_groupf1/def#SP_Building_F'), rdflib.term.URIRef('https://test.org/dsp_groupf1/def#SP_Building_F_LightingSystem'))
(rdflib.term.UR