In [4]:
from openai import OpenAI
import os
from difflib import get_close_matches
import json
import re
from data import ENTITY_TYPES, RELATIONSHIP_TYPES

# Credentials come from the environment; either value is None when unset.
api_key = os.environ.get('OPENAI_API_KEY')
organisation = os.environ.get('ORGANISATION')

# Module-level OpenAI client built from the credentials above.
client = OpenAI(api_key=api_key, organization=organisation)


def construct_sparql_query(nl_query):
    """
    Construct a SPARQL query from a natural language query.

    The question is embedded in a prompt together with ENTITY_TYPES and
    RELATIONSHIP_TYPES and sent as a single user message to the
    chat-completions endpoint through the module-level ``client``.

    Args:
        nl_query: Natural language question to translate.

    Returns:
        The message content of the first completion choice, exactly as the
        model produced it. No parsing or validation is applied, so the
        caller should not assume the text is a syntactically valid query.
    """
    # The entity/relationship vocabularies are interpolated verbatim so the
    # model can pick the closest matching terms from the database.
    prompt = f"""
    Create a Sparql query from a natural language query:
    "{nl_query}"
    The Entity and Relationship types are given, coming from the linked database. Semantically match the entities and relationships in the query to the ones in the database also apply any filters necessary. Only output the given query.
    - `Here is a list of entities and relationships in the database, so use those to find the closest related ones`: entities: "{ENTITY_TYPES}", relationships: "{RELATIONSHIP_TYPES}"
    """

    response = client.chat.completions.create(
        model="o1-mini",
        messages=[
            {"role": "user", "content": prompt},
        ],
    )

    # Only the first choice is used.
    return response.choices[0].message.content

In [None]:
# Demo: translate one sample question and print whatever the model returned.
nl_query = "buildings constructed in new york after 2009"
sparql_query = construct_sparql_query(nl_query)
# print() inserts a space between its arguments, so the query text starts
# one space in on the line after the label.
print("Generated SPARQL Query:\n", sparql_query)

# Next steps

1. predicates: relationships
2. entities
3. filters (get light bulb model manufactured after 2009)

Vector embeddings: use a pretrained model such as BERT to match query terms to database terms.

Example: "Light bulb manufactured by Philips" -> "by" == "isSubclassOf", "manufacturedBy" == "isChildOf"