In [1]:
from src.llm_query_generator.chat_history import ChatHistory
from src.llm_query_generator.db import Neo4jAdapter
from src.llm_query_generator.llm import OpenAILLM
import os
from openai import OpenAI
import json
from pathlib import Path
from tqdm import tqdm

In [2]:
URI = "bolt://localhost:7687"
USER = ""
PASSWORD = ""

In [3]:
neo4j = Neo4jAdapter(URI, USER, PASSWORD).connect()
llm = OpenAILLM(api_key=os.environ.get("OPENAI_KEY"),model="gpt-4-1106-preview",temperature=0.0)

In [43]:
scripts= Path("./scripts/CypherScript_XAI4Wind.txt").read_text(encoding="utf-8").split(";\n")

In [21]:
for command in tqdm(scripts):
    script = command.strip()
    if script:
        try:
            neo4j.execute(script)
        except Exception as e:
            print(f"Error executing {script}")
            print(e)


100%|██████████| 1270/1270 [00:18<00:00, 70.14it/s] 

Error executing DROP CONSTRAINT FOR (node:`UNIQUE IMPORT LABEL`) REQUIRE (node.`UNIQUE IMPORT ID`) IS UNIQUE
{code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input '(': expected "IF" or <EOF> (line 1, column 21 (offset: 20))
"DROP CONSTRAINT FOR (node:`UNIQUE IMPORT LABEL`) REQUIRE (node.`UNIQUE IMPORT ID`) IS UNIQUE"
                     ^}





In [4]:
neo4j.get_schema()

"\n            Node properties are the following:\n            [{'properties': [{'property': 'number', 'type': 'INTEGER'}, {'property': 'alarm_no', 'type': 'STRING'}, {'property': 'description', 'type': 'STRING'}], 'labels': 'Alarm'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'ESC'}, {'properties': [{'property': 'fno', 'type': 'INTEGER'}, {'property': 'name', 'type': 'STRING'}], 'labels': 'YawBrake'}, {'properties': [{'property': 'name', 'type': 'STRING'}, {'property': 'contents', 'type': 'STRING'}], 'labels': 'Predictive'}, {'properties': [{'property': 'activities', 'type': 'LIST'}, {'property': 'number', 'type': 'INTEGER'}], 'labels': 'CorrAct'}, {'properties': [{'property': 'fno', 'type': 'INTEGER'}, {'property': 'name', 'type': 'STRING'}], 'labels': 'Test'}, {'properties': [{'property': 'number', 'type': 'INTEGER'}, {'property': 'activities', 'type': 'LIST'}, {'property': 'image_url', 'type': 'LIST'}, {'property': 'details', 'type': 'STRING'}], 'labels': 'P

In [28]:
import re

def modify_cypher_command(command):
    # Regular expression to find a label with a number at the end of the command
    match = re.search(r'SET n:([A-Za-z]+)(\d+);$', command)
    if match:
        # Extract the label and the number
        label = match.group(1)
        number = match.group(2)

        # Modify the command to include the number as a property and update the label
        new_command = re.sub(r'SET n:([A-Za-z]+\d+);$', f'SET n:{label};', command)
        new_command = re.sub(r'(\{.*\})', r'\1, `number`: ' + number, new_command)
        return new_command
    else:
        # Return the original command if no label with number is found
        return command

# Example usage
command = "CREATE (n:`UNIQUE IMPORT LABEL`{`UNIQUE IMPORT ID`: row._id}) SET n += row.properties SET n:Alarm16;"
modified_command = modify_cypher_command(command)
print(modified_command)


CREATE (n:`UNIQUE IMPORT LABEL`{`UNIQUE IMPORT ID`: row._id}, `number`: 16) SET n += row.properties SET n:Alarm;


In [5]:
query = 'In how many movies did tom hanks play as a actor'
prompt = neo4j.build_prompt(query)
prompt

"Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n\n            Node properties are the following:\n            [{'properties': [{'property': 'title', 'type': 'STRING'}, {'property': 'tagline', 'type': 'STRING'}, {'property': 'released', 'type': 'INTEGER'}], 'labels': 'Movie'}, {'properties': [{'property': 'born', 'type': 'INTEGER'}, {'property': 'name', 'type': 'STRING'}], 'labels': 'Person'}]\n            Relationship properties are the following:\n            [{'type': 'ACTED_IN', 'properties': [{'property': 'roles', 'type': 'LIST'}]}, {'type': 'REVIEWED', 'properties': [{'property': 'summary', 'type': 'STRING'}, {'property': 'rating', 'type': 'INTEGER'}]}]\n            The relationships are the following:\n            ['(:Person)-[:ACTED_IN]->(:Movie)', '(:Person)-[:DIRECTED]->(:Movie)', '(:Person)-[:

In [5]:
import re

pattern = r"```.*?\n(.*?)```"

def clean_generation(query:str)->str:
    match = re.search(pattern, query, re.DOTALL)
    if match:
        code = match.group(1)
        return code
    return query

In [7]:
cypher_query=clean_generation(llm.generate(prompt))
cypher_query

'MATCH (p:Person {name: "Tom Hanks"})-[:ACTED_IN]->(m:Movie)\nRETURN COUNT(m) AS NumberOfMoviesTomHanksActedIn\n'

In [4]:
neo4j.execute('MATCH(n:FaultEvents)-[:TYPE]-(p)-[:AFFECTS]-(q{name:"Transformer"}) RETURN p')

[{'p': {'number': 47,
   'details': 'Turbine does not couple. The stator contact input is not received after requesting\nconnection.'}},
 {'p': {'number': 49,
   'details': 'Circuit Breaker Not Ok. Any thermal fuse signal is deactivated for more than 5s.'}},
 {'p': {'number': 48,
   'details': 'Transformer fan circuit breaker. The thermal fuse of the trafonsmer fan is deactivated.'}},
 {'p': {'number': 52,
   'details': 'Analogue module failure. Error in analog card.'}},
 {'p': {'number': 51,
   'details': 'Temperature measurement module failure. Error in Temperature card (RTD)'}},
 {'p': {'number': 50,
   'details': 'PLC Module Failure. Peripheral Module Failure'}},
 {'p': {'number': 54, 'details': 'UPS failure'}},
 {'p': {'number': 53, 'details': 'Communication error'}}]

In [6]:
query = 'Show all details of fault events relating to wind turbine transformer subsystem.'
prompt = neo4j.build_prompt(query)
print(prompt)

Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:

            Node properties are the following:
            [{'properties': [{'property': 'number', 'type': 'INTEGER'}, {'property': 'alarm_no', 'type': 'STRING'}, {'property': 'description', 'type': 'STRING'}], 'labels': 'Alarm'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'ESC'}, {'properties': [{'property': 'fno', 'type': 'INTEGER'}, {'property': 'name', 'type': 'STRING'}], 'labels': 'YawBrake'}, {'properties': [{'property': 'name', 'type': 'STRING'}, {'property': 'contents', 'type': 'STRING'}], 'labels': 'Predictive'}, {'properties': [{'property': 'activities', 'type': 'LIST'}, {'property': 'number', 'type': 'INTEGER'}], 'labels': 'CorrAct'}, {'properties': [{'property': 'fno', 'type': 'INTEGER'}, {'property': 'name', 'type': 'STRING'

In [7]:
cypher_query=clean_generation(llm.generate(prompt))
cypher_query

'MATCH (:FaultEvents)-[:TYPE]->(fe:FaultEvent)-[:AFFECTS]->(:Transformer) RETURN fe\n'

In [8]:
neo4j.execute(cypher_query)

[{'fe': {'number': 52,
   'details': 'Analogue module failure. Error in analog card.'}},
 {'fe': {'number': 51,
   'details': 'Temperature measurement module failure. Error in Temperature card (RTD)'}},
 {'fe': {'number': 54, 'details': 'UPS failure'}},
 {'fe': {'number': 53, 'details': 'Communication error'}},
 {'fe': {'number': 50,
   'details': 'PLC Module Failure. Peripheral Module Failure'}},
 {'fe': {'number': 49,
   'details': 'Circuit Breaker Not Ok. Any thermal fuse signal is deactivated for more than 5s.'}},
 {'fe': {'number': 48,
   'details': 'Transformer fan circuit breaker. The thermal fuse of the trafonsmer fan is deactivated.'}},
 {'fe': {'number': 47,
   'details': 'Turbine does not couple. The stator contact input is not received after requesting\nconnection.'}}]

In [9]:
query = 'MATCH (p:Person {name: "Tom Hanks"})-[:ACTED_IN]->(m:Movie)\nRETURN COUNT(m) AS NumberOfMoviesTomHanksActedIn\n'
try:
    result = neo4j.execute(query)
except Exception as e:
    print(e)
    


{code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'MAT': expected
  "ALTER"
  "CALL"
  "CREATE"
  "DEALLOCATE"
  "DELETE"
  "DENY"
  "DETACH"
  "DROP"
  "DRYRUN"
  "ENABLE"
  "FOREACH"
  "GRANT"
  "LOAD"
  "MATCH"
  "MERGE"
  "OPTIONAL"
  "REALLOCATE"
  "REMOVE"
  "RENAME"
  "RETURN"
  "REVOKE"
  "SET"
  "SHOW"
  "START"
  "STOP"
  "TERMINATE"
  "UNWIND"
  "USE"
  "USING"
  "WITH" (line 1, column 1 (offset: 0))
"MAT (p:Person {name: "Tom Hanks"})-[:ACTED_IN]->(m:Movie)"
 ^}


In [16]:
available_databases = {"Neo4j": "This database contains information about movies and actors",
                       "MongoDB": "This database contains information sales of books",
                        "MySQL" : "This database contains information about the weather"}

print(available_databases)

{'Neo4j': 'This database contains information about movies and actors', 'MongoDB': 'This database contains information sales of books', 'MySQL': 'This database contains information about the weather'}


In [17]:
history = ChatHistory()
history.add_system_message( "Please output valid json")
history.add_user_message("In how many movies did tom hanks play as a actor")
history.add_assistant_message("Tom Hanks played in 87 movies as an actor")
history.add_user_message("Dis tom hanks paly in the Movie Forrest Gump as an actor")
history.add_assistant_message("Yes, Tom Hanks played in the Movie Forrest Gump as an actor")
history.add_user_message(f"""Decide if you can asnwer the question only with the information of the chat history and if not which Database should be used to answer the question.
You have the follwing Databases available:
{available_databases}
The question is: In how many movies did tom hanks play as a actor and did he paly in the movie forest gump?
The current chat history is:
{history.format_for_model()}
Follow this example for the output:
{{
  Database: List[Literal["Neo4j", "MongoDB", "MySQL", "None"]],
  can_answer_from_history: bool,                                 
}}"""

)
decision_model = OpenAI(api_key=os.environ.get("OPENAI_KEY"))

chat_completion, *_ = decision_model.chat.completions.create(
messages= history.format_for_model(),
model="gpt-4-1106-preview",
response_format={"type": "json_object"},
temperature=0.2
).choices

content = chat_completion.message.content
reply = json.loads(content)
print(reply)

{'Database': ['Neo4j', 'None'], 'can_answer_from_history': True}
