In [1]:
node_properties_query = """
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE NOT type = "RELATIONSHIP" AND elementType = "node"
WITH label AS nodeLabels, collect(property) AS properties
RETURN {labels: nodeLabels, properties: properties} AS output

"""

rel_properties_query = """
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE NOT type = "RELATIONSHIP" AND elementType = "relationship"
WITH label AS nodeLabels, collect(property) AS properties
RETURN {type: nodeLabels, properties: properties} AS output
"""

rel_query = """
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE type = "RELATIONSHIP" AND elementType = "node"
RETURN {source: label, relationship: property, target: other} AS output
"""

In [2]:
!pip install openai==0.28
!pip install neo4j
from neo4j import GraphDatabase
from neo4j.exceptions import CypherSyntaxError
import openai
import time

def schema_text(node_props, rel_props, rels):
    return f"""
  This is the schema representation of the Neo4j database.
  Node properties are the following:
  {node_props}
  Relationship properties are the following:
  {rel_props}
  Relationship point from source to target nodes
  {rels}
  Make sure to respect relationship types and directions
  """


class Neo4jGPTQuery:
    def __init__(self, url, user, password, openai_api_key):
        self.driver = GraphDatabase.driver(url, auth=(user, password))
        openai.api_key = openai_api_key
       # time.sleep(60)
        # construct schema
        self.schema = self.generate_schema()


    def generate_schema(self):
        node_props = self.query_database(node_properties_query)
        rel_props = self.query_database(rel_properties_query)
        rels = self.query_database(rel_query)
        return schema_text(node_props, rel_props, rels)

    def refresh_schema(self):
        self.schema = self.generate_schema()

    def get_system_message(self):
        return f"""
        Task: Generate Cypher queries to query a Neo4j graph database based on the provided schema definition.
        Instructions:
        Note: Generate a Cypher query that retrieves triples (node-relationship-node)  with a limit of 1 and query using the entity or noun in lowercase and relationship.
        Do not use any other relationship types or properties that are not provided.
        If you cannot generate a Cypher statement based on the provided schema, explain the reason to the user.
       Schema:
        {self.schema}

        Note: Do not include any explanations or apologies in your responses.
        """

    def query_database(self, neo4j_query, params={}):
        with self.driver.session() as session:
           # time.sleep(60)
            result = session.run(neo4j_query, params)
            output = [r.values() for r in result]
            output.insert(0, result.keys())
            return output

    def construct_cypher(self, question, history=None):
        messages = [
            {"role": "system", "content": self.get_system_message()},
            {"role": "user", "content": question},
        ]
        # Used for Cypher healing flows
        if history:
            messages.extend(history)
        #time.sleep(60)

        completions = openai.ChatCompletion.create(
            model="gpt-4",
            temperature=0.0,
            max_tokens=1000,
            messages=messages
        )
        return completions.choices[0].message.content
    def format_results_as_sentence(self, results,question):
        #print(results)
        if(len(results) < 2):
            return "Could not generate answer"
        print("Result from database")
        print("subject:{}".format(results[0]))
        print("relationship:{}".format(results[1]))
        print("object:{}".format(results[2]))
        prompt = f"Translate the following triples into a coherent sentence for the question {question} into a sentence:\n\n"
        triples = "\n".join([f"Source: {results[0]}, Relationship: {results[1]}, Target: {results[2]}" ])
        messages = [
        {"role": "system", "content": "You are a highly intelligent AI trained to understand relationships between entities. Also get extra information from the wikipedia url from the entitiy to answer the question"},
        {"role": "user", "content": prompt + triples}
    ]
        response = openai.ChatCompletion.create(
            model="gpt-4",  # Use an appropriate model
            messages=messages,
            temperature=0.0,
            max_tokens=1000

        )
        return response.choices[0].message.content

    def run(self, question, history=None, retry=True):
        # Construct Cypher statement
        time.sleep(60)
        cypher = self.construct_cypher(question, history)
        print("cypher generated by chatgpt")
        print(cypher)
        try:
           # time.sleep(60)
            a = self.query_database(cypher)
            #print("line 105", a)
            #response = ""
            if len(a) < 2:
              #print("IF- line number 108")
              response = "Could not generate answer"
              print(response)
              return response
            else:
              response =  self.format_results_as_sentence(a[1],question)
              #print("ELSE- line number 114")
              print(response)
              return response
        # Self-healing flow
        except CypherSyntaxError as e:
            # If out of retries
            if not retry:
              return "Invalid Cypher syntax"
        # Self-healing Cypher flow by
        # providing specific error to GPT-4
            print("Retrying")
            #time.sleep(60)
            return self.run(
                question,
                [
                    {"role": "assistant", "content": cypher},
                    {
                        "role": "user",
                        "content": f"""This query returns an error: {str(e)}
                        Give me a improved query that works without any explanations or apologies""",
                    },
                ],
                retry=False
            )




In [3]:
gds_db = Neo4jGPTQuery(
    url="neo4j+s://b68740ea.databases.neo4j.io",
    user="neo4j",
    password="_lJbg6-mIWPdVXmtjTrAgcmBJFq7iHRJ8iHRBRDuDDc",
    openai_api_key="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",
)


In [4]:
options = ["Exit", "Quit"]

while True:
  selected_word = input(f"Please write your question: ")
  if selected_word  not in options:
    query = f"""
      {selected_word.lower()}?
      """
    gds_db.run(query)
  else:
    break


Please write your question: how many phases in cell cycle
cypher generated by chatgpt
MATCH (e1:Entity)-[r:RELATIONSHIP]->(e2:Entity)
WHERE toLower(e1.name) = 'cell cycle' AND toLower(r.type) = 'phases'
RETURN e1.name, r.type, e2.name
LIMIT 1
Could not generate answer
Please write your question: what is interphase
cypher generated by chatgpt
MATCH (n:Entity)-[r:RELATIONSHIP]->(m:Entity) 
WHERE toLower(n.name) = 'interphase' 
RETURN n.name, r.type, m.name 
LIMIT 1
Could not generate answer
Please write your question: what is M-phase
cypher generated by chatgpt
MATCH (n:Entity)-[r:RELATIONSHIP]->(m:Entity) 
WHERE toLower(n.name) = 'm-phase' 
RETURN n.name, r.type, m.name LIMIT 1
Could not generate answer
Please write your question: how nucleus is reformed
cypher generated by chatgpt
MATCH (n:Entity)-[r:RELATIONSHIP]->(m:Entity)
WHERE toLower(n.name) = 'nucleus' AND toLower(r.type) = 'reformed'
RETURN n, r, m
LIMIT 1
Result from database
subject:<Node element_id='4:e23f223b-b311-4c32-9b95

KeyboardInterrupt: Interrupted by user