In [1]:
# Load the dotenv IPython extension and run its %dotenv magic, which reads
# variables from a .env file into the environment.
# NOTE(review): presumably this supplies the credentials used by `utils` - confirm.
%load_ext dotenv
%dotenv

# Reload imported modules automatically before each cell executes, so edits to
# local .py helpers are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from utils import chat, neo4j_driver
from schema_utils import get_schema

In [4]:
# Prompt skeleton for text-to-Cypher generation.
# It is filled in with str.format() and expects four named placeholders:
# {schema}, {terminology}, {examples} and {question}.
prompt_template = """
Instructions: 
Generate Cypher statement to query a graph database to get the data to answer the user question below.

Graph Database Schema:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided in the schema.
{schema}

Terminology mapping:
This section is helpful to map terminology between the user question and the graph database schema.
{terminology}

Examples:
The following examples provide useful patterns for querying the graph database.
{examples}

Format instructions:
Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to 
construct a Cypher statement.
Do not include any text except the generated Cypher statement.
ONLY RESPOND WITH CYPHER, NO CODEBLOCKS.

User question: {question}
"""

In [5]:
# Ask the schema helper to describe the graph reachable through neo4j_driver and
# print the result; this text is later substituted into the prompt's {schema} slot.
schema_string = get_schema(neo4j_driver)
print(schema_string)

Node properties:
Movie {title: STRING, released: INTEGER, tagline: STRING}
Person {name: STRING, born: INTEGER}
Relationship properties:
ACTED_IN {roles: LIST}
REVIEWED {rating: INTEGER, summary: STRING}
The relationships:
(:Person)-[:ACTED_IN]->(:Movie)
(:Person)-[:DIRECTED]->(:Movie)
(:Person)-[:PRODUCED]->(:Movie)
(:Person)-[:WROTE]->(:Movie)
(:Person)-[:FOLLOWS]->(:Person)
(:Person)-[:REVIEWED]->(:Movie)


In [6]:
# Vocabulary hints that map everyday words in a user question (actor, film, ...)
# onto node labels of the graph; substituted into the prompt's {terminology} slot.
terminology_string = """
Persons: When a user asks about a person by trade like actor, writer, director, producer, reviewer, they are referring to a node with the label 'Person'.
Movies: When a user asks about a film or movie, they are referring to a node with the label Movie.
"""

In [7]:
# Few-shot examples for the prompt: each entry is a [question, cypher] pair.
examples = [
    [
        "Who are the two people acted in most movies together?",
        # One Cypher statement, split per clause purely for readability.
        "MATCH (p1:Person)-[:ACTED_IN]->(m:Movie)<-[:ACTED_IN]-(p2:Person) "
        "WHERE p1 <> p2 "
        "RETURN p1.name, p2.name, COUNT(m) AS movieCount "
        "ORDER BY movieCount DESC LIMIT 1",
    ],
]

In [23]:

# The natural-language question to translate into Cypher.
question = "Who directed the most movies?"

# Render every [question, cypher] example as a two-line "Question/Cypher" entry,
# one entry after another. The index from enumerate() was never used, so iterate
# over the examples directly and feed join() a generator instead of a temp list.
formatted_examples = "\n".join(
    f"Question: {example[0]}\nCypher: {example[1]}" for example in examples
)

# Fill all four placeholders of the template to obtain the final prompt text.
full_prompt = prompt_template.format(
    question=question,
    schema=schema_string,
    terminology=terminology_string,
    examples=formatted_examples,
)
print(full_prompt)


Instructions: 
Generate Cypher statement to query a graph database to get the data to answer the user question below.

Graph Database Schema:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided in the schema.
Node properties:
Movie {title: STRING, released: INTEGER, tagline: STRING}
Person {name: STRING, born: INTEGER}
Relationship properties:
ACTED_IN {roles: LIST}
REVIEWED {rating: INTEGER, summary: STRING}
The relationships:
(:Person)-[:ACTED_IN]->(:Movie)
(:Person)-[:DIRECTED]->(:Movie)
(:Person)-[:PRODUCED]->(:Movie)
(:Person)-[:WROTE]->(:Movie)
(:Person)-[:FOLLOWS]->(:Person)
(:Person)-[:REVIEWED]->(:Movie)

Terminology mapping:
This section is helpful to map terminology between the user question and the graph database schema.

Persons: When a user asks about a person by trade like actor, writer, director, producer, reviewer, they are referring to a node with the label 'Person'.
Movies

In [24]:
# Wrap the assembled prompt in a single user-role message, send it through the
# chat helper, and show the statement that comes back.
user_message = {"role": "user", "content": full_prompt}
cypher = chat(messages=[user_message])
print(cypher)

MATCH (p:Person)-[:DIRECTED]->(m:Movie) RETURN p.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1


## Using the provided class

In [26]:
from text2cypher import Text2Cypher

# Instantiate the helper class with the driver imported from utils.
t2c = Text2Cypher(driver=neo4j_driver)

# Register the prompt sections in the original order:
# question, then terminology, then examples.
prompt_sections = (
    ("question", question),
    ("terminology", terminology_string),
    ("examples", formatted_examples),
)
for section_name, section_text in prompt_sections:
    t2c.set_prompt_section(section_name, section_text)

# Generate the Cypher statement through the class and display it.
cypher_from_class = t2c.generate_cypher()
print(cypher_from_class)

MATCH (p:Person)-[:DIRECTED]->(m:Movie) RETURN p.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1
