In [180]:
from anthropic import Anthropic
import pandas as pd
import neo4j
from neo4j import GraphDatabase
from IPython.display import display

In [3]:
client = Anthropic()
MODEL_NAME = "claude-3-opus-20240229"

In [None]:
driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "neo4j"))

In [250]:
def to_dataframe(query, driver, database):
    records, summary, keys = driver.execute_query(query, database=database)
    data = []
    for record in records:
        if isinstance(record, neo4j._data.Record):
            for k, v in record.items():
                """
                If both k and v are str type - we have terminal values 
                We check if there is dot in key, split key on dot and take the right size
                For the value:
                       if its a string we will directly assign it
                       if not string eg: a node : we will just take v instead and append to the df
                """
                if isinstance(v, str):                    
                    key = k.split('.')[-1] if '.' in k else k
                    data.append({key: v})
                elif isinstance(v, neo4j.graph.Node):
                    data.append(v)
                else:
                    print("Unknown type", type(v))
    df = pd.DataFrame.from_records(data)
    return df

In [251]:
# testing
_q = "MATCH (p:Person)-[d:DIRECTED]->(m:Movie) RETURN p.name AS director, m.title AS movie"
print(to_dataframe(_q, driver=driver, database="neo4j"))

           director                  movie
0    Lana Wachowski                    NaN
1               NaN             The Matrix
2    Andy Wachowski                    NaN
3               NaN             The Matrix
4    Lana Wachowski                    NaN
..              ...                    ...
83              NaN   Charlie Wilson's War
84  Robert Zemeckis                    NaN
85              NaN      The Polar Express
86   Penny Marshall                    NaN
87              NaN  A League of Their Own

[88 rows x 2 columns]


In [227]:
_q = "MATCH (n:Movie) RETURN n LIMIT 25"
print(to_dataframe(_q, driver=driver, database="neo4j"))

[<Record n=<Node element_id='4:8e34a30c-ce5c-42f4-90db-6003682305d4:0' labels=frozenset({'Movie'}) properties={'tagline': 'Welcome to the Real World', 'title': 'The Matrix', 'released': 1999}>>, <Record n=<Node element_id='4:8e34a30c-ce5c-42f4-90db-6003682305d4:9' labels=frozenset({'Movie'}) properties={'tagline': 'Free your mind', 'title': 'The Matrix Reloaded', 'released': 2003}>>, <Record n=<Node element_id='4:8e34a30c-ce5c-42f4-90db-6003682305d4:10' labels=frozenset({'Movie'}) properties={'tagline': 'Everything that has a beginning has an end', 'title': 'The Matrix Revolutions', 'released': 2003}>>, <Record n=<Node element_id='4:8e34a30c-ce5c-42f4-90db-6003682305d4:11' labels=frozenset({'Movie'}) properties={'tagline': 'Evil has its winning ways', 'title': "The Devil's Advocate", 'released': 1997}>>, <Record n=<Node element_id='4:8e34a30c-ce5c-42f4-90db-6003682305d4:15' labels=frozenset({'Movie'}) properties={'tagline': "In the heart of the nation's capital, in a courthouse of the 

In [178]:
query = "MATCH (m:Movie {title: 'Matrix'})<-[:ACTED_IN]-(a:Actor) RETURN a.name"
display(to_dataframe(query, driver, "neo4j"))

In [177]:
with driver.session(database="neo4j") as session:
    response = session.run("MATCH (n:Movie) RETURN n LIMIT 25")
    for res in response:
        for _,value in res.items():
           for k,v in value.items():
               print(k, v)

In [19]:
import json

# TODO: this seems like the data on the table vs schema of the graph
with driver.session(database="neo4j") as session:
    response = session.run("CALL apoc.meta.schema()")
    for res in response:
       for val in  res.values():
           print(json.dumps(val, ensure_ascii=False, indent=4))

{
    "Movie": {
        "relationships": {
            "ACTED_IN": {
                "count": 180,
                "properties": {
                    "roles": {
                        "array": true,
                        "indexed": false,
                        "existence": false,
                        "type": "LIST"
                    }
                },
                "direction": "in",
                "labels": [
                    "Person",
                    "Actor"
                ]
            },
            "REVIEWED": {
                "count": 8,
                "properties": {
                    "summary": {
                        "array": false,
                        "indexed": false,
                        "existence": false,
                        "type": "STRING"
                    },
                    "rating": {
                        "array": false,
                        "indexed": false,
                        "existence": false,
           

In [76]:
def get_schema(driver) -> str:
    with driver.session(database="neo4j") as session:
        response = session.run("CALL db.schema.visualization()")
        
        nodes = []
        relationships = []

        for res in response:
            for val in res.values():
                for comp in val: # here val is either array of node or relationship
                    #print(type(comp))                
                    if isinstance(comp, neo4j.graph.Node):
                        nodes.append(list(comp.labels)[0])
                    else:
                        relationships.append(("(:{src})-[{rel}]->(:{dst})".format(src=list(comp.start_node.labels)[0], rel=comp.type, dst=list(comp.end_node.labels)[0])))
        node_str = "\n".join(nodes)
        relation_str = "\n".join(relationships)

        return "\n".join(["Nodes:\n", node_str, "\nRelations:\n", relation_str])                       

In [90]:
schema =  get_schema(driver)

### Generate Cypher Query using Neo4J

In [92]:
def ask_claude(query, schema):
    prompt = f""" Here is the schema for a database
    {schema}

    Given this schema, can you output a Cypher to answer the following questions? Only output the cypher query and nothing else.

    Question:  {query}
    """
    
    response = client.messages.create(
        model=MODEL_NAME,
        max_tokens=2048,
        messages = [
            {'role': 'user', 'content': prompt}
        ]
    )

    return response.content[0].text

In [94]:
res = ask_claude("Get all movies that start with T", schema)

print(res)

MATCH (m:Movie)
WHERE m.title STARTS WITH 'T'
RETURN m


In [179]:
res = ask_claude("Get the cast of the movie the matrix", schema)

print(res)

MATCH (m:Movie {title: 'The Matrix'})<-[:ACTED_IN]-(a:Actor)
RETURN m.title AS movie, collect(a.name) AS cast


In [248]:
def nl_to_df(query, schema=schema, driver=driver, database="neo4j", verbose=False) -> pd.DataFrame:
    """
    Given a query in natural language. Convert it into a dataframe
    """
    res = ask_claude(query, schema) # this return cypher query
    if verbose:
        print(res)
    if res:
        df = to_dataframe(res, driver, database) # this converts cypher response to dataframe
        return df
    else:
        return pd.DataFrame.empty()


In [230]:
display(nl_to_df("Show me 10 movies"))

Unnamed: 0,released,tagline,title
0,1999,Welcome to the Real World,The Matrix
1,2003,Free your mind,The Matrix Reloaded
2,2003,Everything that has a beginning has an end,The Matrix Revolutions
3,1997,Evil has its winning ways,The Devil's Advocate
4,1992,"In the heart of the nation's capital, in a cou...",A Few Good Men
5,1986,"I feel the need, the need for speed.",Top Gun
6,2000,The rest of his life begins now.,Jerry Maguire
7,1986,"For some, it's the last real taste of innocenc...",Stand By Me
8,1997,A comedy from the heart that goes for the throat.,As Good as It Gets
9,1998,After life there is more. The end is just the ...,What Dreams May Come


In [229]:
display(nl_to_df("Show me 10 movies with their title only with no repeats"))

Unnamed: 0,title
0,The Matrix
1,The Matrix Reloaded
2,The Matrix Revolutions
3,The Devil's Advocate
4,A Few Good Men
5,Top Gun
6,Jerry Maguire
7,Stand By Me
8,As Good as It Gets
9,What Dreams May Come


In [231]:
display(nl_to_df("Who are the cast of A Few Good Men?",  verbose=True))

MATCH (m:Movie {title: "A Few Good Men"})<-[:ACTED_IN]-(p:Person)
RETURN p.name


Unnamed: 0,name
0,James Marshall
1,Kevin Pollak
2,J.T. Walsh
3,Aaron Sorkin
4,Cuba Gooding Jr.
5,Christopher Guest
6,Noah Wyle
7,Kiefer Sutherland
8,Kevin Bacon
9,Demi Moore


In [232]:
display(nl_to_df("Movies by Tom Hanks",  verbose=True))

MATCH (tom:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(movie:Movie)
RETURN movie


Unnamed: 0,released,tagline,title
0,1998,At odds in life... in love on-line.,You've Got Mail
1,1995,"Houston, we have a problem.",Apollo 13
2,1990,"A story of love, lava and burning desire.",Joe Versus the Volcano
3,1996,In every life there comes a time when that thi...,That Thing You Do
4,2012,Everything is connected,Cloud Atlas
5,2006,Break The Codes,The Da Vinci Code
6,1993,"What if someone you never met, someone you nev...",Sleepless in Seattle
7,1992,Once in a lifetime you get a chance to do some...,A League of Their Own
8,1999,Walk a mile you'll never forget.,The Green Mile
9,2007,A stiff drink. A little mascara. A lot of nerv...,Charlie Wilson's War


In [233]:
display(nl_to_df("Show me movies by Tom Hanks released after 2000",  verbose=True))

MATCH (p:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie)
WHERE m.released > 2000
RETURN m


Unnamed: 0,released,tagline,title
0,2012,Everything is connected,Cloud Atlas
1,2006,Break The Codes,The Da Vinci Code
2,2007,A stiff drink. A little mascara. A lot of nerv...,Charlie Wilson's War
3,2004,This Holiday Season… Believe,The Polar Express


In [234]:
display(nl_to_df("Show movies by Tom Cruise and Tom Hanks",  verbose=True))

MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)
WHERE a.name IN ['Tom Cruise', 'Tom Hanks']
RETURN m


Unnamed: 0,name
0,Top Gun


In [240]:
display(nl_to_df("Who is the director of top gun?"))

Unnamed: 0,name
0,Tony Scott


In [253]:
display(nl_to_df("Who are some of the directors of Tom Cruise", verbose=True))

MATCH (tom:Person {name: "Tom Cruise"})-[:ACTED_IN]->(m:Movie)<-[:DIRECTED]-(director:Person)
RETURN DISTINCT director.name


Unnamed: 0,name
0,Cameron Crowe
1,Tony Scott
2,Rob Reiner
