## Querying Knowledge Graphs with Cypher

### Import packages and set up Neo4j

In [None]:
# Uncomment to install the packages this notebook imports (Neo4j driver,
# LangChain community integrations, python-dotenv) into the kernel's environment.
# %pip install neo4j langchain-community python-dotenv

In [1]:
# Enable IPython's autoreload extension in mode 2.
%load_ext autoreload
%autoreload 2

from dotenv import load_dotenv
import os

from langchain_community.graphs import Neo4jGraph

# Warning control: silence every Python warning for the rest of the session
# so library warnings do not clutter the cell outputs.
# NOTE(review): this also hides deprecation notices — confirm that is intended.
import warnings
warnings.filterwarnings("ignore")

## Creating a database on Cloud

- First get credentials

In [2]:
# Read the Neo4j connection settings from the local .env file.
# override=True lets values in .env replace variables already set in the process.
load_dotenv('.env', override=True)
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE')  # left optional here

# Fail fast with a readable message: os.getenv returns None for anything that
# is not set, and a None URI or credential would otherwise only surface later
# as a less obvious error when the driver tries to connect.
_required_settings = {
    'NEO4J_URI': NEO4J_URI,
    'NEO4J_USERNAME': NEO4J_USERNAME,
    'NEO4J_PASSWORD': NEO4J_PASSWORD,
}
_missing_settings = [name for name, value in _required_settings.items() if not value]
if _missing_settings:
    raise RuntimeError(
        "Missing Neo4j settings (define them in .env or the environment): "
        + ", ".join(_missing_settings)
    )

In [4]:
from neo4j import GraphDatabase

# Credentials pair handed to the driver.
# URI examples: "neo4j://localhost", "neo4j+s://xxx.databases.neo4j.io"
AUTH = (NEO4J_USERNAME, NEO4J_PASSWORD)

# Open a short-lived driver only to confirm that a connection can be
# established, and always close it afterwards.
driver = GraphDatabase.driver(NEO4J_URI, auth=AUTH)
try:
    driver.verify_connectivity()
finally:
    driver.close()

- Initialize a knowledge graph instance using LangChain's Neo4j integration

In [5]:
# Graph wrapper through which every Cypher query in this notebook is sent.
kg = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
)

- Initially the default graph database is empty as can be seen by using the next query

In [8]:
# Count every node in the database, regardless of label.
# The result column is left unaliased, so its key is the literal 'count(n)'.
cypher = """
  MATCH (n) 
  RETURN count(n)
  """
result = kg.query(cypher)
result

[{'count(n)': 0}]

- I will populate it with data in the file Data/create_graph_database.cypher
- Uncomment and execute the following code if you have not populated your database.
- Additionally, the contents of the Cypher file must be uncommented first: select everything in the file, uncomment it, and then run the following code.

In [22]:
# One-time setup, kept commented out: reads the Cypher script, splits it into
# statements on ';', and runs them one at a time, printing a ✓ or ✗ line per
# statement. A failing statement is reported and the loop continues.
# NOTE(review): a plain split(';') would also cut a statement at any semicolon
# that appears inside a string literal — check the script before relying on it.
# with open('Data/create_graph_database.cypher', 'r') as file:
#     setup_script = file.read()

# # Split and execute each query from your file
# queries = [q.strip() for q in setup_script.split(';') if q.strip()]

# for query in queries:
#     try:
#         result = kg.query(query)
#         print(f"✓ Executed: {query[:50]}...")
#     except Exception as e:
#         print(f"✗ Error executing query: {e}")

## Exploring the Neo4j Graph Database with some basic commands

In [25]:
# Count all nodes and expose the total under the alias `numberOfNodes`,
# which becomes the key of the returned row.
cypher = """
  MATCH (n) 
  RETURN count(n) AS numberOfNodes
  """
result = kg.query(cypher)
result

[{'numberOfNodes': 171}]

In [26]:
# Expects `result` to still hold the single-row answer of the node-count query
# (a list whose first dict has the key 'numberOfNodes').
print(f"There are {result[0]['numberOfNodes']} nodes in this graph.")

There are 171 nodes in this graph.


In [41]:
# Count all nodes, returned as `numberOfNodes`.
# NOTE(review): this cell is identical to the node-count cell just before it —
# confirm whether the repetition is intentional.
cypher = """
  MATCH (n) 
  RETURN count(n) AS numberOfNodes
  """
result = kg.query(cypher)
result

[{'numberOfNodes': 171}]

- Find all labels

In [36]:
# labels(n) yields the list of labels on a node; DISTINCT collapses the rows
# to one per distinct label list found in the graph.
cypher = """
MATCH (n) 
RETURN DISTINCT labels(n) AS nodeLabels
"""
result = kg.query(cypher)
result

[{'nodeLabels': ['Movie']}, {'nodeLabels': ['Person']}]

In [37]:
# Group nodes by their label list and count each group, largest group first.
cypher = """
MATCH (n) 
RETURN labels(n) AS nodeLabels, count(n) AS count
ORDER BY count DESC
"""
result = kg.query(cypher)
result

[{'nodeLabels': ['Person'], 'count': 133},
 {'nodeLabels': ['Movie'], 'count': 38}]

- Find all types of edges

In [38]:
# Match every directed relationship between any two nodes and list the
# distinct relationship types, sorted by type name.
cypher = """
MATCH ()-[r]->() 
RETURN DISTINCT type(r) AS relationshipType
ORDER BY relationshipType
"""
result = kg.query(cypher)
result

[{'relationshipType': 'ACTED_IN'},
 {'relationshipType': 'DIRECTED'},
 {'relationshipType': 'FOLLOWS'},
 {'relationshipType': 'PRODUCED'},
 {'relationshipType': 'REVIEWED'},
 {'relationshipType': 'WROTE'}]

- Match only the `Movie` nodes by specifying the node label

In [39]:
# The `:Movie` label in the pattern restricts the match to Movie nodes.
cypher = """
  MATCH (m:Movie) 
  RETURN count(m) AS numberOfMovies
  """
kg.query(cypher)

[{'numberOfMovies': 38}]

- Match only the `Person` nodes

In [40]:
# Same counting pattern, restricted to nodes labelled `Person`.
# `people` is only a query variable name; the label does the filtering.
cypher = """
  MATCH (people:Person) 
  RETURN count(people) AS numberOfPeople
  """
kg.query(cypher)

[{'numberOfPeople': 133}]

- Show both of them at the same time

In [35]:
# Run the Person count and the Movie count as two sub-queries and stack their
# rows with UNION. Both branches return the same column names (`type`,
# `count`) so the rows can be combined into one result.
cypher = """
MATCH (p:Person) 
RETURN 'People' AS type, count(p) AS count
UNION
MATCH (m:Movie) 
RETURN 'Movies' AS type, count(m) AS count
"""
result = kg.query(cypher)
result

[{'type': 'People', 'count': 133}, {'type': 'Movies', 'count': 38}]

- Match a single person by specifying the value of the `name` property on the `Person` node

In [42]:
# The `{name: ...}` map inside the node pattern filters on a property value;
# returning the variable returns the node's properties.
cypher = """
  MATCH (tom:Person {name:"Tom Hanks"}) 
  RETURN tom
  """
kg.query(cypher)

[{'tom': {'born': 1956, 'name': 'Tom Hanks'}}]

- Match a single `Movie` by specifying the value of the `title` property

In [43]:
# Look up a Movie node by its `title` property and return the whole node.
cypher = """
  MATCH (cloudAtlas:Movie {title:"Cloud Atlas"}) 
  RETURN cloudAtlas
  """
kg.query(cypher)

[{'cloudAtlas': {'tagline': 'Everything is connected',
   'title': 'Cloud Atlas',
   'released': 2012}}]

- Return only the `released` property of the matched `Movie` node

In [44]:
# Return a single property instead of the whole node. With no alias, the
# result key is the expression text 'cloudAtlas.released'.
cypher = """
  MATCH (cloudAtlas:Movie {title:"Cloud Atlas"}) 
  RETURN cloudAtlas.released
  """
kg.query(cypher)

[{'cloudAtlas.released': 2012}]

- Return two properties

In [45]:
# Several properties can be returned at once as a comma-separated list;
# each becomes its own key in the result row.
cypher = """
  MATCH (cloudAtlas:Movie {title:"Cloud Atlas"}) 
  RETURN cloudAtlas.released, cloudAtlas.tagline
  """
kg.query(cypher)

[{'cloudAtlas.released': 2012,
  'cloudAtlas.tagline': 'Everything is connected'}]

### Cypher patterns with conditional matching

In [47]:
# WHERE adds conditions beyond the pattern itself: keep movies released from
# 1990 (inclusive) up to 2000 (exclusive) and return only their titles.
cypher = """
  MATCH (nineties:Movie) 
  WHERE nineties.released >= 1990 
    AND nineties.released < 2000 
  RETURN nineties.title
  """
kg.query(cypher)


[{'nineties.title': 'The Matrix'},
 {'nineties.title': "The Devil's Advocate"},
 {'nineties.title': 'A Few Good Men'},
 {'nineties.title': 'As Good as It Gets'},
 {'nineties.title': 'What Dreams May Come'},
 {'nineties.title': 'Snow Falling on Cedars'},
 {'nineties.title': "You've Got Mail"},
 {'nineties.title': 'Sleepless in Seattle'},
 {'nineties.title': 'Joe Versus the Volcano'},
 {'nineties.title': 'When Harry Met Sally'},
 {'nineties.title': 'That Thing You Do'},
 {'nineties.title': 'The Birdcage'},
 {'nineties.title': 'Unforgiven'},
 {'nineties.title': 'Johnny Mnemonic'},
 {'nineties.title': 'The Green Mile'},
 {'nineties.title': 'Hoffa'},
 {'nineties.title': 'Apollo 13'},
 {'nineties.title': 'Twister'},
 {'nineties.title': 'Bicentennial Man'},
 {'nineties.title': 'A League of Their Own'}]

### Pattern matching with multiple nodes

In [55]:
# Pattern spanning two nodes and the relationship between them:
# (Person)-[:ACTED_IN]->(Movie). type() returns the relationship's type name.
# LIMIT 10 caps the rows; the query has no ORDER BY, so it does not itself
# specify which ten rows come back.
cypher = """
MATCH (actor:Person)-[acted_in:ACTED_IN]->(movie:Movie) 
RETURN actor.name, type(acted_in) AS relationship, movie.title LIMIT 10
"""
result = kg.query(cypher)
result

[{'actor.name': 'Keanu Reeves',
  'relationship': 'ACTED_IN',
  'movie.title': 'The Matrix'},
 {'actor.name': 'Carrie-Anne Moss',
  'relationship': 'ACTED_IN',
  'movie.title': 'The Matrix'},
 {'actor.name': 'Laurence Fishburne',
  'relationship': 'ACTED_IN',
  'movie.title': 'The Matrix'},
 {'actor.name': 'Hugo Weaving',
  'relationship': 'ACTED_IN',
  'movie.title': 'The Matrix'},
 {'actor.name': 'Emil Eifrem',
  'relationship': 'ACTED_IN',
  'movie.title': 'The Matrix'},
 {'actor.name': 'Keanu Reeves',
  'relationship': 'ACTED_IN',
  'movie.title': 'The Matrix Reloaded'},
 {'actor.name': 'Carrie-Anne Moss',
  'relationship': 'ACTED_IN',
  'movie.title': 'The Matrix Reloaded'},
 {'actor.name': 'Laurence Fishburne',
  'relationship': 'ACTED_IN',
  'movie.title': 'The Matrix Reloaded'},
 {'actor.name': 'Hugo Weaving',
  'relationship': 'ACTED_IN',
  'movie.title': 'The Matrix Reloaded'},
 {'actor.name': 'Keanu Reeves',
  'relationship': 'ACTED_IN',
  'movie.title': 'The Matrix Revoluti

In [57]:
# Anchor the pattern on one person (name = "Tom Hanks") and follow that
# person's outgoing ACTED_IN relationships to the movies they point at.
cypher = """
  MATCH (tom:Person {name: "Tom Hanks"})-[acted_in:ACTED_IN]->(tomHanksMovies:Movie) 
  RETURN tom.name, type(acted_in) AS ActedIn, tomHanksMovies.title
  """
kg.query(cypher)

[{'tom.name': 'Tom Hanks',
  'ActedIn': 'ACTED_IN',
  'tomHanksMovies.title': "You've Got Mail"},
 {'tom.name': 'Tom Hanks',
  'ActedIn': 'ACTED_IN',
  'tomHanksMovies.title': 'Sleepless in Seattle'},
 {'tom.name': 'Tom Hanks',
  'ActedIn': 'ACTED_IN',
  'tomHanksMovies.title': 'Joe Versus the Volcano'},
 {'tom.name': 'Tom Hanks',
  'ActedIn': 'ACTED_IN',
  'tomHanksMovies.title': 'That Thing You Do'},
 {'tom.name': 'Tom Hanks',
  'ActedIn': 'ACTED_IN',
  'tomHanksMovies.title': 'Cloud Atlas'},
 {'tom.name': 'Tom Hanks',
  'ActedIn': 'ACTED_IN',
  'tomHanksMovies.title': 'The Da Vinci Code'},
 {'tom.name': 'Tom Hanks',
  'ActedIn': 'ACTED_IN',
  'tomHanksMovies.title': 'The Green Mile'},
 {'tom.name': 'Tom Hanks',
  'ActedIn': 'ACTED_IN',
  'tomHanksMovies.title': 'Apollo 13'},
 {'tom.name': 'Tom Hanks',
  'ActedIn': 'ACTED_IN',
  'tomHanksMovies.title': 'Cast Away'},
 {'tom.name': 'Tom Hanks',
  'ActedIn': 'ACTED_IN',
  'tomHanksMovies.title': "Charlie Wilson's War"},
 {'tom.name': 'T

In [58]:
# Two-hop pattern: from Tom Hanks along ACTED_IN to a node `m`, then back
# along an incoming ACTED_IN relationship to `coActors`. Neither `m` nor
# `coActors` carries a label here; only the relationship types constrain them.
cypher = """
  MATCH (tom:Person {name:"Tom Hanks"})-[:ACTED_IN]->(m)<-[:ACTED_IN]-(coActors) 
  RETURN coActors.name, m.title
  """
kg.query(cypher)

[{'coActors.name': 'Meg Ryan', 'm.title': "You've Got Mail"},
 {'coActors.name': 'Greg Kinnear', 'm.title': "You've Got Mail"},
 {'coActors.name': 'Parker Posey', 'm.title': "You've Got Mail"},
 {'coActors.name': 'Dave Chappelle', 'm.title': "You've Got Mail"},
 {'coActors.name': 'Steve Zahn', 'm.title': "You've Got Mail"},
 {'coActors.name': 'Meg Ryan', 'm.title': 'Sleepless in Seattle'},
 {'coActors.name': 'Rita Wilson', 'm.title': 'Sleepless in Seattle'},
 {'coActors.name': 'Bill Pullman', 'm.title': 'Sleepless in Seattle'},
 {'coActors.name': 'Victor Garber', 'm.title': 'Sleepless in Seattle'},
 {'coActors.name': "Rosie O'Donnell", 'm.title': 'Sleepless in Seattle'},
 {'coActors.name': 'Meg Ryan', 'm.title': 'Joe Versus the Volcano'},
 {'coActors.name': 'Nathan Lane', 'm.title': 'Joe Versus the Volcano'},
 {'coActors.name': 'Charlize Theron', 'm.title': 'That Thing You Do'},
 {'coActors.name': 'Liv Tyler', 'm.title': 'That Thing You Do'},
 {'coActors.name': 'Hugo Weaving', 'm.title

### Delete data from the graph

In [None]:
# Inspect which movies the person named "Emil Eifrem" has an ACTED_IN
# relationship to. This is a read-only query.
cypher = """
MATCH (emil:Person {name:"Emil Eifrem"})-[actedIn:ACTED_IN]->(movie:Movie)
RETURN emil.name, movie.title
"""
kg.query(cypher)

[{'emil.name': 'Emil Eifrem', 'movie.title': 'The Matrix'}]

In [60]:
# Same pattern as the lookup, but DELETE removes what is bound to `actedIn`,
# i.e. the relationship only. The `emil` and `movie` nodes are not deleted.
# The query has no RETURN clause.
cypher = """
MATCH (emil:Person {name:"Emil Eifrem"})-[actedIn:ACTED_IN]->(movie:Movie)
DELETE actedIn
"""
kg.query(cypher)

[]

- Notice that the nodes still exist; the only thing that was deleted was the edge:

In [65]:
# Fetch one Person node and one Movie node in a single result. Both branches
# alias their column as `result` so UNION can stack the rows.
# NOTE(review): the relationship deleted earlier belonged to "Emil Eifrem",
# while this query looks up "Keanu Reeves" — querying Emil here would check
# the effect of the deletion more directly. Confirm which was intended.
cypher = """
MATCH (keanu:Person {name:"Keanu Reeves"})
RETURN keanu AS result
UNION
MATCH (movie:Movie {title:"The Matrix"}) 
RETURN movie AS result
"""
result = kg.query(cypher)
result

[{'result': {'born': 1964, 'name': 'Keanu Reeves'}},
 {'result': {'tagline': 'Welcome to the Real World',
   'title': 'The Matrix',
   'released': 1999}}]

### Adding data to the graph

In [66]:
# Add a Person node named "Andreas" and return it.
# MERGE is used instead of CREATE so the cell is safe to re-run: it creates
# the node only when no Person with this name exists, otherwise it matches
# the existing one. With CREATE, every re-run added another "Andreas" node,
# and the relationship query that follows would then match all of them.
cypher = """
MERGE (andreas:Person {name:"Andreas"})
RETURN andreas
"""

kg.query(cypher)

[{'andreas': {'name': 'Andreas'}}]

In [67]:
# Look up both people first, then MERGE a WORKS_WITH relationship from
# Andreas to Emil. MERGE creates the relationship only if it is not already
# there, so re-running does not add a duplicate between the same two nodes.
cypher = """
MATCH (andreas:Person {name:"Andreas"}), (emil:Person {name:"Emil Eifrem"})
MERGE (andreas)-[hasRelationship:WORKS_WITH]->(emil)
RETURN andreas, hasRelationship, emil
"""
kg.query(cypher)

[{'andreas': {'name': 'Andreas'},
  'hasRelationship': ({'name': 'Andreas'},
   'WORKS_WITH',
   {'born': 1978, 'name': 'Emil Eifrem'}),
  'emil': {'born': 1978, 'name': 'Emil Eifrem'}}]

- Notice also that the new edge type was created:

In [71]:
# List the distinct relationship types again. The column is unaliased, so its
# key is 'type(r)', and there is no ORDER BY, so the query does not fix the
# row order.
cypher = """
MATCH ()-[r]->()
RETURN DISTINCT type(r)
"""
result = kg.query(cypher)
result

[{'type(r)': 'ACTED_IN'},
 {'type(r)': 'DIRECTED'},
 {'type(r)': 'PRODUCED'},
 {'type(r)': 'WROTE'},
 {'type(r)': 'FOLLOWS'},
 {'type(r)': 'REVIEWED'},
 {'type(r)': 'WORKS_WITH'}]