In [2]:
!uv pip install neo4j-graphrag neo4j openai

[2mUsing Python 3.13.5 environment at: /Users/mac/Documents/PHUNGPX/knowledge_graph_searching/.venv[0m
[2mAudited [1m3 packages[0m [2min 22ms[0m[0m


## Vector Search

https://neo4j.com/blog/developer/get-started-graphrag-python-package/

- VectorRetriever

In [1]:
from neo4j import GraphDatabase
# Connection settings for the Neo4j Labs demo database
URI = "neo4j+s://demo.neo4jlabs.com"
AUTH = ("recommendations", "recommendations")  # (username, password)

# Open the driver that the retrievers in this notebook are built on
driver = GraphDatabase.driver(URI, auth=AUTH)

In [4]:
import os
from dotenv import load_dotenv

# Load variables from the local .env file into the process environment
load_dotenv(".env")

# The OpenAI clients used in this notebook read OPENAI_API_KEY, while the key
# is stored under LLM_API_KEY. Fail early with a readable message when the key
# is missing or empty instead of a bare KeyError (or a confusing
# authentication error much later in the notebook).
llm_api_key = os.environ.get("LLM_API_KEY")
if not llm_api_key:
    raise KeyError(
        "LLM_API_KEY is not set; define it in .env or in the environment "
        "before running this notebook"
    )
os.environ["OPENAI_API_KEY"] = llm_api_key

In [5]:
from neo4j_graphrag.retrievers import VectorRetriever
from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings

# Embedding model used by the retriever to embed the query text
embedder = OpenAIEmbeddings(model="text-embedding-ada-002")

# Vector-only retriever over the movie-plot embedding index; each hit
# returns the movie's title and plot properties
retriever_options = {
    "index_name": "moviePlotsEmbedding",
    "embedder": embedder,
    "return_properties": ["title", "plot"],
}
retriever = VectorRetriever(driver, **retriever_options)

In [6]:
# Alternative query to try:
# query_text = "A movie about the famous sinking of the Titanic"
query_text = "What movies are sad romances?"
retriever_result = retriever.search(query_text=query_text, top_k=3)

# Print every hit together with its score
for rank, hit in enumerate(retriever_result.items):
    print(f"Result {rank}: {hit.content} - {hit.metadata['score']}")

Result 0: {'title': 'Autumn in New York', 'plot': 'Romantic drama about an aging playboy who falls for a sweet, but terminally ill, young woman.'} - 0.9190239906311035
Result 1: {'title': 'Bed of Roses', 'plot': 'Romantic drama about a young career girl who is swept off her feet by a shy florist, who fell in love with her after one glimpse through a shadowy window.'} - 0.9163970947265625
Result 2: {'title': 'Date Movie', 'plot': 'Spoof of romantic comedies which focuses on a man, his crush, his parents, and her father.'} - 0.9157435297966003


In [7]:
# A question that hinges on exact terms ("1375", "Imperial China")
query_text = "What is the name of the movie set in 1375 in Imperial China?"
retriever_result = retriever.search(query_text=query_text, top_k=3)

# Print every hit together with its score
for rank, hit in enumerate(retriever_result.items):
    print(f"Result {rank}: {hit.content} - {hit.metadata['score']}")

Result 0: {'title': 'Once Upon a Time in China (Wong Fei Hung)', 'plot': "Set in late 19th century Canton this martial arts film depicts the stance taken by the legendary martial arts hero Wong Fei-Hung (1847-1924) against foreign forces' (English, French and ..."} - 0.920971155166626
Result 1: {'title': 'Once Upon a Time in China II (Wong Fei-hung Ji Yi: Naam yi dong ji keung)', 'plot': 'In the sequel to the Tsui Hark classic, Wong Fei-Hung faces The White Lotus society, a fanatical cult seeking to drive the Europeans out of China through violence, even attacking Chinese ...'} - 0.9179518222808838
Result 2: {'title': 'Last Emperor, The', 'plot': 'The story of the final Emperor of China.'} - 0.9168814420700073


In [8]:
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.generation import GraphRAG

# Chat model that writes the final answer.
# OPENAI_API_KEY must already be present in the environment.
llm = OpenAILLM(model_name="gpt-4o", model_params={"temperature": 0})

# RAG pipeline: context comes from `retriever`, the answer from `llm`
rag = GraphRAG(llm=llm, retriever=retriever)

# Ask the question, requesting top_k=5 results from the retriever
# query_text = "What movies are sad romances?"
query_text = "What is the name of the movie set in 1375 in Imperial China?"
search_config = {"top_k": 5}
response = rag.search(query_text=query_text, retriever_config=search_config)
print(response.answer)

The provided context does not include a movie set in 1375 in Imperial China.


## Hybrid Retrieval (Vector Search + Full Text Search)

https://neo4j.com/blog/developer/hybrid-retrieval-graphrag-python-package/

- HybridRetriever

In [22]:
from neo4j import GraphDatabase

# Connection settings for the Neo4j Labs demo database
URI = "neo4j+s://demo.neo4jlabs.com"
AUTH = ("recommendations", "recommendations")  # (username, password)

# Driver used by the retrievers in this section
driver = GraphDatabase.driver(URI, auth=AUTH)

In [23]:
from neo4j import GraphDatabase
from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
from neo4j_graphrag.retrievers import VectorRetriever

# Reuse the `driver` opened in the previous cell instead of creating another
# one: each GraphDatabase.driver(...) call opens its own connection pool, and
# rebinding the name without calling close() leaks the old pool.
embedder = OpenAIEmbeddings(model="text-embedding-ada-002")

# Plain vector retriever, used as the baseline for the hybrid comparison
retriever = VectorRetriever(
    driver,
    index_name="moviePlotsEmbedding",
    embedder=embedder,
    return_properties=["title", "plot"],
)

In [24]:
# Baseline: vector-only search for the "1375 / Imperial China" question
query_text = "What is the name of the movie set in 1375 in Imperial China?"
retriever_result = retriever.search(top_k=3, query_text=query_text)

# Dump the raw result object to show its structure
print(retriever_result)

items=[RetrieverResultItem(content='{\'title\': \'Once Upon a Time in China (Wong Fei Hung)\', \'plot\': "Set in late 19th century Canton this martial arts film depicts the stance taken by the legendary martial arts hero Wong Fei-Hung (1847-1924) against foreign forces\' (English, French and ..."}', metadata={'score': 0.920971155166626, 'nodeLabels': ['Movie'], 'id': '4:6636a433-9f85-4f47-b4a7-e2cd79149f79:4789'}), RetrieverResultItem(content="{'title': 'Once Upon a Time in China II (Wong Fei-hung Ji Yi: Naam yi dong ji keung)', 'plot': 'In the sequel to the Tsui Hark classic, Wong Fei-Hung faces The White Lotus society, a fanatical cult seeking to drive the Europeans out of China through violence, even attacking Chinese ...'}", metadata={'score': 0.9179518222808838, 'nodeLabels': ['Movie'], 'id': '4:6636a433-9f85-4f47-b4a7-e2cd79149f79:4790'}), RetrieverResultItem(content="{'title': 'Last Emperor, The', 'plot': 'The story of the final Emperor of China.'}", metadata={'score': 0.9168814

In [25]:
# Readable view of the same result: one line per hit with its score
for rank, hit in enumerate(retriever_result.items):
    print(f"Result {rank}: {hit.content} - {hit.metadata['score']}")

Result 0: {'title': 'Once Upon a Time in China (Wong Fei Hung)', 'plot': "Set in late 19th century Canton this martial arts film depicts the stance taken by the legendary martial arts hero Wong Fei-Hung (1847-1924) against foreign forces' (English, French and ..."} - 0.920971155166626
Result 1: {'title': 'Once Upon a Time in China II (Wong Fei-hung Ji Yi: Naam yi dong ji keung)', 'plot': 'In the sequel to the Tsui Hark classic, Wong Fei-Hung faces The White Lotus society, a fanatical cult seeking to drive the Europeans out of China through violence, even attacking Chinese ...'} - 0.9179518222808838
Result 2: {'title': 'Last Emperor, The', 'plot': 'The story of the final Emperor of China.'} - 0.9168814420700073


In [26]:
from neo4j import GraphDatabase
from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
from neo4j_graphrag.retrievers import HybridRetriever

# Reuse the `driver` created earlier in this section rather than opening yet
# another one: every GraphDatabase.driver(...) call creates a new connection
# pool, and the previous pool is leaked when the name is simply rebound.
embedder = OpenAIEmbeddings(model="text-embedding-ada-002")

# Hybrid retriever: searches both the vector index and the full-text index
retriever = HybridRetriever(
    driver=driver,
    vector_index_name="moviePlotsEmbedding",
    fulltext_index_name="movieFulltext",
    embedder=embedder,
    return_properties=["title", "plot"],
)

In [27]:
# Same question as the vector-only baseline, now through the hybrid retriever
query_text = "What is the name of the movie set in 1375 in Imperial China?"
retriever_result = retriever.search(query_text=query_text, top_k=3)

# Print every hit together with its score
for rank, hit in enumerate(retriever_result.items):
    print(f"Result {rank}: {hit.content} - {hit.metadata['score']}")

Result 0: {'title': 'Musa the Warrior (Musa)', 'plot': '1375. Nine Koryo warriors, envoys exiled by Imperial China, battle to protect a Chinese Ming Princess from Mongolian troops.'} - 1.0
Result 1: {'title': 'Once Upon a Time in China (Wong Fei Hung)', 'plot': "Set in late 19th century Canton this martial arts film depicts the stance taken by the legendary martial arts hero Wong Fei-Hung (1847-1924) against foreign forces' (English, French and ..."} - 1.0
Result 2: {'title': 'Once Upon a Time in China II (Wong Fei-hung Ji Yi: Naam yi dong ji keung)', 'plot': 'In the sequel to the Tsui Hark classic, Wong Fei-Hung faces The White Lotus society, a fanatical cult seeking to drive the Europeans out of China through violence, even attacking Chinese ...'} - 0.9967439367893461


In [33]:
# A question whose answer (the actors) is not part of the returned properties
query_text = "Who were the actors in the movie about the magic jungle board game?"
retriever_result = retriever.search(query_text=query_text, top_k=3)

# Print every hit together with its score
for rank, hit in enumerate(retriever_result.items):
    print(f"Result {rank}: {hit.content} - {hit.metadata['score']}")

Result 0: {'title': 'Jungle Book, The', 'plot': "Rudyard Kipling's classic tale of Mowgli, the orphaned jungle boy raised by wolves, and how he becomes king of the jungle."} - 1.0
Result 1: {'title': 'Jumanji', 'plot': 'When two kids find and play a magical board game, they release a man trapped for decades in it and a host of dangers that can only be stopped by finishing the game.'} - 1.0
Result 2: {'title': 'Welcome to the Jungle', 'plot': 'A company retreat on a tropical island goes terribly awry.'} - 0.9773221925250933


In [28]:
from neo4j_graphrag.generation import GraphRAG
# OpenAILLM is used below but was not imported in this cell; without this
# import the cell raises NameError unless an earlier section's cell has run.
from neo4j_graphrag.llm import OpenAILLM

# LLM
# Note: the OPENAI_API_KEY must be in the env vars
llm = OpenAILLM(model_name="gpt-4o", model_params={"temperature": 0})

# Initialize the RAG pipeline on top of the current `retriever`
rag = GraphRAG(retriever=retriever, llm=llm)

In [29]:
# Ask the RAG pipeline, requesting top_k=3 results from the retriever
query_text = "What is the name of the movie set in 1375 in Imperial China?"
search_config = {"top_k": 3}
response = rag.search(query_text=query_text, retriever_config=search_config)
print(response.answer)

Musa the Warrior (Musa)


In [37]:
# Ask about the actors; the retriever's context only holds title and plot
query_text = "Who were the actors in the movie about the magic jungle board game?"
search_config = {"top_k": 3}
response = rag.search(query_text=query_text, retriever_config=search_config)
print(response.answer)

The movie about the magic jungle board game is "Jumanji." The actors in the original 1995 film include Robin Williams, Kirsten Dunst, Bonnie Hunt, and Bradley Pierce. In the 2017 sequel "Jumanji: Welcome to the Jungle," the main actors are Dwayne Johnson, Kevin Hart, Jack Black, and Karen Gillan.


## Enrich Vector Search with Graph Traversal

https://neo4j.com/blog/developer/graph-traversal-graphrag-python-package/

- VectorCypherRetriever

In [4]:
# Cypher fragment handed to the retriever as `retrieval_query`.
# It matches Actor nodes that have an ACTED_IN relationship to `node` and
# returns the node's title and plot plus the collected list of actor names.
# NOTE(review): `node` is presumably the index hit bound by the retriever
# before this fragment runs — confirm against the neo4j-graphrag docs.
retrieval_query = """
MATCH
(actor:Actor)-[:ACTED_IN]->(node)
RETURN
node.title AS movie_title,
node.plot AS movie_plot, 
collect(actor.name) AS actors;
"""

In [5]:
from neo4j import GraphDatabase
from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
from neo4j_graphrag.retrievers import VectorCypherRetriever

# Reuse the `driver` created earlier in the notebook: opening a new one here
# would create another connection pool and leak the previous, unclosed one.
embedder = OpenAIEmbeddings(model="text-embedding-ada-002")

# Vector search whose hits are expanded by the Cypher in `retrieval_query`
vc_retriever = VectorCypherRetriever(
    driver,
    index_name="moviePlotsEmbedding",
    embedder=embedder,
    retrieval_query=retrieval_query,
)

In [6]:
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.generation import GraphRAG

# Chat model that writes the final answer.
# OPENAI_API_KEY must already be present in the environment.
llm_params = {"temperature": 0}
llm = OpenAILLM(model_name="gpt-4o", model_params=llm_params)

# RAG pipeline backed by the vector + Cypher retriever
rag = GraphRAG(llm=llm, retriever=vc_retriever)

In [7]:
# Ask about the actors now that the retrieved context includes actor names
query_text = "Who were the actors in the movie about the magic jungle board game?"
search_config = {"top_k": 3}
response = rag.search(query_text=query_text, retriever_config=search_config)
print(response.answer)

The actors in the movie about the magic jungle board game, "Jumanji," were Robin Williams, Bradley Pierce, Kirsten Dunst, and Jonathan Hyde.


In [8]:
# Exact-term question ("1375", "Imperial China") through the vector + Cypher pipeline
query_text = "What are the names of the actors in the movie set in 1375 in Imperial China?"
search_config = {"top_k": 3}
response = rag.search(query_text=query_text, retriever_config=search_config)
print(response.answer)

The context provided does not include information about a movie set in 1375 in Imperial China, so I cannot provide the names of the actors for that specific movie.


## Hybrid Cypher Retriever

https://neo4j.com/blog/developer/enhancing-hybrid-retrieval-graphrag-python-package/

- HybridCypherRetriever

In [9]:
# Cypher fragment handed to the hybrid retriever as `retrieval_query`.
# It matches Actor nodes that have an ACTED_IN relationship to `node` and
# returns the node's title and plot plus the collected list of actor names.
# NOTE(review): `node` is presumably the index hit bound by the retriever
# before this fragment runs — confirm against the neo4j-graphrag docs.
retrieval_query = """
MATCH
(actor:Actor)-[:ACTED_IN]->(node)
RETURN
node.title AS movie_title,
node.plot AS movie_plot, 
collect(actor.name) AS actors;
"""

In [10]:
from neo4j import GraphDatabase
from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
from neo4j_graphrag.retrievers import HybridCypherRetriever

# Reuse the `driver` created earlier in the notebook: opening a new one here
# would create another connection pool and leak the previous, unclosed one.
embedder = OpenAIEmbeddings(model="text-embedding-ada-002")

# Hybrid (vector + full-text) search whose hits are expanded by the Cypher
# in `retrieval_query`
retriever = HybridCypherRetriever(
    driver=driver,
    vector_index_name="moviePlotsEmbedding",
    fulltext_index_name="movieFulltext",
    retrieval_query=retrieval_query,
    embedder=embedder,
)

In [12]:
# Inspect what the hybrid + Cypher retriever returns for the actor question
query_text = "What are the names of the actors in the movie set in 1375 in Imperial China?"
retriever_result = retriever.search(query_text=query_text, top_k=3)

# Print the full metadata object next to each hit (no ["score"] lookup here;
# the recorded output of this cell shows the metadata as None)
for rank, hit in enumerate(retriever_result.items):
    metadata = hit.metadata
    print(f"Result {rank}: {hit.content} - {metadata}")

Result 0: <Record movie_title='Musa the Warrior (Musa)' movie_plot='1375. Nine Koryo warriors, envoys exiled by Imperial China, battle to protect a Chinese Ming Princess from Mongolian troops.' actors=['Irrfan Khan', 'Ziyi Zhang', 'Sung-kee Ahn', 'Jin-mo Ju']> - None
Result 1: <Record movie_title='Once Upon a Time in China (Wong Fei Hung)' movie_plot="Set in late 19th century Canton this martial arts film depicts the stance taken by the legendary martial arts hero Wong Fei-Hung (1847-1924) against foreign forces' (English, French and ..." actors=['Jet Li', 'Biao Yuen', 'Rosamund Kwan', 'Jacky Cheung']> - None
Result 2: <Record movie_title='Once Upon a Time in China III (Wong Fei-hung tsi sam: Siwong tsangba)' movie_plot="Master Wong and his disciples enroll in the 'Dancing Lion Competition' to stop an assassination plot and to battle an arrogant, deceitful opponent." actors=['Xin Xin Xiong', 'Siu Chung Mok', 'Rosamund Kwan', 'Jet Li']> - None


In [13]:
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.generation import GraphRAG

# Chat model that writes the final answer.
# OPENAI_API_KEY must already be present in the environment.
llm_params = {"temperature": 0}
llm = OpenAILLM(model_name="gpt-4o", model_params=llm_params)

# RAG pipeline backed by the current `retriever`
rag = GraphRAG(llm=llm, retriever=retriever)

In [14]:
# Ask the actor question through the hybrid + Cypher RAG pipeline
query_text = "What are the names of the actors in the movie set in 1375 in Imperial China?"
search_config = {"top_k": 3}
response = rag.search(query_text=query_text, retriever_config=search_config)
print(response.answer)

The actors in the movie set in 1375 in Imperial China, "Musa the Warrior (Musa)," are Irrfan Khan, Ziyi Zhang, Sung-kee Ahn, and Jin-mo Ju.
