In [15]:
import neo4j
from neo4j_graphrag_custom.kg_retriever import KGRetriever
from neo4j_graphrag_custom.kg_indexer import KGIndexer
import os
import json
from dotenv import load_dotenv
from neo4j_graphrag.embeddings import SentenceTransformerEmbeddings
from neo4j_graphrag.retrievers import (
    VectorRetriever,
    VectorCypherRetriever,
    HybridRetriever,
    HybridCypherRetriever,
    Text2CypherRetriever
)

# 0. Initial setup

Note: this notebook assumes that the Neo4j database it connects to already contains an *indexed* knowledge graph, i.e. one with both a full-text index and a vector (embeddings) index.

In [8]:
# Load configuration and set up the Neo4j connection

# All files (.env and the JSON configuration files) are resolved relative to the
# current working directory, i.e. the directory the notebook is run from.
script_dir = os.getcwd()

# script_dir = os.path.dirname(os.path.abspath(__file__))  # Uncomment if running as a script

# Load environment variables from a .env file
# (override=True: values from .env replace variables already set in the environment)
dotenv_path = os.path.join(script_dir, '.env')
load_dotenv(dotenv_path, override=True)

# Open configuration files from JSON format.
# The files are read explicitly as UTF-8 so that parsing does not depend on the
# platform's default text encoding.
config_path = os.path.join(script_dir, 'kg_building_config.json')  # Configuration file of the knowledge graph builder
with open(config_path, 'r', encoding='utf-8') as kg_build_config_file:
    build_config = json.load(kg_build_config_file)
config_path = os.path.join(script_dir, 'kg_retrieval_config.json')  # Configuration file of the knowledge graph retriever
with open(config_path, 'r', encoding='utf-8') as kg_retr_config_file:
    retr_config = json.load(kg_retr_config_file)

# Neo4j connection settings, read from the environment
neo4j_uri = os.getenv('NEO4J_URI')
neo4j_username = os.getenv('NEO4J_USERNAME')
neo4j_password = os.getenv('NEO4J_PASSWORD')

# Fail early with an actionable message instead of handing a missing URI to the driver.
if not neo4j_uri:
    raise RuntimeError(
        f"NEO4J_URI is not set. Define it in the environment or in '{dotenv_path}'."
    )

driver = neo4j.GraphDatabase.driver(neo4j_uri, auth=(neo4j_username, neo4j_password))

In [13]:
def _first_index_name(indexes, index_type):
    """Return the name of the first index whose 'type' equals `index_type`.

    Args:
        indexes: Iterable of index description dicts; each must have 'type' and 'name' keys.
        index_type: Index type to look for (e.g. 'VECTOR' or 'FULLTEXT').

    Returns:
        The 'name' value of the first matching index.

    Raises:
        IndexError: If no index of the requested type is present. The exception type
            matches what indexing an empty list would raise, but with a clear message.
    """
    for index in indexes:
        if index['type'] == index_type:
            return index['name']
    raise IndexError(
        f"No {index_type} index found in the Neo4j database. "
        "This notebook requires an indexed knowledge graph; create the index first."
    )


# Create embedder, using the model name from the knowledge graph builder configuration
embedder = SentenceTransformerEmbeddings(model=build_config['embedder_config']['model_name'])

# Look up the names of the vector (text embeddings) index and the full-text index.
# If several indexes of the same type exist, the first one listed is used.
indexer = KGIndexer(driver=driver)
existing_indexes = indexer.list_all_indexes()
embeddings_index_name = _first_index_name(existing_indexes, 'VECTOR')
fulltext_index_name = _first_index_name(existing_indexes, 'FULLTEXT')

Found 5 indexes in the database:

1. {'id': 2, 'name': '__entity__id', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'RANGE', 'entityType': 'NODE', 'labelsOrTypes': ['__KGBuilder__'], 'properties': ['id'], 'indexProvider': 'range-1.0', 'owningConstraint': None, 'lastRead': neo4j.time.DateTime(2025, 6, 4, 8, 32, 21, 581000000, tzinfo=<UTC>), 'readCount': 498}

2. {'id': 4, 'name': 'embeddings_index', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'VECTOR', 'entityType': 'NODE', 'labelsOrTypes': ['Chunk'], 'properties': ['embedding'], 'indexProvider': 'vector-2.0', 'owningConstraint': None, 'lastRead': None, 'readCount': 0}

3. {'id': 3, 'name': 'fulltext_index', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'FULLTEXT', 'entityType': 'NODE', 'labelsOrTypes': ['Chunk'], 'properties': ['text'], 'indexProvider': 'fulltext-1.0', 'owningConstraint': None, 'lastRead': None, 'readCount': 0}

4. {'id': 0, 'name': 'index_343aff4e', 'state': 'ONLINE', 'populationPercent':

# 1. VectorRetriever

Similarity search using vector embeddings.

In [None]:
# Build the retriever used for similarity search over the vector index
retriever = VectorRetriever(
    driver=driver,
    embedder=embedder,  # Turns the query text into an embedding at search time
    index_name=embeddings_index_name,  # Vector index to query
    return_properties=['text'],  # Properties returned with each search result
)

TypeError: neo4j_graphrag.retrievers.base.Retriever.search() got multiple values for keyword argument 'query_text'

# 2. VectorCypherRetriever

Combines vector search with retrieval queries written in Cypher, Neo4j’s graph query language, to traverse the graph and incorporate additional nodes and relationships.

# 3. HybridRetriever

Combines vector and full-text search.

# 4. HybridCypherRetriever

Combines vector and full-text search with Cypher retrieval queries for additional graph traversal. 

# 5. Text2CypherRetriever

Converts natural-language queries into Cypher queries that are run against Neo4j. It does NOT perform full-text search or vector similarity search.

# 6. Closing the driver connection

In [None]:
# Close the Neo4j driver that was opened in the setup cell; run this once all retrieval is done.
driver.close()