In [1]:
import neo4j
from neo4j_graphrag_custom.kg_indexer import KGIndexer
import os
import json
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer

  return torch._C._cuda_getDeviceCount() > 0


# 0. Initial setup

In [2]:
# Load configuration and set up the Neo4j connection

# A notebook has no __file__, so paths are resolved against the current working directory
script_dir = os.getcwd()

# script_dir = os.path.dirname(os.path.abspath(__file__))  # Uncomment if running as a script

# Load environment variables from a .env file (override=True: file values replace existing ones)
dotenv_path = os.path.join(script_dir, '.env')
load_dotenv(dotenv_path, override=True)

# Open configuration file from JSON format (explicit UTF-8 so the platform default encoding is not used)
config_path = os.path.join(script_dir, 'kg_building_config.json')
with open(config_path, 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Neo4j connection settings, read from the environment
neo4j_uri = os.getenv('NEO4J_URI')
neo4j_username = os.getenv('NEO4J_USERNAME')
neo4j_password = os.getenv('NEO4J_PASSWORD')

# Fail fast with a clear message: os.getenv returns None for unset variables,
# which would otherwise only surface as an obscure error inside the driver.
missing_env_vars = [
    var_name
    for var_name, var_value in (
        ('NEO4J_URI', neo4j_uri),
        ('NEO4J_USERNAME', neo4j_username),
        ('NEO4J_PASSWORD', neo4j_password),
    )
    if var_value is None
]
if missing_env_vars:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(missing_env_vars)}. "
        f"Set them in the environment or in {dotenv_path}."
    )

driver = neo4j.GraphDatabase.driver(neo4j_uri, auth=(neo4j_username, neo4j_password))

In [3]:
# Index manager bound to the Neo4j driver; the cells below use it to create, inspect, list and drop indexes
indexer = KGIndexer(driver=driver)

# 1. Vector index

Create a vector index on the `embedding` property of `Chunk` nodes.

In [5]:
# Get the dimensions from the SentenceTransformer model.
# The model name is read into a variable first: reusing single quotes inside a
# single-quoted f-string is a SyntaxError on Python versions before 3.12.
model_name = config['embedder_config']['model_name']
try:
    model = SentenceTransformer(f'sentence-transformers/{model_name}')  # Load the model
    embedding_dim = model.get_sentence_embedding_dimension()  # Get the embedding dimension dynamically (only if using SentenceTransformer models!)
except Exception as e:
    print(f"Error loading model: {e}. Try using a SentenceTransformer model.")
    # Re-raise: without embedding_dim the index below cannot be created correctly
    # (it would hit a NameError, or silently reuse a stale value from an earlier run).
    raise

index_name = "embeddings_index"

indexer.create_vector_index(
    index_name=index_name,  # Name of the index
    label="Chunk",  # Node label to index
    embedding_property="embedding",  # Property of the labelled nodes that holds the embeddings
    dimensions=embedding_dim,  # Dimensions of the embeddings, dynamically set from the model
)

# Check if the index was created successfully
indexer.retrieve_vector_index_info(
    index_name=index_name,  # Name of the index to retrieve information about
    label_or_type="Chunk",  # Node label or relationship type to check for the index
    embedding_property="embedding"  # Name of the property containing the embeddings
)

Vector index 'embeddings_index' created successfully.
Vector index 'embeddings_index' exists with the following details:
<Record name='text_embeddings' type='VECTOR' entityType='NODE' labelsOrTypes=['Chunk'] properties=['embedding'] options={'indexProvider': 'vector-2.0', 'indexConfig': {'vector.hnsw.m': 16, 'vector.hnsw.ef_construction': 100, 'vector.dimensions': 384, 'vector.similarity_function': 'COSINE', 'vector.quantization.enabled': True}}>


# 2. Full text index

Create a full-text index on the `text` property of `Chunk` nodes.

In [6]:
index_name = "fulltext_index"

# Build the full-text index over the "text" property of nodes labelled "Chunk"
indexer.create_fulltext_index(index_name=index_name, label="Chunk", node_properties=["text"])

# Look the index up again (by name, label and property) to confirm it exists
indexer.retrieve_fulltext_index_info(index_name=index_name, label_or_type="Chunk", text_properties=["text"])

Full text index 'fulltext_index' created successfully.
Full text index 'fulltext_index' exists with the following details:
<Record name='fulltext_index' type='FULLTEXT' entityType='NODE' labelsOrTypes=['Chunk'] properties=['text'] options={'indexProvider': 'fulltext-1.0', 'indexConfig': {'fulltext.analyzer': 'standard-no-stop-words', 'fulltext.eventually_consistent': False}}>


# 3. List existing indexes

In [6]:
# Fetch every index currently defined in the database; the result is reused by the drop cell
existing_indexes = indexer.list_all_indexes()

# Header line (preceded by a blank line) followed by the raw list, one item per line
print("\nExisting indexes:", existing_indexes, sep="\n")

Found 3 indexes in the database:

1. {'id': 2, 'name': '__entity__id', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'RANGE', 'entityType': 'NODE', 'labelsOrTypes': ['__KGBuilder__'], 'properties': ['id'], 'indexProvider': 'range-1.0', 'owningConstraint': None, 'lastRead': neo4j.time.DateTime(2025, 6, 4, 8, 32, 21, 581000000, tzinfo=<UTC>), 'readCount': 498}

2. {'id': 0, 'name': 'index_343aff4e', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'LOOKUP', 'entityType': 'NODE', 'labelsOrTypes': None, 'properties': None, 'indexProvider': 'token-lookup-1.0', 'owningConstraint': None, 'lastRead': neo4j.time.DateTime(2025, 6, 4, 8, 32, 21, 733000000, tzinfo=<UTC>), 'readCount': 6}

3. {'id': 1, 'name': 'index_f7700477', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'LOOKUP', 'entityType': 'RELATIONSHIP', 'labelsOrTypes': None, 'properties': None, 'indexProvider': 'token-lookup-1.0', 'owningConstraint': None, 'lastRead': None, 'readCount': 0}

Existing indexes:
[{'id'

# 4. Dropping indexes

In [5]:
# Select the created indexes to drop.
# "embeddings_index" and "fulltext_index" are the names used by the creation cells of this
# notebook; "text_embeddings" is kept as well because the recorded output of the vector-index
# check shows an index with that name on the same label/property.
droppable_index_names = {"embeddings_index", "text_embeddings", "fulltext_index"}

# Keep only names that are actually present in the listing, preserving the listing order
indexes_to_drop = [index['name'] for index in existing_indexes if index['name'] in droppable_index_names]
print(indexes_to_drop)

# Drop the specified indexes if they exist
for index in indexes_to_drop:
    print(f"Dropping index: {index}")
    indexer.drop_index_if_exists(index_name=index)

['fulltext_index', 'text_embeddings']
Dropping index: fulltext_index
Index 'fulltext_index' dropped if it existed.
Dropping index: text_embeddings
Index 'text_embeddings' dropped if it existed.


# 5. Closing the driver connection

In [7]:
# Close the Neo4j driver created in the setup cell now that index management is finished
driver.close()