# Querying

In [1]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph.storage.graph.falkordb import FalkorDBGraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory

# Make the FalkorDB graph-store factory known to GraphStoreFactory
# before a graph store is requested from it.
GraphStoreFactory.register(FalkorDBGraphStoreFactory)

# Build both stores from the GRAPH_STORE / VECTOR_STORE environment
# variables (presumably populated by the %dotenv magic at the top of this
# cell); a missing variable raises KeyError.
graph_store = GraphStoreFactory.for_graph_store(
    os.environ['GRAPH_STORE'],
)
vector_store = VectorStoreFactory.for_vector_store(
    os.environ['VECTOR_STORE'],
)

In [2]:
# Hydrate GraphRAGConfig and warm up LLM + embedding clients
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine, set_logging_config
from graphrag_toolkit.lexical_graph.tenant_id import TenantId

# Logging is optional; INFO makes the toolkit report its progress.
set_logging_config("INFO")

# Throwaway engine construction scoped to tenant "awsgraph1". The result is
# discarded (bound to `_`); the author's intent is that this call loads the
# embedding and region configuration before the real engine is built.
# Relies on `graph_store` / `vector_store` already existing in the kernel.
_ = LexicalGraphQueryEngine.for_semantic_guided_search(
    tenant_id=TenantId("awsgraph1"),
    vector_store=vector_store,
    graph_store=graph_store,
)


In [3]:
from graphrag_toolkit.lexical_graph.retrieval.retrievers import (
    RerankingBeamGraphSearch,
    StatementCosineSimilaritySearch,
    KeywordRankingSearch
)
from graphrag_toolkit.lexical_graph.retrieval.post_processors import SentenceReranker
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine

# --- Retrievers ---------------------------------------------------------
# All three retrievers share the graph/vector stores created earlier in the
# notebook; only their tuning arguments differ.
cosine_retriever = StatementCosineSimilaritySearch(
    vector_store=vector_store, graph_store=graph_store, top_k=50
)
keyword_retriever = KeywordRankingSearch(
    vector_store=vector_store, graph_store=graph_store, max_keywords=10
)

# Reranker handed to the beam search below.
reranker = SentenceReranker(
    batch_size=128,
)

# Beam search seeded by the cosine and keyword retrievers.
beam_retriever = RerankingBeamGraphSearch(
    vector_store=vector_store,
    graph_store=graph_store,
    initial_retrievers=[cosine_retriever, keyword_retriever],
    reranker=reranker,
    beam_width=100,
    max_depth=8,
)

# --- Query engine -------------------------------------------------------
# Built without a tenant_id; the author expects the earlier bootstrap call
# to have loaded the configuration already.
query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    vector_store=vector_store,
    graph_store=graph_store,
    retrievers=[cosine_retriever, keyword_retriever, beam_retriever],
)

# --- Run the question and print the generated answer text ---------------
response = query_engine.query(
    "What are the differences between Neptune Database and Neptune Analytics?"
)
print(response.response)


  from .autonotebook import tqdm as notebook_tqdm


2025-05-06 09:13:28:INFO:datasets       :PyTorch version 2.7.0 available.
I apologize, but I do not have enough information from the provided search results to accurately answer your question about the differences between Neptune Database and Neptune Analytics. The search results are empty, so I cannot make any factual statements or comparisons between these two services. To provide a proper response, I would need specific information about both Neptune Database and Neptune Analytics from reliable sources.


In [2]:
# Hydrate GraphRAGConfig and warm up LLM + embedding clients
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine, set_logging_config
from graphrag_toolkit.lexical_graph.tenant_id import TenantId
from graphrag_toolkit.lexical_graph.retrieval.retrievers import (
    RerankingBeamGraphSearch,
    StatementCosineSimilaritySearch,
    KeywordRankingSearch
)
from graphrag_toolkit.lexical_graph.retrieval.post_processors import SentenceReranker

# NOTE(review): this cell repeats, in a single cell, the bootstrap and
# retriever/query setup that also appears in the two preceding cells.
# It does not create `graph_store` / `vector_store`; both must already exist
# in the kernel (they are created in the first cell of the notebook).

# Optional: enable logging to see detailed progress
set_logging_config("INFO")

# First, run a bootstrap call with tenant_id; the result is discarded.
# The author's intent is to hydrate dynamic config internals.
# NOTE(review): whether this call has that effect is not visible here -- confirm.
_ = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store=graph_store,
    vector_store=vector_store,
    tenant_id=TenantId("awsgraph1")  # intended to make embedding and region config load -- TODO confirm
)

# Then build advanced retrievers
# Cosine-similarity retriever (top_k=50).
cosine_retriever = StatementCosineSimilaritySearch(
    graph_store=graph_store,
    vector_store=vector_store,
    top_k=50
)

# Keyword-ranking retriever (max_keywords=10).
keyword_retriever = KeywordRankingSearch(
    graph_store=graph_store,
    vector_store=vector_store,
    max_keywords=10
)

# Reranker passed to the beam search below.
reranker = SentenceReranker(batch_size=128)

# Beam search seeded by the two retrievers above.
beam_retriever = RerankingBeamGraphSearch(
    graph_store=graph_store,
    vector_store=vector_store,
    reranker=reranker,
    initial_retrievers=[cosine_retriever, keyword_retriever],
    max_depth=8,
    beam_width=100
)

# Now initialize the real query engine, this time without a tenant_id.
# NOTE(review): the output recorded under this cell says the search results
# were empty, so the assumption that tenant_id is optional after the
# bootstrap call should be verified.
query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store=graph_store,
    vector_store=vector_store,
    retrievers=[cosine_retriever, keyword_retriever, beam_retriever]
)

# Execute a query and print the generated answer text
response = query_engine.query("What are the differences between Neptune Database and Neptune Analytics?")
print(response.response)


  from .autonotebook import tqdm as notebook_tqdm


2025-05-06 09:11:47:INFO:datasets       :PyTorch version 2.7.0 available.
I apologize, but I do not have enough information from the provided search results to accurately answer your question about the differences between Neptune Database and Neptune Analytics. The search results are empty, so I cannot make any factual statements or comparisons between these two services. To provide a proper answer, I would need specific information about the features, capabilities, and use cases of both Neptune Database and Neptune Analytics.


In [4]:
# Inspect the current GraphRAGConfig object.
# This cell only prints the object; it does not assign any profile, region
# or other setting.
# NOTE(review): the recorded output includes the AWS profile name and a very
# long repr -- consider clearing it before sharing the notebook.
from graphrag_toolkit.lexical_graph import GraphRAGConfig
print(f"GraphRAGConfig: {GraphRAGConfig}")

GraphRAGConfig: _GraphRAGConfig(_aws_profile='padmin', _aws_region='us-east-1', _aws_clients={}, _extraction_llm=None, _response_llm=BedrockConverse(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x7ed0af3b05c0>, system_prompt=None, messages_to_prompt=<function messages_to_prompt at 0x7ed19326ade0>, completion_to_prompt=<function default_completion_to_prompt at 0x7ed1930d6660>, output_parser=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>, query_wrapper_prompt=None, model='us.anthropic.claude-3-5-sonnet-20240620-v1:0', temperature=0.0, max_tokens=4096, profile_name='padmin', aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, region_name='us-east-1', botocore_session=None, botocore_config=None, max_retries=10, timeout=60.0, guardrail_identifier=None, guardrail_version=None, trace=None, additional_kwargs={}), _embed_model=BedrockEmbedding(model_name='cohere.embed-english-v3', embed_batch_size=10, callback_manager=

In [2]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import set_logging_config
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.tenant_id import TenantId
from graphrag_toolkit.lexical_graph.retrieval.retrievers import RerankingBeamGraphSearch, StatementCosineSimilaritySearch, KeywordRankingSearch
# `SentenceReranker` was previously listed twice on this line and `os` was
# imported a second time below it; each name is now imported exactly once.
from graphrag_toolkit.lexical_graph.retrieval.post_processors import SentenceReranker, StatementDiversityPostProcessor, StatementEnhancementPostProcessor

set_logging_config('INFO')

# NOTE(review): this retriever is created but never passed to the query
# engine below. It is kept because other cells may rely on the name being
# defined in the kernel. `vector_store` / `graph_store` must already exist.
cosine_retriever = StatementCosineSimilaritySearch(
    vector_store=vector_store,
    graph_store=graph_store,
    top_k=50
)

# Engine scoped to tenant "awsgraph1", using the engine's default retrievers
# and post-processors (none are passed explicitly).
query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store,
    vector_store,
    tenant_id=TenantId("awsgraph1"),

    # Optional post-processing pipeline, currently disabled:
    #post_processors=[
    #    SentenceReranker(),
    #    StatementDiversityPostProcessor(),
    #    StatementEnhancementPostProcessor()
    #]
)

response = query_engine.query("What are the differences between Neptune Database and Neptune Analytics?")

# Print only the generated answer text.
print(response.response)



I apologize, but I do not have enough information from the provided search results to accurately answer your question about the differences between Neptune Database and Neptune Analytics. The search results are empty, so I cannot make any claims or comparisons between these two services. To provide an accurate response, I would need specific information about both Neptune Database and Neptune Analytics from reliable sources.


In [7]:
# Print the current GraphRAGConfig object for inspection.
# Nothing is configured in this cell: there are no assignments, only a print.
# NOTE(review): the recorded output exposes the AWS profile name and is very
# long -- consider clearing it before committing the notebook.
from graphrag_toolkit.lexical_graph import GraphRAGConfig
print(f"GraphRAGConfig: {GraphRAGConfig}")

GraphRAGConfig: _GraphRAGConfig(_aws_profile='padmin', _aws_region='us-east-1', _aws_clients={}, _extraction_llm=None, _response_llm=BedrockConverse(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x710f9bfb6810>, system_prompt=None, messages_to_prompt=<function messages_to_prompt at 0x71107f1aae80>, completion_to_prompt=<function default_completion_to_prompt at 0x71107f212700>, output_parser=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>, query_wrapper_prompt=None, model='us.anthropic.claude-3-5-sonnet-20240620-v1:0', temperature=0.0, max_tokens=4096, profile_name='padmin', aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, region_name='us-east-1', botocore_session=None, botocore_config=None, max_retries=10, timeout=60.0, guardrail_identifier=None, guardrail_version=None, trace=None, additional_kwargs={}), _embed_model=BedrockEmbedding(model_name='cohere.embed-english-v3', embed_batch_size=10, callback_manager=

In [6]:
from graphrag_toolkit.lexical_graph import GraphRAGConfig
from llama_index.llms.bedrock_converse import BedrockConverse
from llama_index.embeddings.bedrock import BedrockEmbedding

# GraphRAGConfig is a shared object exported by the toolkit; `cfg` is only a
# shorter alias for it, so every assignment below affects the shared config.
cfg = GraphRAGConfig

# All overrides go through the public properties instead of the
# underscore-prefixed fields, so whatever the property setters do is not
# bypassed.

# Override embed model
cfg.embed_model = BedrockEmbedding(
    model_name="cohere.embed-english-v3",
    profile_name="padmin",
    region_name="us-east-1"
)

# Override response model
cfg.response_llm = BedrockConverse(
    model="us.anthropic.claude-3-5-sonnet-20240620-v1:0",
    profile_name="padmin",
    region_name="us-east-1",
    temperature=0.0,
    max_tokens=4096
)

# Optional: explicitly set the embedding dimensions and the cache flag
cfg.embed_dimensions = 1024
cfg.enable_cache = False


In [3]:
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.retrieval.retrievers import RerankingBeamGraphSearch, StatementCosineSimilaritySearch, KeywordRankingSearch
from graphrag_toolkit.lexical_graph.retrieval.post_processors import SentenceReranker
from graphrag_toolkit.lexical_graph.tenant_id import TenantId
from graphrag_toolkit.lexical_graph import set_logging_config

# INFO-level logging so retriever progress shows up in the cell output.
set_logging_config('INFO')

# --- Retrievers (all share the stores already present in the kernel) ----
cosine_retriever = StatementCosineSimilaritySearch(
    graph_store=graph_store, vector_store=vector_store, top_k=50
)
keyword_retriever = KeywordRankingSearch(
    graph_store=graph_store, vector_store=vector_store, max_keywords=10
)

# Reranker used by the beam search.
reranker = SentenceReranker(batch_size=128)

# Beam search seeded by the cosine and keyword retrievers.
beam_retriever = RerankingBeamGraphSearch(
    graph_store=graph_store,
    vector_store=vector_store,
    initial_retrievers=[cosine_retriever, keyword_retriever],
    reranker=reranker,
    beam_width=100,
    max_depth=8,
)

# --- Query engine: explicit retrievers, no tenant scoping ---------------
query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store,
    vector_store,
    retrievers=[cosine_retriever, keyword_retriever, beam_retriever],
    # tenant_id=TenantId("awsgraph1")  # left disabled
)

# --- Ask the question and print the generated answer text ---------------
response = query_engine.query(
    "What are the differences between Neptune Database and Neptune Analytics?"
)
print(response.response)

  from .autonotebook import tqdm as notebook_tqdm


2025-05-06 09:09:45:INFO:datasets       :PyTorch version 2.7.0 available.
2025-05-06 09:09:48:INFO:g.l.r.r.rerank_beam_search:Retrieved 99 new nodes through beam search.
Neptune Database and Neptune Analytics are two distinct components of Amazon Neptune, each designed for different purposes and use cases. Here are the key differences between them:

1. Purpose:
Neptune Database is a fully managed graph database service [source_4], while Neptune Analytics is an analytics database engine specifically designed for graph analytics [source_1, source_2].

2. Use Cases:
Neptune Database is suitable for applications like fraud alerting, Customer 360, and social networking [source_1]. Neptune Analytics, on the other hand, is ideal for data science workloads, investigatory and exploratory tasks, and workloads requiring fast iteration for analytical, data, and algorithmic processing [source_2].

3. Data Processing:
Neptune Database is designed for optimal scalability and availability, capable of 

In [5]:
# Dump the current GraphRAGConfig object so its settings can be checked.
# The cell performs no configuration -- it only prints.
# NOTE(review): the recorded output contains the AWS profile name; consider
# clearing the output before sharing.
from graphrag_toolkit.lexical_graph import GraphRAGConfig
print(f"GraphRAGConfig: {GraphRAGConfig}")

GraphRAGConfig: _GraphRAGConfig(_aws_profile='padmin', _aws_region='us-east-1', _aws_clients={}, _extraction_llm=None, _response_llm=BedrockConverse(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x72674b4a5fa0>, system_prompt=None, messages_to_prompt=<function messages_to_prompt at 0x7268317aee80>, completion_to_prompt=<function default_completion_to_prompt at 0x726831816700>, output_parser=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>, query_wrapper_prompt=None, model='us.anthropic.claude-3-5-sonnet-20240620-v1:0', temperature=0.0, max_tokens=4096, profile_name='padmin', aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, region_name='us-east-1', botocore_session=None, botocore_config=None, max_retries=10, timeout=60.0, guardrail_identifier=None, guardrail_version=None, trace=None, additional_kwargs={}), _embed_model=BedrockEmbedding(model_name='cohere.embed-english-v3', embed_batch_size=10, callback_manager=

In [2]:
# ---- 1. Configure GraphRAG first ----
from graphrag_toolkit.lexical_graph import GraphRAGConfig
from llama_index.embeddings.bedrock import BedrockEmbedding
from llama_index.llms.bedrock_converse import BedrockConverse

# Override the embedding model, its dimensions and the response LLM on the
# shared GraphRAGConfig object. The public properties are used instead of
# the underscore-prefixed fields so the property setters are not bypassed.
GraphRAGConfig.embed_model = BedrockEmbedding(
    model_name="cohere.embed-english-v3",
    profile_name="padmin",
    region_name="us-east-1"
)
GraphRAGConfig.embed_dimensions = 1024

GraphRAGConfig.response_llm = BedrockConverse(
    model="us.anthropic.claude-3-5-sonnet-20240620-v1:0",
    profile_name="padmin",
    region_name="us-east-1",
    temperature=0.0,
    max_tokens=4096
)

# ---- 2. Graph and vector stores (created after the configuration step) ----
import os
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory

# Store identifiers are read from the environment; a missing variable
# raises KeyError.
graph_store = GraphStoreFactory.for_graph_store(
    os.environ["GRAPH_STORE"],
)
vector_store = VectorStoreFactory.for_vector_store(
    os.environ["VECTOR_STORE"],
)

# ---- 3. Logging at INFO level ----
from graphrag_toolkit.lexical_graph import set_logging_config

set_logging_config("INFO")

# ---- 4. Retrievers ----
from graphrag_toolkit.lexical_graph.retrieval.post_processors import SentenceReranker
from graphrag_toolkit.lexical_graph.retrieval.retrievers import (
    KeywordRankingSearch,
    RerankingBeamGraphSearch,
    StatementCosineSimilaritySearch,
)

# Two seed retrievers over the same stores, differing only in tuning.
cosine_retriever = StatementCosineSimilaritySearch(
    graph_store=graph_store, vector_store=vector_store, top_k=50
)
keyword_retriever = KeywordRankingSearch(
    graph_store=graph_store, vector_store=vector_store, max_keywords=10
)

# Reranker handed to the beam search.
reranker = SentenceReranker(
    batch_size=128,
)

# Beam search seeded by the cosine and keyword retrievers.
beam_retriever = RerankingBeamGraphSearch(
    graph_store=graph_store,
    vector_store=vector_store,
    initial_retrievers=[cosine_retriever, keyword_retriever],
    reranker=reranker,
    beam_width=100,
    max_depth=8,
)

# ---- 5. Query engine (explicit retrievers, no tenant scoping) ----
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine

query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store,
    vector_store,
    retrievers=[
        cosine_retriever,
        keyword_retriever,
        beam_retriever,
    ],
    # Multi-tenancy can be enabled by adding: tenant_id=TenantId("awsgraph1")
)

# ---- 6. Ask the question and print the generated answer text ----
response = query_engine.query(
    "What are the differences between Neptune Database and Neptune Analytics?"
)
print(response.response)


  from .autonotebook import tqdm as notebook_tqdm


2025-05-06 09:08:06:INFO:datasets       :PyTorch version 2.7.0 available.
I apologize, but I do not have enough information from the provided search results to accurately answer your question about the differences between Neptune Database and Neptune Analytics. The search results are empty, so I cannot make any factual statements or comparisons between these two services. To provide a proper answer, I would need specific information about the features, capabilities, and use cases of both Neptune Database and Neptune Analytics.


In [3]:
# Show the current GraphRAGConfig object.
# This cell is inspection only; it sets no profile, region or model.
# NOTE(review): the recorded output reveals the AWS profile name and is very
# long -- consider clearing it before sharing the notebook.
from graphrag_toolkit.lexical_graph import GraphRAGConfig
print(f"GraphRAGConfig: {GraphRAGConfig}")

GraphRAGConfig: _GraphRAGConfig(_aws_profile='padmin', _aws_region='us-east-1', _aws_clients={}, _extraction_llm=None, _response_llm=BedrockConverse(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x71df0ae6c830>, system_prompt=None, messages_to_prompt=<function messages_to_prompt at 0x71dff1186de0>, completion_to_prompt=<function default_completion_to_prompt at 0x71dff11ee660>, output_parser=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>, query_wrapper_prompt=None, model='us.anthropic.claude-3-5-sonnet-20240620-v1:0', temperature=0.0, max_tokens=4096, profile_name='padmin', aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, region_name='us-east-1', botocore_session=None, botocore_config=None, max_retries=10, timeout=60.0, guardrail_identifier=None, guardrail_version=None, trace=None, additional_kwargs={}), _embed_model=BedrockEmbedding(model_name='cohere.embed-english-v3', embed_batch_size=10, callback_manager=