In [1]:
import sys
import os

# Put the parent directory (graphrag_pipeline) on the Python path so that modules
# located in that directory can be imported from this notebook.
parent_dir = os.path.split(os.getcwd())[0]
if sys.path.count(parent_dir) == 0:
    sys.path.append(parent_dir)

import neo4j
from library.kg_indexer import KGIndexer
from library.kg_builder.utilities import GeminiLLM
from neo4j_graphrag.schema import get_schema
import os
import json
from dotenv import load_dotenv
from neo4j_graphrag.embeddings import SentenceTransformerEmbeddings
from neo4j_graphrag.retrievers import (
    VectorRetriever,
    VectorCypherRetriever,
    HybridRetriever,
    HybridCypherRetriever,
    Text2CypherRetriever
)
from neo4j_graphrag.generation import RagTemplate
from neo4j_graphrag.generation.graphrag import GraphRAG

  return torch._C._cuda_getDeviceCount() > 0


# 0. Initial setup

Note: this notebook assumes that the Neo4j database it connects to already contains an *indexed* knowledge graph (i.e., with both the full-text index and the vector embeddings index created).

In [2]:
# Locate the `config_files` folder, a sibling of the current working directory
config_files_path = os.path.join(os.path.dirname(os.getcwd()), 'config_files')

# Load environment variables from the .env file stored in that folder
load_dotenv(os.path.join(config_files_path, '.env'), override=True)


def _load_json_config(file_name):
    """Parse one JSON configuration file from the `config_files` folder.

    Args:
        file_name: Name of the JSON file inside `config_files_path`.

    Returns:
        The parsed JSON content.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Explicit encoding so parsing does not depend on the platform's default locale
    with open(os.path.join(config_files_path, file_name), 'r', encoding='utf-8') as config_file:
        return json.load(config_file)


build_config = _load_json_config('kg_building_config.json')  # Knowledge graph builder
retr_config = _load_json_config('kg_retrieval_config.json')  # Knowledge graph retriever
graphrag_config = _load_json_config('graphrag_config.json')  # GraphRAG

# Connection settings and API key, read from the environment
neo4j_uri = os.getenv('NEO4J_URI')
neo4j_username = os.getenv('NEO4J_USERNAME')
neo4j_password = os.getenv('NEO4J_PASSWORD')
gemini_api_key = os.getenv('GEMINI_API_KEY')  # not validated here; it is only handed to the LLM wrapper later

# Fail fast with a readable message instead of handing None to the Neo4j driver
_neo4j_settings = {
    'NEO4J_URI': neo4j_uri,
    'NEO4J_USERNAME': neo4j_username,
    'NEO4J_PASSWORD': neo4j_password,
}
_missing_settings = [name for name, value in _neo4j_settings.items() if not value]
if _missing_settings:
    raise RuntimeError(
        "Missing Neo4j environment variable(s): " + ", ".join(_missing_settings)
        + f". Define them in {os.path.join(config_files_path, '.env')}."
    )

driver = neo4j.GraphDatabase.driver(neo4j_uri, auth=(neo4j_username, neo4j_password))

In [4]:
# Show every top-level section of the KG-building configuration with its settings
for section_name, section_settings in build_config.items():
    print(section_name)
    print(section_settings)

text_splitter_config
{'chunk_size': 100000, 'chunk_overlap': 1000}
embedder_config
{'model_name': 'all-MiniLM-L6-v2', 'model_params': {}}
llm_config
{'model_name': 'gemini-2.5-flash-preview-05-20', 'model_params': {'temperature': 0.0}}
schema_config
{'create_schema': True, 'nodes': [{'label': 'Event', 'description': 'Significant occurrences of the input text, such as conflicts, elections, coups, attacks or any other relevant information.', 'properties': [{'name': 'name', 'type': 'STRING'}, {'name': 'start_date', 'type': 'DATE', 'description': 'Date when the event started or when the information was first reported.'}, {'name': 'end_date', 'type': 'DATE', 'description': 'Date when the event ended or when the information was last updated.'}, {'name': 'type', 'type': 'STRING', 'description': 'Type of event, e.g., Conflict, Attack, Election.'}]}, {'label': 'Actor', 'description': 'All kinds of entities mentioned, such as terrorist groups, political parties, military, individuals, etc.', 'pr

In [None]:
# Create the embedder from the model name stored in the KG-building configuration
# (presumably the same model that produced the stored chunk embeddings; verify against the builder)
embedder = SentenceTransformerEmbeddings(model=build_config['embedder_config']['model_name'])

# List the indexes that exist in the database
indexer = KGIndexer(driver=driver)
existing_indexes = indexer.list_all_indexes()


def _first_index_name(indexes, index_type):
    """Return the name of the first index whose 'type' equals `index_type`.

    Args:
        indexes: Iterable of index descriptions (mappings with 'name' and 'type' keys).
        index_type: Index type to look for, e.g. 'VECTOR' or 'FULLTEXT'.

    Returns:
        The 'name' value of the first matching index.

    Raises:
        IndexError: If no index of the requested type exists. The exception type
            matches what indexing an empty list would raise, but carries a clear message.
    """
    for index in indexes:
        if index['type'] == index_type:
            return index['name']
    raise IndexError(
        f"No {index_type} index found in the database; "
        "this notebook requires an indexed knowledge graph."
    )


# Names of the indexes used by the retrievers
embeddings_index_name = _first_index_name(existing_indexes, 'VECTOR')
fulltext_index_name = _first_index_name(existing_indexes, 'FULLTEXT')

Found 5 indexes in the database:

1. {'id': 2, 'name': '__entity__id', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'RANGE', 'entityType': 'NODE', 'labelsOrTypes': ['__KGBuilder__'], 'properties': ['id'], 'indexProvider': 'range-1.0', 'owningConstraint': None, 'lastRead': neo4j.time.DateTime(2025, 6, 5, 14, 52, 4, 898000000, tzinfo=<UTC>), 'readCount': 3508}

2. {'id': 3, 'name': 'embeddings_index', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'VECTOR', 'entityType': 'NODE', 'labelsOrTypes': ['Chunk'], 'properties': ['embedding'], 'indexProvider': 'vector-2.0', 'owningConstraint': None, 'lastRead': neo4j.time.DateTime(2025, 6, 6, 15, 28, 42, 412000000, tzinfo=<UTC>), 'readCount': 37}

3. {'id': 4, 'name': 'fulltext_index', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'FULLTEXT', 'entityType': 'NODE', 'labelsOrTypes': ['Chunk'], 'properties': ['text'], 'indexProvider': 'fulltext-1.0', 'owningConstraint': None, 'lastRead': neo4j.time.DateTime(2025, 6, 6, 15,

In [12]:
# Example question passed to each retriever's get_search_results() call in section 1
sample_query_text = (
    "Which have been the most pressing security-related issues in Sudan in the last year? "
    "What are the future prospects for the country considering the current situation?"
)

# 1. Retrievers

## 1.1. Vector retriever

Similarity search using vector embeddings.

In [5]:
# Vector retriever: similarity search on the vector index, with the query text
# embedded by `embedder`. Besides the similarity score, each hit also returns the
# `text` property of the matched node.
v_retriever = VectorRetriever(
    driver=driver,
    embedder=embedder,
    index_name=embeddings_index_name,
    return_properties=['text'],
)

Now, let's check which information is retrieved with the sample query text and this vector retriever. This will NOT be the final output of GraphRAG, but an intermediate step where all of the relevant information is compiled according to the characteristics of the retriever. In this case, the vector retriever will:
1. Embed the query text with the embedder. At this point, it could be interesting to consider **hypothetical document embeddings** (see Arnault's slides of Advanced NLP, session 9).
2. Compute the cosine similarity of the embedded query text with the embeddings of the text.
3. Return the cosine similarity of the closest vectors, together with their text (if the property "text" is returned).

Now, let's see how Neo4j is getting the results with the lower-level (but more robust than `.search()`) `.get_search_results()` method.

In [25]:
# Low-level search: only the query text is given (no precomputed vector), so the
# retriever's embedder is used to embed it. The five best matches are requested.
v_results = v_retriever.get_search_results(
    query_text=sample_query_text,
    query_vector=None,
    top_k=5,
)

print("Raw search results:\n", v_results)

# Print each record as indented JSON, preceded by a separator line
for record in v_results.records:
    print("=" * 50 + "\n" + json.dumps(record.data(), indent=4))

Raw search results:
{
    "node": {
    },
    "nodeLabels": [
        "__KGBuilder__",
        "Chunk"
    ],
    "elementId": "4:6c5b3cb6-25d4-4346-af2c-705e7774defb:264",
    "id": "4:6c5b3cb6-25d4-4346-af2c-705e7774defb:264",
    "score": 0.8028373718261719
}
{
    "node": {
        "text": "Sudanese civil defense says all fires at major oil depots in government-controlled Port Sudan are now \"completely\" under control following numerous RSF attacks on petroleum reserves"
    },
    "nodeLabels": [
        "__KGBuilder__",
        "Chunk"
    ],
    "elementId": "4:6c5b3cb6-25d4-4346-af2c-705e7774defb:296",
    "id": "4:6c5b3cb6-25d4-4346-af2c-705e7774defb:296",
    "score": 0.78676438331604
}
{
    "node": {
        "text": "Sudanese media reports new drone strikes on unspecified areas in Port Sudan"
    },
    "nodeLabels": [
        "__KGBuilder__",
        "Chunk"
    ],
    "elementId": "4:6c5b3cb6-25d4-4346-af2c-705e7774defb:364",
    "id": "4:6c5b3cb6-25d4-4346-af2c-705e777

## 1.2. VectorCypherRetriever

Combines vector search with retrieval queries written in Cypher, Neo4j's graph query language, to traverse the graph and incorporate additional nodes and relationships. Below we create a retriever that obtains Chunk nodes via vector search and then traverses outwards from the entities linked to those chunks, up to 3 hops away from each chunk (query taken from [this article](https://neo4j.com/blog/news/graphrag-python-package/)).

In [6]:
# Cypher fragment passed as `retrieval_query` to the Cypher-based retrievers in this notebook.
# It aliases `node` to `chunk`, moves to the nodes linked to the chunk by an incoming
# FROM_CHUNK relationship, and then follows 1-2 further relationships of any other type.
# The result is a single `info` string with a "=== text ===" section (the chunk texts) and a
# "=== kg_rels ===" section (one "start - TYPE(details) -> end" line per relationship),
# with entries separated by "---" lines. Requires the APOC plugin (apoc.text.join).
# Because this is a regular (non-raw) Python string, every \n below reaches Neo4j as a
# real newline character inside the Cypher string literals.
# NOTE(review): `node` is presumably bound by the retriever's index search that runs
# before this fragment; confirm against the neo4j-graphrag documentation.
retrieval_query = """
//1) Go out 2-3 hops in the entity graph and get relationships
WITH node AS chunk
MATCH (chunk)<-[:FROM_CHUNK]-()-[relList:!FROM_CHUNK]-{1,2}()
UNWIND relList AS rel

//2) collect relationships and text chunks
WITH collect(DISTINCT chunk) AS chunks, 
  collect(DISTINCT rel) AS rels

//3) format and return context
RETURN '=== text ===\n' + apoc.text.join([c in chunks | c.text], '\n---\n') + '\n\n=== kg_rels ===\n' +
  apoc.text.join([r in rels | startNode(r).name + ' - ' + type(r) + '(' + coalesce(r.details, '') + ')' +  ' -> ' + endNode(r).name ], '\n---\n') AS info
"""

In [7]:
# Show the retrieval query stored in the retrieval configuration file
vc_retriever_settings = retr_config['VectorCypherRetriever_config']
print(vc_retriever_settings['retrieval_query'])

//1) Go out 2-3 hops in the entity graph and get relationships
WITH node AS chunk
MATCH (chunk)<-[:FROM_CHUNK]-()-[relList:!FROM_CHUNK]-{1,2}()
UNWIND relList AS rel

//2) collect relationships and text chunks
WITH collect(DISTINCT chunk) AS chunks,
 collect(DISTINCT rel) AS rels

//3) format and return context
RETURN '=== text ===\n' + apoc.text.join([c in chunks | c.text], '\n---\n') + '\n\n=== kg_rels ===\n' +
 apoc.text.join([r in rels | startNode(r).name + ' - ' + type(r) + '(' + coalesce(r.details, '') + ')' +  ' -> ' + endNode(r).name ], '\n---\n') AS info


In [8]:
# Vector + Cypher retriever: similarity search on the vector index (query text embedded
# by `embedder`), with `retrieval_query` supplying the Cypher that gathers the graph
# context surrounding the matched nodes.
vc_retriever = VectorCypherRetriever(
    driver=driver,
    embedder=embedder,
    index_name=embeddings_index_name,
    retrieval_query=retrieval_query,
)

In [8]:
# Low-level search: only the query text is given (no precomputed vector), so the
# retriever's embedder is used to embed it. The five best matches are requested.
vc_results = vc_retriever.get_search_results(
    query_vector=None,
    query_text=sample_query_text,
    top_k=5,
)

# The first record's `info` value holds the chunk texts followed by the relationship
# section, which starts at the '=== kg_rels ===' marker.
if vc_results.records:
    vc_info = vc_results.records[0]['info'] or ''
    # partition() leaves the whole string in the first part when the marker is absent,
    # whereas find() would return -1 and silently mis-slice the text.
    vc_text_context, vc_marker, vc_kg_context = vc_info.partition('=== kg_rels ===\n')
    print("# Text Chunk Context:\n")
    print(vc_text_context)
    print("\n# KG Context From Relationships:\n")
    print(vc_marker + vc_kg_context)
else:
    # Nothing to slice: avoid an IndexError on records[0]
    print("No records were returned for this query.")

# Text Chunk Context:

=== text ===
---
Sudanese civil defense says all fires at major oil depots in government-controlled Port Sudan are now "completely" under control following numerous RSF attacks on petroleum reserves
---
Sudanese media reports new drone strikes on unspecified areas in Port Sudan
---
Sudanese media report new drone attack on Port Sudan with no specific location given; air defense reported at work
---
Sudanese military, citing official, reports drone attack on Port Sudan targeted civilian facilities including air base and a cargo warehouse; unverified reports claim power outages in parts of city [corrects location struck]



# KG Context From Relationships:

=== kg_rels ===
Emirati - COOPERATED_WITH() -> RSF
---
UAE ships - COOPERATED_WITH() -> RSF
---
UAE ships - IS_FROM() -> Red Sea
---
UAE ships - IS_FROM() -> UAE
---
RSF - IS_FROM() -> Nyala
---
Atbara airport - IS_WITHIN() -> Nyala
---
Nyala - IS_WITHIN() -> Sudan
---
Nyala - IS_WITHIN() -> Darfur
---
airstrike

i.e., the `VectorCypherRetriever` builds its context from two sources: the text chunks whose embeddings are most similar to the query embedding, and the knowledge graph relationships reachable from those chunks.

## 1.3. HybridRetriever

Combines vector and full-text search.

In [9]:
# Hybrid retriever: combines similarity search on the vector index (query text embedded
# by `embedder`) with text search on the full-text index. Besides the score, each hit
# also returns the `text` property of the matched node.
hy_retriever = HybridRetriever(
    driver=driver,
    embedder=embedder,
    vector_index_name=embeddings_index_name,
    fulltext_index_name=fulltext_index_name,
    return_properties=['text'],
)

In [56]:
# Low-level hybrid search: only the query text is given (no precomputed vector), so the
# retriever's embedder is used to embed it. The 'linear' ranker combines the vector and
# full-text scores linearly; alpha=0.5 weights both equally ('naive' is the default ranker).
hy_results = hy_retriever.get_search_results(
    query_text=sample_query_text,
    query_vector=None,
    top_k=5,
    ranker='linear',
    alpha=0.5,
)

print("Raw search results:\n", hy_results)

# Print each record as indented JSON, preceded by a separator line
for record in hy_results.records:
    print("=" * 50 + "\n" + json.dumps(record.data(), indent=4))

Raw search results:
{
    "node": {
    },
    "nodeLabels": [
        "__KGBuilder__",
        "Chunk"
    ],
    "elementId": "4:6c5b3cb6-25d4-4346-af2c-705e7774defb:264",
    "id": "4:6c5b3cb6-25d4-4346-af2c-705e7774defb:264",
    "score": 0.5
}
{
    "node": {
        "text": "Editor's note: We are aware of images circulating on social media of a large fire burning at multiple oil depots in Port Sudan, Sudan. The attack has not yet been reported on by Sudanese media, and the cause of the fires is still unclear. The fires come just one day after a series of RSF paramilitary drone strikes in the city, which were the first of the country's two year civil war. We are watching our sources for more information. - Owen"
    },
    "nodeLabels": [
        "__KGBuilder__",
        "Chunk"
    ],
    "elementId": "4:6c5b3cb6-25d4-4346-af2c-705e7774defb:434",
    "id": "4:6c5b3cb6-25d4-4346-af2c-705e7774defb:434",
    "score": 0.5
}
{
    "node": {
        "text": "Sudanese civil defense says

## 1.4. HybridCypherRetriever

Combines vector and full-text search with Cypher retrieval queries for additional graph traversal. 

In [10]:
# Hybrid + Cypher retriever: combines vector and full-text search, with
# `retrieval_query` supplying the Cypher that gathers the graph context
# surrounding the matched nodes.
hyc_retriever = HybridCypherRetriever(
    driver=driver,
    embedder=embedder,
    vector_index_name=embeddings_index_name,
    fulltext_index_name=fulltext_index_name,
    retrieval_query=retrieval_query,
)

In [58]:
# Low-level hybrid search: only the query text is given (no precomputed vector), so the
# retriever's embedder is used to embed it. The 'linear' ranker combines the vector and
# full-text scores linearly; alpha=0.5 weights both equally ('naive' is the default ranker).
hyc_results = hyc_retriever.get_search_results(
    query_vector=None,
    query_text=sample_query_text,
    top_k=5,
    ranker="linear",
    alpha=0.5,
)

# The first record's `info` value holds the chunk texts followed by the relationship
# section, which starts at the '=== kg_rels ===' marker.
if hyc_results.records:
    hyc_info = hyc_results.records[0]['info'] or ''
    # partition() leaves the whole string in the first part when the marker is absent,
    # whereas find() would return -1 and silently mis-slice the text.
    hyc_text_context, hyc_marker, hyc_kg_context = hyc_info.partition('=== kg_rels ===\n')
    print("# Text Chunk Context:\n")
    print(hyc_text_context)
    print("\n# KG Context From Relationships:\n")
    print(hyc_marker + hyc_kg_context)
else:
    # Nothing to slice: avoid an IndexError on records[0]
    print("No records were returned for this query.")

# Text Chunk Context:

=== text ===
---
Editor's note: We are aware of images circulating on social media of a large fire burning at multiple oil depots in Port Sudan, Sudan. The attack has not yet been reported on by Sudanese media, and the cause of the fires is still unclear. The fires come just one day after a series of RSF paramilitary drone strikes in the city, which were the first of the country's two year civil war. We are watching our sources for more information. - Owen
---
Sudanese civil defense says all fires at major oil depots in government-controlled Port Sudan are now "completely" under control following numerous RSF attacks on petroleum reserves
---
Sudanese media reports new drone strikes on unspecified areas in Port Sudan
---
Sudanese media report new drone attack on Port Sudan with no specific location given; air defense reported at work



# KG Context From Relationships:

=== kg_rels ===
Emirati - COOPERATED_WITH() -> RSF
---
UAE ships - COOPERATED_WITH() -> RSF
--

## 1.5. Text2CypherRetriever

Converts natural language queries into Cypher queries to run against Neo4j. Does NOT search in text or perform similarity measures.

In [13]:
# LLM used to generate Cypher queries from the query text; model name and model
# parameters come from the Text2CypherRetriever section of the retrieval configuration.
t2c_llm_settings = retr_config['Text2CypherRetriever_config']['llm_config']
llm = GeminiLLM(
    model_name=t2c_llm_settings['model_name'],
    google_api_key=gemini_api_key,
    model_params=t2c_llm_settings['model_params'],
)

In [14]:
# Read the schema of the knowledge graph from the database.
# is_enhanced=True asks for the enhanced schema, which carries additional
# information such as example values.
schema = get_schema(driver=driver, is_enhanced=True, sanitize=False)

print(schema)

Node properties:
- **Document**
  - `id`: STRING Example: "5a9f0108-8991-44ae-9dbb-263a1a40eff7"
  - `path`: STRING Example: "1"
  - `createdAt`: STRING Example: "2025-06-05T14:20:10.336471+00:00"
  - `source`: STRING Example: "https://www.unicef.org/press-releases/wfpunicef-hu"
  - `published_date`: STRING Example: "2025-06-03 15:28:28.271179+00:00"
- **Chunk**
  - `id`: STRING Example: "f830f947-5d6c-4a4b-94b5-a4154b66e0bf"
  - `index`: INTEGER Min: 0, Max: 0
  - `text`: STRING Example: "WFP and UNICEF now say five members were killed, s"
- **Actor**
  - `id`: STRING Example: "f830f947-5d6c-4a4b-94b5-a4154b66e0bf:0"
  - `name`: STRING Example: "WFP"
  - `chunk_index`: INTEGER Min: 0, Max: 0
  - `type`: STRING Example: "International Organization"
- **Event**
  - `id`: STRING Example: "f830f947-5d6c-4a4b-94b5-a4154b66e0bf:2"
  - `name`: STRING Example: "attack on aid convoy"
  - `chunk_index`: INTEGER Min: 0, Max: 0
  - `type`: STRING Example: "Attack"
  - `start_date`: STRING Availab

In [15]:
# Few-shot examples for Cypher generation: each entry pairs a user question with
# the Cypher query that answers it.
examples = [
    (
        "USER INPUT: 'What events happened in Sudan?'\n"
        "QUERY: MATCH (e:Event)-[:HAPPENED_IN]->(c:Country) "
        "WHERE c.name = 'Sudan' RETURN e.name, e.type"
    ),
    (
        "USER INPUT: 'Which actors participated in attacks?'\n"
        "QUERY: MATCH (a:Actor)-[:PARTICIPATED_IN]->(e:Event) "
        "WHERE e.type = 'Attack' RETURN a.name, a.type"
    ),
]

In [16]:
# Show the examples settings stored in the retrieval configuration file
t2c_retriever_settings = retr_config['Text2CypherRetriever_config']
print(t2c_retriever_settings['examples_config'])

{'include_examples': True, 'examples': ["USER INPUT: 'What events happened in Sudan?'\nQUERY: MATCH (e:Event)-[:HAPPENED_IN]->(c:Country) WHERE c.name = 'Sudan' RETURN e.name, e.type", "USER INPUT: 'Which actors participated in attacks?'\nQUERY: MATCH (a:Actor)-[:PARTICIPATED_IN]->(e:Event) WHERE e.type = 'Attack' RETURN a.name, a.type"]}


In [17]:
# Text-to-Cypher retriever: `llm` writes a Cypher query for the question, guided by
# the graph schema and the few-shot examples.
t2c_retriever = Text2CypherRetriever(
    driver=driver,
    llm=llm,
    neo4j_schema=schema,
    examples=examples,
    custom_prompt=None,  # No custom prompt: keep the retriever's auto-generated prompt
)

In [94]:
# Let the retriever generate a Cypher query from the natural-language question and run it
t2c_results = t2c_retriever.get_search_results(
    query_text=sample_query_text,
)

# Tolerate missing metadata instead of failing on the subscript
generated_cypher = (t2c_results.metadata or {}).get('cypher')

# Print results in a more readable format
print("=" * 80)
print("TEXT-TO-CYPHER RETRIEVAL RESULTS")
print("=" * 80)
print(f"Query: {sample_query_text}")
print(f"Generated Cypher: {generated_cypher}")
print("-" * 80)
print(f"Found {len(t2c_results.records)} events in Sudan:")
print("-" * 80)

# Group events by type for better organization.
# The Cypher query is written by the LLM, so the `e.type` / `e.name` columns are not
# guaranteed to be present, and stored types may be null:
#   - a missing or null type is grouped under 'Unknown' (also keeps every key a string,
#     so .upper() and sorted() below cannot fail on None);
#   - when `e.name` is absent, the whole row is shown instead of raising KeyError.
events_by_type = {}
for record in t2c_results.records:
    row = record.data()
    raw_type = row.get('e.type')
    event_type = 'Unknown' if raw_type is None else str(raw_type)
    event_name = row.get('e.name', row)
    events_by_type.setdefault(event_type, []).append(event_name)

# Display events grouped by type
for event_type, events in sorted(events_by_type.items()):
    print(f"\n📍 {event_type.upper()} ({len(events)} events):")
    for i, event in enumerate(events, 1):
        print(f"   {i}. {event}")

print("\n" + "=" * 80)

TEXT-TO-CYPHER RETRIEVAL RESULTS
Query: Which have been the most pressing security-related issues in Sudan in the last year? What are the future prospects for the country considering the current situation?
Generated Cypher: MATCH (e:Event)-[:HAPPENED_IN]->(c:Country) WHERE c.name = 'Sudan' RETURN e.name, e.type
--------------------------------------------------------------------------------
Found 15 events in Sudan:
--------------------------------------------------------------------------------

📍 ATTACK (5 events):
   1. attack on aid convoy
   2. strike on a prison
   3. large fire
   4. drones attack
   5. Hospital Bombing

📍 CONFLICT (2 events):
   1. civil war
   2. clashes

📍 DEATHS (1 events):
   1. 12 deaths

📍 DISEASE CASES (1 events):
   1. 727 cholera cases

📍 GOVERNMENT ACTION (2 events):
   1. Government Dissolution
   2. Undertaking Responsibilities

📍 NATURAL DISASTER IMPACT (1 events):
   1. Impact of Heavy Rain

📍 ASSESSMENT (1 events):
   1. assessment of refugee con

# 2. GraphRAG

You can construct GraphRAG pipelines with the `GraphRAG` class.  At a minimum, you will need to pass the constructor an LLM and a retriever. You can optionally pass a custom prompt template. We will do so here just to provide a bit more guidance for the LLM to stick to information from our data source.

> GraphRAG does not eliminate vector embeddings; it augments them with graph structure. It improves upon pure vector-based RAG by incorporating knowledge graph relationships into the retrieved context, combining the best of both worlds.

## 2.1. Overview of the execution flow

Looking at the `GraphRAG.search()` method in graphrag.py of the `neo4j-graphrag` library, here's the executed flow:

1. **Retrieval Phase**: The retriever (e.g., `v_retriever`, `vc_retriever`) searches for relevant information:
   ```python
   retriever_result: RetrieverResult = self.retriever.search(
       query_text=query, **validated_data.retriever_config
   )
   ```

2. **Context Assembly**: The retrieved results are concatenated into a context string:
   ```python
   context = "\n".join(item.content for item in retriever_result.items)
   ```

3. **Prompt Formatting**: The template is formatted with the assembled context:
   ```python
   prompt = self.prompt_template.format(
       query_text=query_text, context=context, examples=validated_data.examples
   )
   ```

4. **LLM Generation**: The formatted prompt is sent to the LLM:
   ```python
   answer = self.llm.invoke(
       prompt,
       message_history,
       system_instruction=self.prompt_template.system_instructions,
   )
   ```

**Context Sources by Retriever Type**

Different retrievers provide different types of context:

- **VectorRetriever**: Returns just the text chunks based on semantic similarity
- **VectorCypherRetriever**: Returns formatted text with both chunks and knowledge graph relationships
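- **HybridRetriever**: Combines vector and full-text search results
- **HybridCypherRetriever**: Combines vector and full-text search, then applies the Cypher retrieval query to add knowledge graph relationships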
- **Text2CypherRetriever**: Returns structured query results from the knowledge graph

## 2.2. Implementation with different retrievers

First, we define the LLM that we will use to generate the final answer.

In [18]:
# LLM used to generate the final answers; model name and model parameters come from
# the GraphRAG configuration. This rebinds the name `llm`; objects created earlier
# with the previous `llm` keep the instance they were constructed with.
graphrag_llm_settings = graphrag_config['llm_config']
llm = GeminiLLM(
    model_name=graphrag_llm_settings['model_name'],
    google_api_key=gemini_api_key,
    model_params=graphrag_llm_settings['model_params'],
)

Second, we create a prompt template (if we don't use the default one) through which the retrieved context, potential examples and the question will be injected.

The default template `RagTemplate` looks as follows:

```
class RagTemplate(PromptTemplate):
    DEFAULT_SYSTEM_INSTRUCTIONS = "Answer the user question using the provided context."
    DEFAULT_TEMPLATE = """Context:
{context}

Examples:
{examples}

Question:
{query_text}

Answer:
"""
    EXPECTED_INPUTS = ["context", "query_text", "examples"]

    def format(self, query_text: str, context: str, examples: str) -> str:
        return super().format(query_text=query_text, context=context, examples=examples)
```

In [19]:
# Custom prompt template for GraphRAG. It expects the same three inputs as the library's
# default RagTemplate (query_text, context, examples) but lays them out as Markdown-style
# sections in the order Question, Context, Examples, Answer. The system instructions tell
# the LLM to answer only from the supplied context and to omit the Examples section when
# no examples are given.
rag_template = RagTemplate(template='''
# Question:
{query_text}
 
# Context:
{context}

# Examples:
{examples}
                           
# Answer:
''', expected_inputs=['query_text', 'context', 'examples'], 
system_instructions="Answer the Question using the following Context. Only respond with information mentioned in the Context. Do not inject any speculative information not mentioned. If no examples are provided, omit the Examples section in your answer.")

In [20]:
# Show the prompt template stored in the GraphRAG configuration file
rag_template_settings = graphrag_config['rag_template_config']
print(rag_template_settings['template'])

# Question:
{query_text}
 
# Context:
{context}
 
# Examples:
{examples}
 
# Answer:



Initialize different GraphRAG instances with different retrievers (which will provide different outcomes).

In [21]:
def _build_rag(retriever):
    """Return a GraphRAG pipeline that pairs `retriever` with the shared LLM and prompt template."""
    return GraphRAG(llm=llm, retriever=retriever, prompt_template=rag_template)


# One GraphRAG pipeline per retriever type
v_rag = _build_rag(v_retriever)      # vector
vc_rag = _build_rag(vc_retriever)    # vector + Cypher
hy_rag = _build_rag(hy_retriever)    # hybrid
hyc_rag = _build_rag(hyc_retriever)  # hybrid + Cypher
t2c_rag = _build_rag(t2c_retriever)  # text-to-Cypher

Now, let's set a question and get an answer out of it using GraphRAG. 

> Note that `GraphRAG.search()` forwards its own `query_text` argument to the retriever's `search()` method, so `query_text` must not be passed a second time inside `retriever_config`; provide it only once.

In [22]:
# User question passed to every GraphRAG pipeline in this section
question = (
    "Generate a brief security report for Sudan in the last year, "
    "with a forward-looking perspective on the future of the country. "
    "Structure the report so that events are at the center of it. "
    "Cite the sources of information used in the report."
)

In [23]:
# Answer the question with the vector-retriever pipeline
print("\n===========================\n")
v_rag_result = v_rag.search(
    query_text=question,
    message_history=None,  # No conversational history
    examples="",  # No examples to guide the answer
    # Search parameters for the retriever, taken from the retrieval configuration
    retriever_config=retr_config['VectorRetriever_config']['search_params'],
    return_context=True,  # Also return the context used to generate the answer
)
v_answer = v_rag_result.answer
print(f"Vector RAG Answer: \n{v_answer}")



Vector RAG Answer: 
**Security Report: Sudan - Last Year's Overview and Future Outlook**

**I. Introduction**
Sudan has remained engulfed in a devastating civil conflict over the past year, primarily between the Sudanese Armed Forces (SAF) and the Rapid Support Forces (RSF). While fighting has ravaged Khartoum and other regions, the conflict's expansion to previously relatively secure areas, particularly Port Sudan, marks a significant escalation and poses new challenges to the country's stability and humanitarian efforts.

**II. Key Security Events (Last Year): Expansion of Conflict to Port Sudan**

The most critical development in Sudan's security landscape over the last year has been the extension of direct conflict to **Port Sudan**, the country's vital Red Sea port and a key logistical hub for the SAF and humanitarian aid.

*   **Initial Drone Attacks on Civilian and Military Facilities:** In a notable shift, the Sudanese military reported drone attacks on Port Sudan, specifical

In [24]:
# Answer the question with the vector + Cypher pipeline
print("\n===========================\n")
vc_rag_result = vc_rag.search(
    query_text=question,
    message_history=None,  # No conversational history
    examples="",  # No examples to guide the answer
    # Search parameters for the retriever, taken from the retrieval configuration
    retriever_config=retr_config['VectorCypherRetriever_config']['search_params'],
    return_context=True,  # Also return the context used to generate the answer
)
vc_answer = vc_rag_result.answer
print(f"Vector + Cypher RAG Answer: \n{vc_answer}")



Vector + Cypher RAG Answer: 
**Security Report: Sudan (Last Year) and Future Outlook**

**I. Executive Summary**
Sudan has endured a year of escalating civil war, primarily between the Sudanese Armed Forces (SAF) and the Rapid Support Forces (RSF). The conflict has intensified, marked by a significant expansion of drone warfare, targeting critical infrastructure and civilian areas. This has precipitated a severe humanitarian crisis, widespread displacement, and a near-total collapse of essential services. The forward-looking perspective indicates a grim trajectory, with continued conflict, deepening humanitarian needs, and a high risk of regional destabilization.

**II. Key Security Events (Last Year)**

The past year in Sudan has been defined by the brutal conflict between the SAF and the RSF, with a series of impactful events shaping the security landscape:

*   **Escalation of Drone Warfare and Attacks on Critical Infrastructure:**
    *   **Port Sudan Under Fire:** The RSF demons

In [25]:
# Answer the question with the hybrid-retriever pipeline
print("\n===========================\n")
hy_rag_result = hy_rag.search(
    query_text=question,
    message_history=None,  # No conversational history
    examples="",  # No examples to guide the answer
    # Search parameters for the retriever, taken from the retrieval configuration
    retriever_config=retr_config['HybridRetriever_config']['search_params'],
    return_context=True,  # Also return the context used to generate the answer
)
hy_answer = hy_rag_result.answer
print(f"Hybrid RAG Answer: \n{hy_answer}")



Hybrid RAG Answer: 
**Security Report: Sudan - Recent Developments and Future Outlook**

**Date:** November 20, 2023 (Illustrative)

**I. Overview of Recent Security Events (Last Year/Latest Developments)**

Sudan's civil war between the Sudanese Armed Forces (SAF) and the Rapid Support Forces (RSF) has seen significant escalation and geographical expansion in recent weeks, particularly targeting critical infrastructure and government strongholds.

*   **Escalation in Port Sudan:** A major development has been the direct targeting of Port Sudan, a vital government-controlled city and humanitarian hub, by the RSF.
    *   **First RSF Drone Strikes:** The city experienced its first RSF paramilitary drone strikes since the civil war began, marking a new phase of the conflict's reach into previously safer areas. (Source: Editor's note, Owen)
    *   **Targeting of Civilian and Military Facilities:** A drone attack on Port Sudan reportedly targeted civilian facilities, including an air ba

In [26]:
# Answer the question with the hybrid + Cypher pipeline
print("\n===========================\n")
hyc_rag_result = hyc_rag.search(
    query_text=question,
    message_history=None,  # No conversational history
    examples="",  # No examples to guide the answer
    # Search parameters for the retriever, taken from the retrieval configuration
    retriever_config=retr_config['HybridCypherRetriever_config']['search_params'],
    return_context=True,  # Also return the context used to generate the answer
)
hyc_answer = hyc_rag_result.answer
print(f"Hybrid + Cypher RAG Answer: \n{hyc_answer}")



Hybrid + Cypher RAG Answer: 
**Security Report: Sudan - The Last Year and Forward Outlook**

**Introduction:**
The past year in Sudan has been defined by a brutal civil war between the Sudanese Armed Forces (SAF) and the Rapid Support Forces (RSF), plunging the nation into a severe security and humanitarian crisis. Events over the last twelve months underscore a deepening conflict with devastating consequences for civilians and critical infrastructure.

**Key Security Events (Last Year):**

1.  **Escalation of Conflict and Strategic Strikes on Critical Infrastructure:**
    *   **Port Sudan Under Attack:** The RSF significantly escalated its offensive capabilities, notably employing drone strikes against Port Sudan, a vital government-controlled Red Sea port. These attacks targeted crucial facilities including **oil depots**, the **Osman Digna Air Base**, a **cargo warehouse**, and other **civilian facilities**, leading to large fires and power outages (Al Arabiya, Sudanese military,

In [27]:
# Answer the question with the text-to-Cypher pipeline
print("\n===========================\n")
t2c_rag_result = t2c_rag.search(
    query_text=question,
    message_history=None,  # No conversational history
    examples="",  # No examples to guide the answer
    # Search parameters for the retriever, taken from the retrieval configuration
    retriever_config=retr_config['Text2CypherRetriever_config']['search_params'],
    return_context=True,  # Also return the context used to generate the answer
)
t2c_answer = t2c_rag_result.answer
print(f"Text-to-Cypher RAG Answer: \n{t2c_answer}")



Text-to-Cypher RAG Answer: 
## Security Report: Sudan - May 2023 to May 2024

**Overview:** Sudan has been gripped by a devastating civil conflict since April 15, 2023, pitting the Sudanese Armed Forces (SAF) against the paramilitary Rapid Support Forces (RSF). This conflict has rapidly escalated into one of the world's worst humanitarian crises, fundamentally reshaping the country's security landscape and threatening its very existence as a unified state. Events have been at the core of this rapid deterioration.

**Key Events and Their Security Impact (Last Year):**

1.  **April 15, 2023 – Onset of Conflict:** The sudden eruption of fighting in Khartoum between the SAF and RSF marked a catastrophic turning point. This event immediately plunged the capital into urban warfare, leading to widespread destruction, looting, and the collapse of essential services. The initial clashes quickly spread, demonstrating the deep-seated power struggle and lack of a political resolution mechanism.


# 3. Closing the driver connection

In [28]:
# Close the Neo4j driver created during the initial setup, now that all queries have run
driver.close()