# Graph RAG Workflow Dev & Test

![graph_rag_workflow](../images/graph_rag_workflow.png)

In [None]:
import boto3
from botocore.config import Config

# AWS region used for the Bedrock runtime client and the Bedrock embedding model.
region_name = "us-west-2"
# Model used by the child-traversal and relevance-check steps. The commented-out
# IDs below are presumably alternatives kept for quick switching.
#high_priority_model = "anthropic.claude-3-5-sonnet-20240620-v1:0"
#high_priority_model = "anthropic.claude-3-sonnet-20240229-v1:0"
high_priority_model = "anthropic.claude-3-haiku-20240307-v1:0"
# Model used by document selection and search-keyword generation.
low_priority_model = "anthropic.claude-3-haiku-20240307-v1:0"

def converse_with_bedrock(sys_prompt, usr_prompt, model_id):
    """Send one Converse request to Bedrock and return the reply text.

    Args:
        sys_prompt: System prompt blocks, forwarded as the ``system`` argument.
        usr_prompt: Message list, forwarded as the ``messages`` argument.
        model_id: Identifier of the Bedrock model to call.

    Returns:
        The ``text`` field of the first content block of the response message.
    """
    # Sampling is pinned as tightly as possible: temperature 0, topP 0.1, top_k 1.
    request = {
        "modelId": model_id,
        "messages": usr_prompt,
        "system": sys_prompt,
        "inferenceConfig": {"temperature": 0, "topP": 0.1},
        "additionalModelRequestFields": {"top_k": 1},
    }
    reply = boto3_client.converse(**request)
    first_block = reply['output']['message']['content'][0]
    return first_block['text']

def init_boto3_client(region: str):
    """Build a ``bedrock-runtime`` client for *region*.

    The client is configured with "standard" retry mode and a ``max_attempts``
    setting of 10.
    """
    client_config = Config(
        retries={"max_attempts": 10, "mode": "standard"},
        region_name=region,
    )
    runtime_client = boto3.client(
        "bedrock-runtime",
        region_name=region,
        config=client_config,
    )
    return runtime_client

def create_prompt(sys_template, user_template, **kwargs):
    """Fill both templates with *kwargs* and wrap them in Converse-style structures.

    Returns:
        ``(sys_prompt, usr_prompt)`` where ``sys_prompt`` is a one-element list of
        ``{"text": ...}`` and ``usr_prompt`` is a one-element list holding a single
        ``"user"`` message whose content is one text block.
    """
    system_text = sys_template.format(**kwargs)
    user_text = user_template.format(**kwargs)

    user_message = {"role": "user", "content": [{"text": user_text}]}
    return [{"text": system_text}], [user_message]

# Module-level Bedrock runtime client; converse_with_bedrock reads this global.
boto3_client = init_boto3_client(region_name)


In [None]:
from py2neo import Graph
import os

# Connection settings for the local Neo4j instance. They stay in the environment
# because later cells build a Neo4jVector store without passing explicit
# credentials (presumably it picks these variables up — TODO confirm).
os.environ["NEO4J_URI"] = "bolt://localhost:7687"
os.environ["NEO4J_USERNAME"] = "neo4j"
os.environ["NEO4J_PASSWORD"] = "password"

# Pass the settings to py2neo explicitly rather than calling Graph() with no
# arguments, so the connection visibly uses the values configured above.
# NOTE(review): py2neo does not appear to read NEO4J_USERNAME / NEO4J_PASSWORD
# by itself — confirm against the installed py2neo version.
graph = Graph(
    os.environ["NEO4J_URI"],
    auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]),
)

In [None]:
def select_subgraph_dev(question, graph):
    """Ask the LLM which level-1 document best matches *question*.

    All ``Title`` nodes with ``level: "1"`` are listed, numbered from 0, and the
    low-priority model is asked to answer with the index of a single document.

    Args:
        question: The user's question.
        graph: Graph object whose ``run(query)`` yields records.

    Returns:
        A 3-tuple ``(target_node, subgraph_name, next_step)``. On a valid
        selection this is ``([node_id], name, "traverse_child")``. If the reply
        is empty, is not an integer, or is outside the document list, it is
        ``([], "", "generate_answer")``.
    """
    query = """
        MATCH (n:Title {level: "1"})
        RETURN n.value, id(n) as node_id
    """
    results = graph.run(query)
    subgraph_list = [(record["n.value"], record["node_id"]) for record in results]
    subgraph_list_with_number = [f"{i}. {name}" for i, (name, _) in enumerate(subgraph_list)]

    sys_prompt_template = """ 
    You are an expert engineer well-versed in AWS manual documents. 
    Your task is to select the most appropriate manual document name for the user's question. 
    If there are no relevant documents, provide an empty list (""). """

    usr_prompt_template = """ 
    Please select the single most relevant document name for the given question.

    #Question: {question}

    #Document List: {subgraph_list_with_number}

    #Response Format: Provide only the index number of the selected document (omit any preamble) """
    sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, question=question, subgraph_list_with_number=subgraph_list_with_number)

    model_id = low_priority_model
    selected_id = converse_with_bedrock(sys_prompt, usr_prompt, model_id).strip()

    no_match = ([], "", "generate_answer")
    if selected_id == "":
        return no_match

    try:
        index = int(selected_id)
    except ValueError:
        # The model answered with something other than a bare index.
        return no_match

    # Reject out-of-range answers explicitly; a negative index would otherwise
    # silently pick a document from the end of the list.
    if not 0 <= index < len(subgraph_list):
        return no_match

    selected_name, selected_subgraph_id = subgraph_list[index]
    print("Selected:", selected_name)
    return [selected_subgraph_id], selected_name, "traverse_child"

In [None]:
# Select the topic (top-level document) for the question
question = "available model list in Bedrock"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, subgraph, next_step, sep=" | ")

In [None]:
# Select the topic for a question whose content is not currently in the graph
question = "how to deploy a model in SageMaker"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, subgraph, next_step, sep=" | ")

In [None]:
class TraverseResult:
    """Outcome of one traversal step from a parent node to its children.

    Attributes are stored exactly as passed in:
        parent_id: Id of the node that was expanded.
        parent_name: Name of that node.
        child_level: Level value of the children (``-1`` is used by
            ``traverse_child_dev`` when nothing was selected).
        selected_child_ids: Ids of the children chosen for further traversal.
        child_names: Names matching ``selected_child_ids``.
        next_action: Name of the next workflow step.
    """

    def __init__(self, parent_id, parent_name, child_level, selected_child_ids, child_names, next_action):
        self.parent_id = parent_id
        self.parent_name = parent_name
        self.child_level = child_level
        self.selected_child_ids = selected_child_ids
        self.child_names = child_names
        self.next_action = next_action

    def __repr__(self):
        # Readable form for notebook inspection (e.g. printing traverse_results).
        return (
            f"{type(self).__name__}(parent_id={self.parent_id!r}, "
            f"parent_name={self.parent_name!r}, child_level={self.child_level!r}, "
            f"selected_child_ids={self.selected_child_ids!r}, "
            f"child_names={self.child_names!r}, next_action={self.next_action!r})"
        )

In [None]:
csv_list_response_format = "Your response should be a list of comma separated values, eg: `foo, bar` or `foo,bar`"

def traverse_child_dev(question, subgraph, graph, target_node):
    """Expand one node and let the LLM pick which child menus to follow.

    Args:
        question: The user's question.
        subgraph: Name of the selected top-level document (inserted into the prompt).
        graph: Graph object whose ``run(query, params)`` yields records.
        target_node: List whose first element is the id of the node to expand.

    Returns:
        A ``TraverseResult``. When the node has no children, or the model's
        reply contains no usable index, ``next_action`` is ``"get_contents"``
        with an empty selection and ``child_level`` of ``-1``. Otherwise
        ``next_action`` is ``"traverse_child"`` and the selected child ids and
        names are filled in.
    """
    parent_id = target_node[0]
    query = """
        MATCH (n)
        WHERE id(n) = $parent_id
        OPTIONAL MATCH (n)-[:HAS_CHILD]->(c)
        RETURN n.value as parent_name, c.level as child_level, c.value as child_name, id(c) as child_id
    """
    params = {"parent_id": parent_id}
    query_results = graph.run(query, params)

    parent_name = None
    child_level = None
    child_list = []

    for record in query_results:
        if parent_name is None:
            parent_name = record["parent_name"]
        if child_level is None:
            child_level = record["child_level"]
        # OPTIONAL MATCH yields a row with null child fields when there are no children.
        if record["child_name"] is not None:
            child_list.append((record["child_name"], record["child_id"]))

    print(f"Traversing '{parent_name}'...")

    if not child_list:
        print("No child. Proceed to 'get_contents'...")
        return TraverseResult(parent_id, parent_name, -1, [], [], "get_contents")

    # Number the menu by child name only, so node ids and tuple syntax stay out of the prompt.
    child_list_with_number = [f"{i}. {name}" for i, (name, _) in enumerate(child_list)]
    sys_prompt_template = """ You are an AI assistant specialized in AWS documentation, particularly for Bedrock services. Your task is to identify the most relevant sub-menu(s) from the provided <{subgraph}> manual to answer the user's question about AWS Bedrock.

    Instructions:
        1. Carefully analyze the given list of sub-menus.
        2. Select the menu item(s) most directly related to the user's question.
        3. Respond with the index number(s) of the selected menu item(s), starting from 0.

    Selection criteria:
        1. Prioritize menus that directly address the specific topic or feature mentioned in the question.
        2. Look for keywords related to the question, such as "custom models", "Bedrock", "implementation", etc.
        3. Prefer detailed, specific menu items over general or introductory ones.
        4. If no menu item is sufficiently relevant, it's acceptable to not make a selection.

    Important: Your response should ONLY include the index number(s) of the selected menu item(s), nothing else. """

    usr_prompt_template = """ 
    Question: {question}

    Menu list: 
    {child_list_with_number}

    Response format: {csv_list_response_format} """

    sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, subgraph=subgraph, question=question, child_list_with_number=child_list_with_number, csv_list_response_format=csv_list_response_format)
    model_id = high_priority_model
    selected_ids = converse_with_bedrock(sys_prompt, usr_prompt, model_id)

    # Keep only tokens that are plain decimal indices inside the menu range.
    # isdecimal() (unlike isdigit()) accepts only characters that int() can parse.
    tokens = (token.strip() for token in selected_ids.split(','))
    valid_indices = [
        index
        for index in (int(token) for token in tokens if token.isdecimal())
        if index < len(child_list)
    ]

    if not valid_indices:
        # Nothing usable was selected: fall back to reading this node's contents.
        return TraverseResult(parent_id, parent_name, -1, [], [], "get_contents")

    selected_child_names = [child_list[index][0] for index in valid_indices]
    selected_child_ids = [child_list[index][1] for index in valid_indices]
    return TraverseResult(parent_id, parent_name, child_level, selected_child_ids, selected_child_names, "traverse_child")

In [None]:
class Context:
    """Contents gathered for one node, plus how they were (or should be) retrieved.

    Attributes are stored exactly as passed in:
        parent_id: Id of the node the contents belong to.
        parent_name: Name of that node.
        contents: Retrieved text (callers may overwrite it with search results).
        contents_length: Number of content nodes counted under the parent.
        search_type: Retrieval mode label, e.g. ``"get_short_documents"`` or
            ``"node_level_search"``.
        k: Number of items requested per retrieval.
    """

    def __init__(self, parent_id, parent_name, contents, contents_length, search_type, k):
        self.parent_id = parent_id
        self.parent_name = parent_name
        self.contents = contents
        self.contents_length = contents_length
        self.search_type = search_type
        self.k = k

    def __repr__(self):
        # Contents can be long, so only a shortened preview goes into the repr.
        preview = repr(self.contents)
        if len(preview) > 60:
            preview = preview[:57] + "..."
        return (
            f"{type(self).__name__}(parent_id={self.parent_id!r}, "
            f"parent_name={self.parent_name!r}, contents={preview}, "
            f"contents_length={self.contents_length!r}, "
            f"search_type={self.search_type!r}, k={self.k!r})"
        )

In [None]:
traverse_results = []

# Select the topic (top-level document) for the question
question = "how to utilize custom models in Bedrock"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Find the appropriate header: walk down the hierarchy, always following the
# first selected child.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # "get_contents" is the stop signal returned by traverse_child_dev.
    if result.next_action == "get_contents" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

In [None]:
def get_contents_dev(graph, parent_id, k=5):
    """Fetch the contents attached to a node, or flag that a search is needed.

    Args:
        graph: Graph object whose ``run(query, params)`` returns a cursor that
            supports ``.data()`` and iteration over records.
        parent_id: Id of the node whose ``HAS_CONTENTS`` children are read.
        k: Page size. Nodes with at most ``2 * k`` contents are read directly.

    Returns:
        A ``Context``. With at most ``2 * k`` contents, ``search_type`` is
        ``"get_short_documents"`` and ``contents`` holds the first ``k`` texts
        (ordered by ``order``) joined with spaces. Otherwise ``search_type`` is
        ``"node_level_search"`` and ``contents`` is empty.
    """
    # OPTIONAL MATCH keeps one result row for a node without any contents; a
    # plain MATCH with this grouped aggregation would return no rows at all.
    count_query = """
        MATCH (n)
        WHERE id(n) = $parent_id
        OPTIONAL MATCH (n)-[:HAS_CONTENTS]->(c)
        RETURN count(c) as contents_length, n.value as parent_name
    """
    rows = graph.run(count_query, {"parent_id": parent_id}).data()
    if rows:
        contents_length = rows[0]['contents_length']
        parent_name = rows[0]['parent_name']
    else:
        # No node with this id: treat it as an empty document instead of raising IndexError.
        contents_length, parent_name = 0, None
    print(f"Num Documents: {contents_length}")

    if contents_length > k * 2:
        # Too many chunks to read directly; the caller is expected to run a search.
        return Context(parent_id, parent_name, "", contents_length, "node_level_search", k)

    content_query = """
            MATCH (n)-[:HAS_CONTENTS]->(c)
            WHERE id(n) = $parent_id
            RETURN c.text
            ORDER BY c.order
            LIMIT $k
        """
    content_results = graph.run(content_query, {"parent_id": parent_id, "k": k})
    context = " ".join(record["c.text"] for record in content_results)
    return Context(parent_id, parent_name, context, contents_length, "get_short_documents", k)

In [None]:
question = "available model list provided in Bedrock"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the hierarchy, always following the first selected child.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # "get_contents" is the stop signal returned by traverse_child_dev.
    if result.next_action == "get_contents" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
print("==============")
print(context.contents)
print("==============")

In [None]:
question = "How to define Agent Actions using SDK in Bedrock's Agent"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the hierarchy, always following the first selected child.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # "get_contents" is the stop signal returned by traverse_child_dev.
    if result.next_action == "get_contents" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
print("==============")
print(context.contents)
print("==============")

In [None]:
from langchain_aws import BedrockEmbeddings
from langchain_community.vectorstores import Neo4jVector

searching_scheme = "full_text" # full_text | keyword | vector
csv_list_response_format = "Your response should be a list of comma separated values, eg: `foo, bar` or `foo,bar`"

def node_level_search_dev(question, graph, subgraph, parent_id, parent_name, language="English", k=5):
    """Search the contents attached to one node for text relevant to *question*.

    The strategy is taken from the module-level ``searching_scheme`` at call
    time: ``"vector"`` (embedding similarity), ``"full_text"`` (full-text
    index) or ``"keyword"`` (substring match). In every case the low-priority
    model first turns the question into the search text.

    Args:
        question: The user's question.
        graph: Graph object used to run the full-text / keyword queries.
        subgraph: Top-level document name (used in the vector-search prompt).
        parent_id: Id of the node whose ``HAS_CONTENTS`` children are searched.
        parent_name: Name of that node (used in the keyword prompt).
        language: Language requested for the generated search text.
        k: Maximum number of results.

    Returns:
        The matching texts, each suffixed with ``(Score: ...)``, joined by
        three newlines.

    Raises:
        ValueError: If ``searching_scheme`` is not one of the supported values.
    """
    # Fail before any LLM call; an unknown scheme would otherwise leave
    # search_query unassigned further down.
    if searching_scheme not in ("full_text", "keyword", "vector"):
        raise ValueError(
            f"Unsupported searching_scheme: {searching_scheme!r} "
            "(expected 'full_text', 'keyword' or 'vector')"
        )

    if searching_scheme == "vector":
        sys_prompt_template = """
        You are an expert engineer well-versed in AWS. Based on the user's question, please create a question suitable for vector search to find documents in the manual. ({language})

        Note:
        - Generate the most relevant and characteristic question considering both the document name and the user's question.
        - Prefer natural language questions that well represent specific content of the document.
        """

        usr_prompt_template = """
        #Target document name:
        {subgraph}

        #Question:
        {question}

        #Response format:
        {csv_list_response_format}
        """

        sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, language=language, subgraph=subgraph, question=question, csv_list_response_format=csv_list_response_format)
        model_id = low_priority_model
        keywords = converse_with_bedrock(sys_prompt, usr_prompt, model_id)

        index_name = "content_embedding_index"
        embeddings = BedrockEmbeddings(model_id="cohere.embed-multilingual-v3", region_name=region_name)
        vector_store = Neo4jVector.from_existing_index(
            embedding=embeddings,
            index_name=index_name,
            node_label="Content",
            text_node_property="text",
            embedding_node_property="embedding"
        )

        question_embedding = embeddings.embed_query(keywords)

        # NOTE(review): the index lookup is not restricted to this parent; hits
        # are filtered afterwards by `node = child`, so fewer than k rows may
        # come back — confirm this is intended.
        vector_search_query = """
            MATCH (parent)-[:HAS_CONTENTS]->(child:Content)
            WHERE id(parent) = $parent_id
            WITH child
            CALL db.index.vector.queryNodes($index_name, $k, $question_embedding) YIELD node, score
            WHERE node = child
            RETURN id(node) AS node_id, node.text AS text, score
            ORDER BY score DESC
            LIMIT $k
        """

        params = {
            "parent_id": parent_id,
            "question_embedding": question_embedding,
            "k": k,
            "index_name": index_name
        }

        search_results = vector_store.query(vector_search_query, params=params)

    else:
        sys_prompt_template = """
        You are an expert engineer well-versed in AWS. Based on the user's question, extract one core keyword from the manual.
        The keyword must meet the following conditions:
        1. No special characters like '_', '-' in the keyword (e.g., respond with 'custom model' instead of 'custom_model')
        2. Choose the most appropriate word within the given document name that fits the context of the question (no need to include the document name in the keyword)
        3. Select specific words that represent the particular function or concept you're searching for, rather than content already included in the document name
        4. Provide the keyword in {language}

        Note:
        - Consider both the document name and the question to select the most relevant and characteristic word within that document.
        - Prefer words that represent specific content of the document rather than overly general terms.
        """

        usr_prompt_template = """
        #Target document name:
        {parent_name}

        #Question:
        {question}

        #Response format:
        {csv_list_response_format}
        """
        sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, language=language, parent_name=parent_name, question=question, csv_list_response_format=csv_list_response_format)
        model_id = low_priority_model
        keywords = converse_with_bedrock(sys_prompt, usr_prompt, model_id)

        if searching_scheme == "full_text":
            search_query = """MATCH (parent)-[:HAS_CONTENTS]->(child)
                WHERE id(parent) = $parent_id
                WITH child
                CALL db.index.fulltext.queryNodes("Search_Content_by_FullText", $keywords) YIELD node, score
                WHERE node = child
                RETURN node.text as text, score
                ORDER BY score DESC
                LIMIT $k
            """
        else:  # "keyword" — the only remaining scheme after the validation above
            search_query = """MATCH (parent)-[:HAS_CONTENTS]->(child)
                WHERE id(parent) = $parent_id
                WITH child, $keywords AS keyword
                WHERE child.text CONTAINS keyword
                RETURN child.text AS text, 
                    size(split(toLower(child.text), toLower(keyword))) - 1 AS score
                ORDER BY score DESC
                LIMIT $k
            """

        params = {"parent_id": parent_id, "keywords": keywords, "k": k}
        search_results = graph.run(search_query, params)

    content = "\n\n\n".join(f"{record['text']} (Score: {record['score']})" for record in search_results)
    return content

In [None]:
question = "How to define Agent Actions using SDK in Bedrock's Agent"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the hierarchy, always following the first selected child.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # "get_contents" is the stop signal returned by traverse_child_dev.
    if result.next_action == "get_contents" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    # k must be passed by keyword: the sixth positional parameter is `language`.
    search_content = node_level_search_dev(question, graph, subgraph, context.parent_id, context.parent_name, k=context.k)
    print(search_content)

In [None]:
def check_relevance_dev(question, content, parent_name, contents_length, search_type, k=5):
    """Ask the LLM whether *content* is enough to answer *question*.

    Args:
        question: The user's question.
        content: Preliminary information retrieved so far.
        parent_name: Name of the document the content came from.
        contents_length: Total number of content items under that document.
        search_type: How the content was retrieved.
        k: Page size used for retrieval.

    Returns:
        The model's verdict with surrounding whitespace, quotes, backticks and
        periods removed. The prompt asks for ``None`` or ``Complete``, and also
        allows ``Partial`` when ``search_type`` is ``"get_short_documents"`` and
        more than ``k`` items exist.
    """
    optional_prompt1 = ""
    optional_prompt2 = ""

    # "Partial" is only offered when a short document was read and unread items remain.
    if search_type == "get_short_documents" and contents_length > k:
        optional_prompt1 = "- Preliminary information matches the question's intent but further content needs to be checked: 'Partial'"
        optional_prompt2 = "or `Partial`"

    sys_prompt_template = """
    You are a skilled data analyst. Your task is to determine whether the given question can be answered using only the provided preliminary information, based on the following criteria:

    Judgment criteria:
    1. Do the key keywords of the question appear in the document name or preliminary information?
    2. Does it contain the specific information we're trying to find out?

    Response method:
    - If the document name and preliminary information are irrelevant to the question: 'None'
    - If the question can be answered with the preliminary information alone: 'Complete'
    {partial1}

    Skip any preamble and respond only with `None` or `Complete`{partial2}.
    """

    usr_prompt_template = """
    #Preliminary information (Document name: {parent_name})
    {context}

    #Question: {question}
    """

    sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, partial1=optional_prompt1, partial2=optional_prompt2, parent_name=parent_name, question=question, context=content)
    model_id = high_priority_model
    status = converse_with_bedrock(sys_prompt, usr_prompt, model_id)

    # Callers compare the verdict with ==, so drop decoration the model may add
    # around the bare word (the prompt itself shows the options in backticks).
    return status.strip().strip("`'\".").strip()

In [None]:
question = "How to encrypt data in model customization tasks"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the hierarchy, always following the first selected child.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # "get_contents" is the stop signal returned by traverse_child_dev.
    if result.next_action == "get_contents" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    # k must be passed by keyword: the sixth positional parameter is `language`.
    context.contents = node_level_search_dev(question, graph, subgraph, context.parent_id, context.parent_name, k=context.k)

status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
print(status)

In [None]:
question = "How to utilize custom models in Bedrock"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the hierarchy, always following the first selected child.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # "get_contents" is the stop signal returned by traverse_child_dev.
    if result.next_action == "get_contents" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    # k must be passed by keyword: the sixth positional parameter is `language`.
    context.contents = node_level_search_dev(question, graph, subgraph, context.parent_id, context.parent_name, k=context.k)

status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
print(status)

In [None]:
question = "How to utilize the memory feature in Bedrock Agent"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the hierarchy, always following the first selected child.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # "get_contents" is the stop signal returned by traverse_child_dev.
    if result.next_action == "get_contents" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    # k must be passed by keyword: the sixth positional parameter is `language`.
    context.contents = node_level_search_dev(question, graph, subgraph, context.parent_id, context.parent_name, k=context.k)

status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
print(status)

In [None]:
def get_sibling_contents_dev(graph, parent_id, content, k=5, trial=1):
    """Append the next page of a node's contents to already-read *content*.

    Args:
        graph: Graph object whose ``run(query, params)`` yields records.
        parent_id: Id of the node whose ``HAS_CONTENTS`` children are read.
        content: Text gathered so far; it is placed first in the result.
        k: Page size (maximum number of additional items fetched).
        trial: Page number of the follow-up read. Items with
            ``order >= k * trial`` are fetched. The default of 1 reads the
            page starting at ``order == k``.

    Returns:
        *content* followed by the fetched texts, all joined with single spaces.
    """
    content_query = """
        MATCH (n)-[:HAS_CONTENTS]->(c)
        WHERE id(n) = $parent_id AND c.order >= $order_pos
        RETURN c.text
        ORDER BY c.order
        LIMIT $k
    """
    order_pos = k * trial
    params = {"parent_id": parent_id, "k": k, "order_pos": order_pos}

    content_results = graph.run(content_query, params)
    sibling_content = [record["c.text"] for record in content_results]
    return " ".join([content, *sibling_content])

In [None]:
question = "web pages as a data source for Knowledge Base in Bedrock"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the hierarchy, always following the first selected child.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # "get_contents" is the stop signal returned by traverse_child_dev.
    if result.next_action == "get_contents" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    # k must be passed by keyword: the sixth positional parameter is `language`.
    context.contents = node_level_search_dev(question, graph, subgraph, context.parent_id, context.parent_name, k=context.k)

status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
print(status)

In [None]:
# Select the topic (top-level document) for the question
question = "Response format when using the Converse API in Bedrock"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the hierarchy, always following the first selected child.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # "get_contents" is the stop signal returned by traverse_child_dev.
    if result.next_action == "get_contents" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    # k must be passed by keyword: the sixth positional parameter is `language`.
    context.contents = node_level_search_dev(question, graph, subgraph, context.parent_id, context.parent_name, k=context.k)

status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
print(status)

In [None]:
# Not Found
question = "pricing policy in Amazon Bedrock"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the hierarchy, always following the first selected child.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # "get_contents" is the stop signal returned by traverse_child_dev.
    if result.next_action == "get_contents" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    # k must be passed by keyword: the sixth positional parameter is `language`.
    context.contents = node_level_search_dev(question, graph, subgraph, context.parent_id, context.parent_name, k=context.k)

status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
print(status)

In [None]:
from langchain_aws import BedrockEmbeddings
from langchain_community.vectorstores import Neo4jVector

searching_scheme = "full_text" # full_text | keyword | vector
csv_list_response_format = "Your response should be a list of comma separated values, eg: `foo, bar` or `foo,bar`"

def subgraph_level_search_dev(question, graph, subgraph, language="English", k=5):
    """Search every content node under one top-level document for *question*.

    The strategy is taken from the module-level ``searching_scheme`` at call
    time: ``"vector"`` (embedding similarity), ``"full_text"`` (full-text
    index) or ``"keyword"`` (substring match). In every case the low-priority
    model first turns the question into the search text.

    Args:
        question: The user's question.
        graph: Graph object used to run the full-text / keyword queries.
        subgraph: ``value`` of the level-1 ``Title`` node that roots the search.
        language: Language requested for the generated search text.
        k: Maximum number of results.

    Returns:
        The matching texts, each suffixed with ``(Score: ...)``, joined by
        three newlines.

    Raises:
        ValueError: If ``searching_scheme`` is not one of the supported values.
    """
    # Fail before any LLM call; an unknown scheme would otherwise leave
    # search_query unassigned further down.
    if searching_scheme not in ("full_text", "keyword", "vector"):
        raise ValueError(
            f"Unsupported searching_scheme: {searching_scheme!r} "
            "(expected 'full_text', 'keyword' or 'vector')"
        )

    if searching_scheme == "vector":
        print("vector search started")

        sys_prompt_template = """
        You are an expert engineer well-versed in AWS. Based on the user's question, please create a question suitable for vector search to find documents in the manual. ({language})

        Note:
        - Generate the most relevant and characteristic question considering both the document name and the user's question.
        - Prefer natural language questions that well represent specific content of the document.
        """

        usr_prompt_template = """
        #Target document name:
        {subgraph}

        #Question:
        {question}

        #Response format:
        {csv_list_response_format}
        """

        sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, language=language, subgraph=subgraph, question=question, csv_list_response_format=csv_list_response_format)
        model_id = low_priority_model
        keywords = converse_with_bedrock(sys_prompt, usr_prompt, model_id)
        print(keywords)
        index_name = "content_embedding_index"

        embeddings = BedrockEmbeddings(model_id="cohere.embed-multilingual-v3", region_name=region_name)
        question_embedding = embeddings.embed_query(keywords)

        vector_search_query = """
        MATCH (root:Title {level: "1", value: $subgraph})
        MATCH (root)-[:HAS_CHILD*0..]->(title:Title)-[:HAS_CONTENTS]->(content:Content)

        CALL db.index.vector.queryNodes($index_name, $k, $question_embedding) YIELD node, score
        WHERE node = content

        RETURN node.text AS text, score
        ORDER BY score DESC
        LIMIT $k
        """

        params = {
            "subgraph": subgraph,
            "question_embedding": question_embedding,
            "k": k,
            "index_name": index_name
        }

        vector_store = Neo4jVector.from_existing_index(
            embedding=embeddings,
            index_name=index_name,
            node_label="Content",
            text_node_property="text",
            embedding_node_property="embedding"
        )

        search_results = vector_store.query(vector_search_query, params=params)

    else:
        print("text search started")
        sys_prompt_template = """
        You are an expert engineer well-versed in AWS. Based on the user's question, extract one core keyword from the manual.
        The keyword must meet the following conditions:
        1. No special characters like '_', '-' in the keyword (e.g., respond with 'custom model' instead of 'custom_model')
        2. Choose the most appropriate word within the given document name that fits the context of the question (no need to include the document name in the keyword)
        3. Select specific words that represent the particular function or concept you're searching for, rather than content already included in the document name
        4. Provide the keyword in {language}

        Note:
        - Consider both the document name and the question to select the most relevant and characteristic word within that document.
        - Prefer words that represent specific content of the document rather than overly general terms.
        """

        usr_prompt_template = """
        #Target document name:
        {subgraph}

        #Question:
        {question}

        #Response format:
        {csv_list_response_format}
        """
        sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, language=language, subgraph=subgraph, question=question, csv_list_response_format=csv_list_response_format)
        model_id = low_priority_model
        keywords = converse_with_bedrock(sys_prompt, usr_prompt, model_id)

        if searching_scheme == "full_text":
            # NOTE(review): other queries in this notebook read Title.value; whether
            # Title nodes also carry a `name` property is not visible here — confirm.
            search_query = """
                MATCH (root:Title {level: "1", value: $subgraph})
                MATCH (root)-[:HAS_CHILD*0..]->(title:Title)-[:HAS_CONTENTS]->(content:Content)

                CALL db.index.fulltext.queryNodes("Search_Content_by_FullText", $keywords) YIELD node, score
                WHERE node = content

                RETURN node.text as text, score, title.name as title_name, title.level as title_level
                ORDER BY score DESC
                LIMIT $k
            """
        else:  # "keyword" — the only remaining scheme after the validation above
            search_query = """
                MATCH (root:Title {level: "1", value: $subgraph})
                MATCH (root)-[:HAS_CHILD*0..]->(title:Title)-[:HAS_CONTENTS]->(content:Content)
                WITH content, title, $keywords AS keyword
                WHERE content.text CONTAINS keyword
                RETURN content.text AS text, 
                    size(split(toLower(content.text), toLower(keyword))) - 1 AS score,
                    {
                        title: title.name,
                        level: title.level,
                        value: title.value
                    } AS metadata
                ORDER BY score DESC
                LIMIT $k
            """

        params = { "subgraph": subgraph, "k": k, "keywords": keywords}
        search_results = graph.run(search_query, params)

    content = "\n\n\n".join(f"{record['text']} (Score: {record['score']})" for record in search_results)
    return content

In [None]:
# Not Found
question = "pricing policy in Amazon Bedrock"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the hierarchy, always following the first selected child.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # "get_contents" is the stop signal returned by traverse_child_dev.
    if result.next_action == "get_contents" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    # k must be passed by keyword: the sixth positional parameter is `language`.
    context.contents = node_level_search_dev(question, graph, subgraph, context.parent_id, context.parent_name, k=context.k)

status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
print(status)

# Widen the search according to the relevance verdict.
if status == 'Partial':
    context.contents = get_sibling_contents_dev(graph, context.parent_id, context.contents, context.k)
elif status == 'None':
    context.contents = subgraph_level_search_dev(question, graph, subgraph, 'English', 5)

print(context.contents)

In [None]:
from langchain_aws import BedrockEmbeddings
from langchain_community.vectorstores import Neo4jVector

# Retrieval strategy that global_search_dev branches on.
searching_scheme = "vector" # full_text | keyword | vector
# Response-format instruction substituted into the user prompt's
# {csv_list_response_format} placeholder by global_search_dev.
csv_list_response_format = "Your response should be a list of comma separated values, eg: `foo, bar` or `foo,bar`"

def global_search_dev(question, graph, language="English", k=5):
    """Search all ``Content`` nodes for text relevant to ``question``.

    The retrieval strategy is taken from the module-level ``searching_scheme``:

    * ``"vector"``    - the low-priority model rewrites the question, the
      rewrite is embedded with ``cohere.embed-multilingual-v3`` and matched
      against the ``content_embedding_index`` vector index.
    * ``"full_text"`` - the low-priority model extracts a keyword which is sent
      to the ``Search_Content_by_FullText`` full-text index.
    * ``"keyword"``   - the extracted keyword is matched with ``CONTAINS`` and
      scored by how many times it splits the lower-cased text.

    Args:
        question: The user's question.
        graph: Graph handle whose ``run(query, params)`` executes Cypher. Only
            used by the ``full_text`` and ``keyword`` schemes; the vector
            scheme goes through ``Neo4jVector`` instead.
        language: Language name substituted into the system prompt.
        k: Maximum number of results requested from the database.

    Returns:
        One string with every hit rendered as ``"<text> (Score: <score>)"``,
        hits separated by three newlines. Empty string when nothing matched.

    Raises:
        ValueError: If ``searching_scheme`` is not a supported scheme.
    """
    # Validate before any model call: previously an unknown scheme only failed
    # after the Bedrock request, with an UnboundLocalError on ``search_query``.
    supported_schemes = ("vector", "full_text", "keyword")
    if searching_scheme not in supported_schemes:
        raise ValueError(
            f"Unsupported searching_scheme {searching_scheme!r}; "
            f"expected one of {supported_schemes}"
        )

    if searching_scheme == "vector":
        sys_prompt_template = """
        You are an expert engineer well-versed in AWS. Based on the user's question, please create a question suitable for vector search to find documents in the manual. ({language})

        Note:
        - Generate the most relevant and characteristic question considering both the document name and the user's question.
        - Prefer natural language questions that well represent specific content of the document.
        """

        usr_prompt_template = """
        #Question:
        {question}

        #Response format:
        {csv_list_response_format}
        """

        sys_prompt, usr_prompt = create_prompt(
            sys_prompt_template,
            usr_prompt_template,
            language=language,
            question=question,
            csv_list_response_format=csv_list_response_format,
        )
        model_id = low_priority_model
        # The model's rewrite (not the raw question) is what gets embedded.
        keywords = converse_with_bedrock(sys_prompt, usr_prompt, model_id)

        index_name = "content_embedding_index"

        embeddings = BedrockEmbeddings(model_id="cohere.embed-multilingual-v3", region_name=region_name)
        question_embedding = embeddings.embed_query(keywords)

        vector_search_query = """
        CALL db.index.vector.queryNodes($index_name, $k, $question_embedding) YIELD node, score
        WITH DISTINCT node, score
        WHERE node:Content
        RETURN node.text AS text, score
        ORDER BY score DESC
        """

        params = {
            "question_embedding": question_embedding,
            "k": k,
            "index_name": index_name,
        }

        # No connection details are passed here, so Neo4jVector presumably
        # picks them up from the NEO4J_* environment variables - confirm.
        vector_store = Neo4jVector.from_existing_index(
            embedding=embeddings,
            index_name=index_name,
            node_label="Content",
            text_node_property="text",
            embedding_node_property="embedding",
        )

        search_results = vector_store.query(vector_search_query, params=params)

    else:
        sys_prompt_template = """
        You are an expert engineer well-versed in AWS. Based on the user's question, extract one core keyword from the manual.
        The keyword must meet the following conditions:
        1. No special characters like '_', '-' in the keyword (e.g., respond with 'custom model' instead of 'custom_model')
        2. Choose the most appropriate word within the given document name that fits the context of the question (no need to include the document name in the keyword)
        3. Select specific words that represent the particular function or concept you're searching for, rather than content already included in the document name
        4. Provide the keyword in {language}

        Note:
        - Consider both the document name and the question to select the most relevant and characteristic word within that document.
        - Prefer words that represent specific content of the document rather than overly general terms.
        """

        usr_prompt_template = """
        #Question:
        {question}

        #Response format:
        {csv_list_response_format}
        """
        sys_prompt, usr_prompt = create_prompt(
            sys_prompt_template,
            usr_prompt_template,
            language=language,
            question=question,
            csv_list_response_format=csv_list_response_format,
        )
        model_id = low_priority_model
        keywords = converse_with_bedrock(sys_prompt, usr_prompt, model_id)
        print(keywords)

        if searching_scheme == "full_text":
            search_query = """
            CALL db.index.fulltext.queryNodes("Search_Content_by_FullText", $keywords) YIELD node, score
            WHERE node:Content
            OPTIONAL MATCH (title:Title)-[:HAS_CONTENTS]->(node)
            RETURN node.text as text, score, title.name as title_name, title.level as title_level
            ORDER BY score DESC
            LIMIT $k
            """
        else:  # "keyword" - the only scheme left after the validation above
            # NOTE(review): the CONTAINS filter is case-sensitive while the
            # score lower-cases both sides, so differently-cased matches are
            # filtered out before scoring - confirm this is intended.
            search_query = """
            MATCH (content:Content)
            WITH content, $keywords AS keyword
            WHERE content.text CONTAINS keyword
            OPTIONAL MATCH (title:Title)-[:HAS_CONTENTS]->(content)
            RETURN content.text AS text, 
                size(split(toLower(content.text), toLower(keyword))) - 1 AS score,
                {
                    title: title.name,
                    level: title.level,
                    value: title.value
                } AS metadata
            ORDER BY score DESC
            LIMIT $k
            """

        params = {"k": k, "keywords": keywords}
        search_results = graph.run(search_query, params)

    # Every scheme returns records exposing 'text' and 'score'.
    content = "\n\n\n".join(f"{record['text']} (Score: {record['score']})" for record in search_results)
    return content

In [None]:
question = "pricing policy in Amazon Bedrock"

# Run the graph-wide search directly, skipping the subgraph traversal.
content = global_search_dev(question, graph, language='English', k=5)
print(content)

In [None]:
from langchain_aws import ChatBedrock
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

def generate_answer_dev(question, context):
    """Stream an answer to ``question`` grounded in ``context`` to stdout.

    Builds a system/human chat prompt, pipes it through the high-priority
    Bedrock chat model and a string output parser, and prints each streamed
    chunk as it arrives (no trailing newline is printed).

    Args:
        question: The user's question, substituted into ``{question}``.
        context: Retrieved text the answer must rely on, substituted into
            ``{context}``.

    Returns:
        None. The answer is only printed.
    """
    # Prompt setting
    sys_prompt_template = """
    You are an expert engineer well-versed in AWS. Generate an answer to the user's question using only the given preliminary information. If the question asks about content not provided in the preliminary information, respond that you don't know."""

    usr_prompt_template = """
    #Preliminary information: 
    {context}

    #User question:
    {question}
    """
    prompt = ChatPromptTemplate.from_messages([("system", sys_prompt_template), ("human", usr_prompt_template)])

    # Model setting
    model_kwargs = {
        "temperature": 0.5,
        "max_tokens": 4096,
    }
    # Use the module-level region so this call follows the same region setting
    # as the other Bedrock clients in the file instead of a hard-coded copy.
    llm = ChatBedrock(
        model_id=high_priority_model,
        region_name=region_name,
        model_kwargs=model_kwargs,
        streaming=True,
    )

    # Output setting
    parser = StrOutputParser()

    # Chain
    chain = prompt | llm | parser
    for chunk in chain.stream({"context": context, "question": question}):
        # flush so each chunk shows up immediately while streaming
        print(chunk, end="", flush=True)

In [None]:
question = "How to optimize the performance in Bedrock's Agent"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the graph one traversal step per iteration, always following
# only the first child that traverse_child_dev selected.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # Stop as soon as the step asks for content or offers no child to follow.
    if result.next_action == "get_content" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

# Build the context from the parent id of the last traversal result.
context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    # Replace the contents with the result of a search scoped to that node.
    context.contents = node_level_search_dev(
        question, graph, subgraph, context.parent_id, context.parent_name, context.k
    )

status = check_relevance_dev(
    question,
    context.contents,
    context.parent_name,
    context.contents_length,
    context.search_type,
    context.k,
)
print(status)

# 'Partial' pulls in sibling contents; 'None' falls back to a subgraph-wide search.
# Any other status leaves the contents untouched.
if status == 'Partial':
    context.contents = get_sibling_contents_dev(
        graph, context.parent_id, context.contents, context.k
    )
elif status == 'None':
    context.contents = subgraph_level_search_dev(question, graph, subgraph, 'English', 5)

# Stream the final answer built from whatever contents were settled on.
generate_answer_dev(question, context.contents)

In [None]:
question = "How to evaluate Knowledge Base in Bedrock"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the graph one traversal step per iteration, always following
# only the first child that traverse_child_dev selected.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # Stop as soon as the step asks for content or offers no child to follow.
    if result.next_action == "get_content" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

# Build the context from the parent id of the last traversal result.
context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    # Replace the contents with the result of a search scoped to that node.
    context.contents = node_level_search_dev(
        question, graph, subgraph, context.parent_id, context.parent_name, context.k
    )

status = check_relevance_dev(
    question,
    context.contents,
    context.parent_name,
    context.contents_length,
    context.search_type,
    context.k,
)
print(status)

# 'Partial' pulls in sibling contents; 'None' falls back to a subgraph-wide search.
# Any other status leaves the contents untouched.
if status == 'Partial':
    context.contents = get_sibling_contents_dev(
        graph, context.parent_id, context.contents, context.k
    )
elif status == 'None':
    context.contents = subgraph_level_search_dev(question, graph, subgraph, 'English', 5)

# Stream the final answer built from whatever contents were settled on.
generate_answer_dev(question, context.contents)

In [None]:
question = "pricing policy for Bedrock"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the graph one traversal step per iteration, always following
# only the first child that traverse_child_dev selected.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # Stop as soon as the step asks for content or offers no child to follow.
    if result.next_action == "get_content" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

# Build the context from the parent id of the last traversal result.
context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    # Replace the contents with the result of a search scoped to that node.
    context.contents = node_level_search_dev(
        question, graph, subgraph, context.parent_id, context.parent_name, context.k
    )

status = check_relevance_dev(
    question,
    context.contents,
    context.parent_name,
    context.contents_length,
    context.search_type,
    context.k,
)
print(status)

# 'Partial' pulls in sibling contents; 'None' falls back to a subgraph-wide search.
# Any other status leaves the contents untouched.
if status == 'Partial':
    context.contents = get_sibling_contents_dev(
        graph, context.parent_id, context.contents, context.k
    )
elif status == 'None':
    context.contents = subgraph_level_search_dev(question, graph, subgraph, 'English', 5)

# Stream the final answer built from whatever contents were settled on.
generate_answer_dev(question, context.contents)

In [None]:
question = "What is Amazon Bedrock Playground?"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the graph one traversal step per iteration, always following
# only the first child that traverse_child_dev selected.
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # Stop as soon as the step asks for content or offers no child to follow.
    if result.next_action == "get_content" or not result.selected_child_ids:
        break
    target_node = [result.selected_child_ids[0]]

# Build the context from the parent id of the last traversal result.
context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    # Replace the contents with the result of a search scoped to that node.
    context.contents = node_level_search_dev(
        question, graph, subgraph, context.parent_id, context.parent_name, context.k
    )

status = check_relevance_dev(
    question,
    context.contents,
    context.parent_name,
    context.contents_length,
    context.search_type,
    context.k,
)
print(status)

# 'Partial' pulls in sibling contents; 'None' falls back to a subgraph-wide search.
# Any other status leaves the contents untouched.
if status == 'Partial':
    context.contents = get_sibling_contents_dev(
        graph, context.parent_id, context.contents, context.k
    )
elif status == 'None':
    context.contents = subgraph_level_search_dev(question, graph, subgraph, 'English', 5)

# Stream the final answer built from whatever contents were settled on.
generate_answer_dev(question, context.contents)