In [None]:
import boto3
from botocore.config import Config

# AWS region used for the Bedrock runtime client and for the embedding model.
region_name = "us-west-2"
# Alternative model IDs for the "high level" role; swap the comment marker to enable one.
#high_level_model = "anthropic.claude-3-5-sonnet-20240620-v1:0"
#high_level_model = "anthropic.claude-3-sonnet-20240229-v1:0"
# Model used for menu traversal and relevance checks.
high_level_model = "anthropic.claude-3-haiku-20240307-v1:0"
# Model used for document selection and keyword/query generation.
low_level_model = "anthropic.claude-3-haiku-20240307-v1:0"

def converse_with_bedrock(sys_prompt, usr_prompt, model_id):
    """Send one request through the Bedrock Converse API and return the reply text.

    Args:
        sys_prompt: Value forwarded as the ``system`` argument.
        usr_prompt: Value forwarded as the ``messages`` argument.
        model_id: Bedrock model identifier.

    Returns:
        The ``text`` field of the first content block of the output message.
    """
    # Sampling is pinned (temperature 0, topP 0.1, top_k 1) on every call.
    request = dict(
        modelId=model_id,
        messages=usr_prompt,
        system=sys_prompt,
        inferenceConfig={"temperature": 0, "topP": 0.1},
        additionalModelRequestFields={"top_k": 1},
    )
    # Uses the module-level client created by init_boto3_client.
    reply = boto3_client.converse(**request)
    first_block = reply['output']['message']['content'][0]
    return first_block['text']

def init_boto3_client(region: str):
    """Create a ``bedrock-runtime`` client for *region*.

    The client is configured with up to 10 attempts in ``standard`` retry mode.
    """
    retry_policy = {"max_attempts": 10, "mode": "standard"}
    client_config = Config(region_name=region, retries=retry_policy)
    return boto3.client(
        "bedrock-runtime",
        region_name=region,
        config=client_config,
    )


def create_prompt(sys_template, user_template, **kwargs):
    """Fill both templates with *kwargs* and wrap them in Converse-style structures.

    Args:
        sys_template: ``str.format`` template for the system prompt.
        user_template: ``str.format`` template for the user message.
        **kwargs: Values substituted into both templates.

    Returns:
        A pair ``(system_blocks, messages)``: a one-element list holding the
        system text, and a one-element list holding a single ``user`` message.
    """
    system_text = sys_template.format(**kwargs)
    user_text = user_template.format(**kwargs)
    user_message = {"role": "user", "content": [{"text": user_text}]}
    return [{"text": system_text}], [user_message]

# Module-level Bedrock runtime client; converse_with_bedrock reads this global.
boto3_client = init_boto3_client(region_name)


In [None]:
from py2neo import Graph
import os

# Connection settings for a local Neo4j instance.
# NOTE(review): credentials are hard-coded; presumably Graph() below picks these
# environment variables up (or falls back to py2neo's own defaults) — confirm.
os.environ["NEO4J_URI"] = "bolt://localhost:7687"
os.environ["NEO4J_USERNAME"] = "neo4j"
os.environ["NEO4J_PASSWORD"] = "password"

# Graph handle passed to every query function in this notebook.
graph = Graph()

In [None]:
def select_subgraph_dev(question, graph):
    """Ask the LLM which top-level manual best matches *question*.

    Every ``Title`` node with ``level: "1"`` is loaded, the numbered titles are
    shown to the low-level model, and the reply is interpreted as a 0-based
    index into that list.

    Args:
        question: The user's question.
        graph: Object exposing ``run(query)`` that yields records.

    Returns:
        ``([node_id], title, "traverse_child")`` when the reply is a valid
        index, otherwise ``([], "", "generate_answer")`` (empty reply,
        non-integer reply, or index outside the list).
    """
    query = """
        MATCH (n:Title {level: "1"})
        RETURN n.value, id(n) as node_id
    """
    results = graph.run(query)
    subgraph_list = [(record["n.value"], record["node_id"]) for record in results]
    subgraph_list_with_number = [f"{i}. {title}" for i, (title, _) in enumerate(subgraph_list)]

    sys_prompt_template = """ 
    You are an expert engineer well-versed in AWS manual documents. 
    Your task is to select the most appropriate manual document name for the user's question. 
    If there are no relevant documents, provide an empty list (""). """

    usr_prompt_template = """ 
    Please select the single most relevant document name for the given question.

    #Question: {question}

    #Document List: {subgraph_list_with_number}

    #Response Format: Provide only the index number of the selected document (omit any preamble) """
    sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, question=question, subgraph_list_with_number=subgraph_list_with_number)

    model_id = low_level_model
    reply = converse_with_bedrock(sys_prompt, usr_prompt, model_id).strip()

    if not reply:
        return [], "", "generate_answer"

    try:
        index = int(reply)
    except ValueError:
        # Anything that is not a plain integer is treated as "no match".
        return [], "", "generate_answer"

    # Reject negative values explicitly: Python would otherwise index from the
    # end of the list and silently pick an unrelated document.
    if not 0 <= index < len(subgraph_list):
        return [], "", "generate_answer"

    title, node_id = subgraph_list[index]
    print("Selected:", title)
    return [node_id], title, "traverse_child"

In [None]:
# Pick the manual (topic) that matches the question
question = "Bedrock에서 제공하는 모델 목록"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, subgraph, next_step, sep=" | ")

In [None]:
# Pick the manual (topic) for a question whose subject is not in the graph yet
question = "SageMaker에서 모델을 배포하는 방법"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, subgraph, next_step, sep=" | ")

In [None]:
class TraverseResult:
    """Outcome of one traversal step produced by ``traverse_child_dev``.

    Attributes:
        parent_id: Id of the node whose children were examined.
        parent_name: ``value`` of that node.
        child_level: ``level`` of the children, or ``-1`` when traversal stops.
        selected_child_ids: Ids of the children chosen by the model.
        child_names: Names of the chosen children.
        next_action: ``"traverse_child"`` or ``"get_contents"``.
    """

    def __init__(self, parent_id, parent_name, child_level, selected_child_ids, child_names, next_action):
        self.parent_id = parent_id
        self.parent_name = parent_name
        self.child_level = child_level
        self.selected_child_ids = selected_child_ids
        self.child_names = child_names
        self.next_action = next_action

    def __repr__(self):
        # Results are collected in a list and inspected interactively, so a
        # readable representation is more useful than the default address.
        return (
            f"{type(self).__name__}("
            f"parent_id={self.parent_id!r}, "
            f"parent_name={self.parent_name!r}, "
            f"child_level={self.child_level!r}, "
            f"selected_child_ids={self.selected_child_ids!r}, "
            f"child_names={self.child_names!r}, "
            f"next_action={self.next_action!r})"
        )

In [None]:
# Output-format instruction inserted into prompts that expect a comma-separated reply.
csv_list_response_format = "Your response should be a list of comma separated values, eg: `foo, bar` or `foo,bar`"

def traverse_child_dev(question, subgraph, graph, target_node):
    """Let the LLM choose which child menus of a node are relevant to *question*.

    Args:
        question: The user's question.
        subgraph: Name of the manual being traversed (inserted into the prompt).
        graph: Object exposing ``run(query, params)`` that yields records.
        target_node: Sequence whose first element is the id of the parent node.

    Returns:
        A ``TraverseResult``. ``next_action`` is ``"traverse_child"`` when at
        least one valid child index was returned by the model; otherwise it is
        ``"get_contents"`` with ``child_level`` set to ``-1`` and empty
        selections (no children, no usable index in the reply, or only
        out-of-range indices).
    """
    parent_id = target_node[0]
    query = """
        MATCH (n)
        WHERE id(n) = $parent_id
        OPTIONAL MATCH (n)-[:HAS_CHILD]->(c)
        RETURN n.value as parent_name, c.level as child_level, c.value as child_name, id(c) as child_id
    """
    params = {"parent_id": parent_id}
    query_results = graph.run(query, params)

    parent_name = None
    child_level = None
    child_list = []

    for record in query_results:
        if parent_name is None:
            parent_name = record["parent_name"]
        if child_level is None:
            child_level = record["child_level"]
        # OPTIONAL MATCH yields a row with null child fields when there is no child.
        if record["child_name"] is not None:
            child_list.append((record["child_name"], record["child_id"]))

    print(f"Traversing '{parent_name}'...")

    if not child_list:
        print("No child. Proceed to 'get_contents'...")
        return TraverseResult(parent_id, parent_name, -1, [], [], "get_contents")

    # Show only the menu names: including the (name, id) tuple would put raw
    # node ids next to the index the model is asked to return.
    child_list_with_number = [f"{i}. {name}" for i, (name, _) in enumerate(child_list)]
    sys_prompt_template = """
    당신은 AWS 매뉴얼 문서에 정통한 전문 엔지니어입니다.
    당신의 임무는 사용자의 질문에 답변하기 위해, <{subgraph}> 매뉴얼 문서에서 가장 관련성 높은 하위 메뉴를 선택하는 것입니다.

    작업 순서:
    1. 주어진 하위 메뉴 목록을 검토하여 직접적으로 연관된 메뉴들을 찾습니다.
    2. 연관성이 가장 높은 메뉴를 선택하여, 인덱스 번호(0부터 시작)로 응답합니다.
    3. 질문과 매우 밀접한 메뉴가 1개 이상인 경우, 선택한 메뉴의 인덱스 번호 목록으로 응답합니다.

    선택 기준:
    - 질문의 핵심 키워드와 일치하고 질문의 맥락에 맞는 메뉴를 우선적으로 고려하세요.
    - 일반적 가이드보다는 질문의 특정 주제나 기능을 다루는 메뉴를 선호합니다. 예를 들어, 'Getting started' 가이드보다는 특정 기능이나 서비스에 대한 상세 설명이 있는 항목을 선호합니다.
    - 반드시 선택을 해야하는 것은 아닙니다. 연관성이 낮거나 불확실한 메뉴는 선택하지 마세요.

    """
    usr_prompt_template = """
    #질문: {question}

    #메뉴 목록:
    {child_list_with_number}

    #응답 형식: {csv_list_response_format}.
    """

    sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, subgraph=subgraph, question=question, child_list_with_number=child_list_with_number, csv_list_response_format=csv_list_response_format)
    model_id = high_level_model
    selected_ids = converse_with_bedrock(sys_prompt, usr_prompt, model_id)

    # Keep only tokens that are plain decimal integers inside the menu range;
    # isdecimal() guarantees int() cannot fail on the token.
    valid_indices = []
    for token in selected_ids.split(','):
        token = token.strip()
        if token.isdecimal() and int(token) < len(child_list):
            valid_indices.append(int(token))

    if not valid_indices:
        return TraverseResult(parent_id, parent_name, -1, [], [], "get_contents")

    selected_child_names = [child_list[index][0] for index in valid_indices]
    selected_child_ids = [child_list[index][1] for index in valid_indices]
    return TraverseResult(parent_id, parent_name, child_level, selected_child_ids, selected_child_names, "traverse_child")

In [None]:
class Context:
    """Retrieval state for one header node, as built by ``get_contents_dev``.

    Attributes:
        parent_id: Id of the header node the contents belong to.
        parent_name: ``value`` of that node.
        contents: Retrieved text (may be replaced later by search results).
        contents_length: Number of content nodes attached to the header.
        search_type: ``"get_short_documents"`` or ``"node_level_search"``.
        k: Page size used when fetching contents.
    """

    def __init__(self, parent_id, parent_name, contents, contents_length, search_type, k):
        self.parent_id = parent_id
        self.parent_name = parent_name
        self.contents = contents
        self.contents_length = contents_length
        self.search_type = search_type
        self.k = k

    def __repr__(self):
        # The contents text can be long, so it is left out of the summary.
        return (
            f"{type(self).__name__}("
            f"parent_id={self.parent_id!r}, "
            f"parent_name={self.parent_name!r}, "
            f"contents_length={self.contents_length!r}, "
            f"search_type={self.search_type!r}, "
            f"k={self.k!r})"
        )

In [None]:
# Collects every TraverseResult produced by the traversal loops in this notebook.
traverse_results = []

# Pick the manual (topic) that matches the question
question = "Bedrock에서 Custom Model 활용 방법"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Walk down the header tree, following the first selected child at each level
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # traverse_child_dev reports a stop as "get_contents" (not "get_content").
    if result.next_action == "get_contents":
        break

    if result.selected_child_ids:
        target_node = [result.selected_child_ids[0]]
    else:
        break

In [None]:
def get_contents_dev(graph, parent_id, k=5):
    """Fetch the contents of a header node, or flag it for search when it is large.

    Args:
        graph: Object exposing ``run(query, params)``; the count query result
            must also support ``.data()``.
        parent_id: Id of the header node.
        k: Page size. Headers with at most ``2 * k`` content nodes are read
            directly (first ``k`` items, ordered by ``c.order``).

    Returns:
        A ``Context``. ``search_type`` is ``"get_short_documents"`` with the
        joined text of the first ``k`` contents, or ``"node_level_search"``
        with empty contents when the header has more than ``2 * k`` items.
    """
    # OPTIONAL MATCH keeps the header row even when it has no HAS_CONTENTS
    # relationship; a plain MATCH with the n.value grouping key would return
    # no rows at all in that case and make the [0] lookup fail.
    count_query = """
        MATCH (n)
        WHERE id(n) = $parent_id
        OPTIONAL MATCH (n)-[:HAS_CONTENTS]->(c)
        RETURN count(c) as contents_length, n.value as parent_name
    """
    rows = graph.run(count_query, {"parent_id": parent_id}).data()
    if rows:
        contents_length = rows[0]['contents_length']
        parent_name = rows[0]['parent_name']
    else:
        # No node with this id: treat it as a header without contents.
        contents_length = 0
        parent_name = None
    print(f"Num Documents: {contents_length}")

    if contents_length > k * 2:
        # Too many items to read in full; the caller is expected to search instead.
        return Context(parent_id, parent_name, "", contents_length, "node_level_search", k)

    content_query = """
            MATCH (n)-[:HAS_CONTENTS]->(c)
            WHERE id(n) = $parent_id
            RETURN c.text
            ORDER BY c.order
            LIMIT $k
        """
    content_results = graph.run(content_query, {"parent_id": parent_id, "k": k})
    contents = [record["c.text"] for record in content_results]
    return Context(parent_id, parent_name, " ".join(contents), contents_length, "get_short_documents", k)

In [None]:
# Pick the manual (topic) that matches the question
question = "Bedrock에서 활용가능한 모델 목록"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Reset so a `result` left over from an earlier cell is never reused
result = None

# Walk down the header tree, following the first selected child at each level
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # traverse_child_dev reports a stop as "get_contents" (not "get_content").
    if result.next_action == "get_contents":
        break

    if result.selected_child_ids:
        target_node = [result.selected_child_ids[0]]
    else:
        break

# Fetch the documents under the header where traversal stopped
if result is None:
    print("No document was selected; skipping content retrieval.")
else:
    context = get_contents_dev(graph, result.parent_id, 5)
    print("==============")
    print("Search Type:", context.search_type)
    print("==============")
    print(context.contents)
    print("==============")

In [None]:
# Pick the manual (topic) that matches the question
question = "Bedrock의 Agent에서 SDK를 활용해서 Agent Action을 정의하는 방법"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Reset so a `result` left over from an earlier cell is never reused
result = None

# Walk down the header tree, following the first selected child at each level
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # traverse_child_dev reports a stop as "get_contents" (not "get_content").
    if result.next_action == "get_contents":
        break

    if result.selected_child_ids:
        target_node = [result.selected_child_ids[0]]
    else:
        break

# Fetch the documents under the header where traversal stopped
if result is None:
    print("No document was selected; skipping content retrieval.")
else:
    context = get_contents_dev(graph, result.parent_id, 5)
    print("==============")
    print("Search Type:", context.search_type)

In [None]:
from langchain_aws import BedrockEmbeddings
from langchain.vectorstores import Neo4jVector

# Search backend read by the *_search_dev functions at call time.
searching_scheme = "full_text" # full_text | keyword | vector
# Output-format instruction inserted into the keyword/query generation prompts.
csv_list_response_format = "Your response should be a list of comma separated values, eg: `foo, bar` or `foo,bar`"

def node_level_search_dev(question, graph, parent_id, parent_name, language = "English", k=5):
    """Search the contents attached to one header node.

    The low-level model first turns *question* into a search query (vector
    scheme) or a keyword (full_text / keyword schemes); the matching contents
    under ``parent_id`` are then ranked by score.

    Args:
        question: The user's question.
        graph: Object exposing ``run(query, params)``; used by the text schemes.
        parent_id: Id of the header node whose contents are searched.
        parent_name: Name of that header, shown to the model as the document name.
        language: Language requested for the generated keyword/query.
        k: Maximum number of results.

    Returns:
        The matching texts, each suffixed with ``(Score: ...)``, joined by
        three newlines.

    Raises:
        ValueError: If the global ``searching_scheme`` is not one of
            ``"full_text"``, ``"keyword"`` or ``"vector"``.
    """
    # Fail early with a clear message instead of hitting an unbound
    # ``search_query`` after the LLM call has already been made.
    if searching_scheme not in ("full_text", "keyword", "vector"):
        raise ValueError(f"Unsupported searching_scheme: {searching_scheme!r}")

    if searching_scheme == "vector":
        sys_prompt_template = """
            당신은 AWS에 정통한 전문 엔지니어입니다. 사용자의 질문을 바탕으로 매뉴얼에서 벡터 검색으로 문서를 찾아내기에 적합한 질문을 만들어주세요.({language})
            
            주의: 

            - 문서 이름과 질문을 고려하여 해당 문서 내에서 가장 관련성 높고 특징적인 질문을 생성하세요.
            - 문서의 특정 내용을 잘 나타내는 자연어 질문을 선호합니다.
        """
        usr_prompt_template = "#검색 대상 문서 iname_placeholder"
        usr_prompt_template = "#검색 대상 문서 이름:\n{subgraph}\n\n#질문:\n{question}\n\n #응답 형식:\n{csv_list_response_format}"

        # The document name comes from the parent_name argument; the function
        # has no ``subgraph`` parameter, so it must not rely on a notebook global.
        sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, language=language, subgraph=parent_name, question=question, csv_list_response_format=csv_list_response_format)
        model_id = low_level_model
        keywords = converse_with_bedrock(sys_prompt, usr_prompt, model_id)

        index_name = "content_embedding_index"
        embeddings = BedrockEmbeddings(model_id="cohere.embed-multilingual-v3", region_name=region_name)
        vector_store = Neo4jVector.from_existing_index(
            embedding=embeddings,
            index_name=index_name,
            node_label="Content",
            text_node_property="text",
            embedding_node_property="embedding"
        )

        question_embedding = embeddings.embed_query(keywords)

        # NOTE(review): the index procedure sits after ``WITH child``, so it
        # appears to be invoked once per child row — confirm this is acceptable.
        vector_search_query = """
            MATCH (parent)-[:HAS_CONTENTS]->(child:Content)
            WHERE id(parent) = $parent_id
            WITH child
            CALL db.index.vector.queryNodes($index_name, $k, $question_embedding) YIELD node, score
            WHERE node = child
            RETURN id(node) AS node_id, node.text AS text, score
            ORDER BY score DESC
            LIMIT $k
        """

        params = {
            "parent_id": parent_id,
            "question_embedding": question_embedding,
            "k": k,
            "index_name": index_name
        }

        search_results = vector_store.query(vector_search_query, params=params)

    else:
        sys_prompt_template = """
        당신은 AWS에 정통한 전문 엔지니어입니다. 사용자의 질문을 바탕으로 매뉴얼에서 핵심 키워드를 1개 추출합니다.
        키워드는 다음 조건을 반드시 만족해야 합니다:
        1. 키워드에 '_', '-' 등의 특수 문자 포함 금지 (예: custom_model 대신 custom model로 응답)
        2. 주어진 문서 이름 내에서 질문의 맥락에 가장 적합한 단어를 선택 (문서 이름은 키워드에 포함할 필요가 없음)
        2. 문서 이름에 이미 포함된 내용보다는 검색하려는 특정 기능 및 개념을 잘 나타내는 구체적 단어를 선택
        3. {language} 키워드 제공 
        
        주의: 
        - 문서 이름과 질문을 고려하여 해당 문서 내에서 가장 관련성 높고 특징적인 단어를 선택하세요.
        - 너무 일반적인 단어보다는 문서의 특정 내용을 잘 나타내는 단어를 선호합니다.
        """
        usr_prompt_template = "#검색 대상 문서 이름:\n{parent_name}\n\n#질문:\n{question}\n\n #응답 형식:\n{csv_list_response_format}"

        sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, language=language, parent_name=parent_name, question=question, csv_list_response_format=csv_list_response_format)
        model_id = low_level_model
        keywords = converse_with_bedrock(sys_prompt, usr_prompt, model_id)

        if searching_scheme == "full_text":
            search_query = """MATCH (parent)-[:HAS_CONTENTS]->(child)
                WHERE id(parent) = $parent_id
                WITH child
                CALL db.index.fulltext.queryNodes("Search_Content_by_FullText", $keywords) YIELD node, score
                WHERE node = child
                RETURN node.text as text, score
                ORDER BY score DESC
                LIMIT $k
            """
        else:
            # "keyword" scheme: score is the number of case-insensitive occurrences.
            search_query = """MATCH (parent)-[:HAS_CONTENTS]->(child)
                WHERE id(parent) = $parent_id
                WITH child, $keywords AS keyword
                WHERE child.text CONTAINS keyword
                RETURN child.text AS text, 
                    size(split(toLower(child.text), toLower(keyword))) - 1 AS score
                ORDER BY score DESC
                LIMIT $k
            """

        params = {"parent_id": parent_id, "keywords": keywords, "k": k}
        search_results = graph.run(search_query, params)

    content = "\n\n\n".join(f"{record['text']} (Score: {record['score']})" for record in search_results)
    return content

In [None]:
# Pick the manual (topic) that matches the question
question = "Bedrock의 Agent에서 SDK를 활용해서 Agent Action을 정의하는 방법"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Reset so a `result` left over from an earlier cell is never reused
result = None

# Walk down the header tree, following the first selected child at each level
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # traverse_child_dev reports a stop as "get_contents" (not "get_content").
    if result.next_action == "get_contents":
        break

    if result.selected_child_ids:
        target_node = [result.selected_child_ids[0]]
    else:
        break

# Fetch the documents under the header where traversal stopped
if result is None:
    print("No document was selected; skipping content retrieval.")
else:
    context = get_contents_dev(graph, result.parent_id, 5)
    print("==============")
    print("Search Type:", context.search_type)
    if context.search_type == 'node_level_search':
        # k must be passed by keyword: the fifth positional parameter is `language`.
        search_content = node_level_search_dev(question, graph, context.parent_id, context.parent_name, k=context.k)
        print(search_content)

In [None]:
def check_relevance_dev(question, content, parent_name, contents_length, search_type, k=5):
    """Ask the high-level model whether *content* is enough to answer *question*.

    Args:
        question: The user's question.
        content: Retrieved text shown to the model as prior information.
        parent_name: Document name shown alongside the content.
        contents_length: Number of content nodes under the header.
        search_type: How the content was obtained; the ``Partial`` answer is
            only offered for ``"get_short_documents"`` when more than ``k``
            items exist.
        k: Page size used when the content was fetched.

    Returns:
        The model's verdict with surrounding whitespace, quotes and backticks
        removed — expected to be ``None``, ``Complete`` or ``Partial``.
    """
    offer_partial = search_type == "get_short_documents" and contents_length > k
    optional_prompt1 = "- 사전 정보가 질문 취지에 부합하지만 뒷 내용 추가 확인 필요: 'Partial'" if offer_partial else ""
    optional_prompt2 = "또는 `Partial`" if offer_partial else ""

    sys_prompt_template = """
    당신은 유능한 데이터 분석가입니다. 당신의 임무는 오직 주어진 사전 정보만을 활용하여 질문에 답변 가능한지, 아래의 기준으로 판단하는 것입니다.
    
    판단 기준:
    1. 질문의 핵심 키워드가 문서 이름 또는 사전 정보에 등장하는가?
    2. 알아내고자 하는 구체적 정보가 포함되어 있는가?

    응답 방법:
    - 문서 이름 및 사전 정보가 질문과 관련 없음: 'None'
    - 사전 정보만으로 질문에 답변 가능: 'Complete'
    {partial1}

    서두는 생략하고, `None` 또는 `Complete`{partial2}으로만 답변하세요.
    """
    usr_prompt_template = """
    #사전 정보 (문서이름: {parent_name})
    {context}
    
    #질문: {question}
    """

    sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, partial1=optional_prompt1, partial2=optional_prompt2, parent_name=parent_name, question=question, context=content)
    model_id = high_level_model
    status = converse_with_bedrock(sys_prompt, usr_prompt, model_id)

    # Callers compare the verdict with ``==``; the prompt itself shows the
    # labels wrapped in quotes/backticks, so strip those along with whitespace.
    return status.strip().strip("'\"`").strip()

In [None]:
# Pick the manual (topic) that matches the question
question = "model customization 작업에서 데이터 암호화하는 방법"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Reset so a `result` left over from an earlier cell is never reused
result = None

# Walk down the header tree, following the first selected child at each level
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # traverse_child_dev reports a stop as "get_contents" (not "get_content").
    if result.next_action == "get_contents":
        break

    if result.selected_child_ids:
        target_node = [result.selected_child_ids[0]]
    else:
        break

# Fetch the documents under the header where traversal stopped
if result is None:
    print("No document was selected; skipping content retrieval.")
else:
    context = get_contents_dev(graph, result.parent_id, 5)
    print("==============")
    print("Search Type:", context.search_type)
    if context.search_type == 'node_level_search':
        # k must be passed by keyword: the fifth positional parameter is `language`.
        search_content = node_level_search_dev(question, graph, context.parent_id, context.parent_name, k=context.k)
        print("==============")
        print(search_content)
        print("==============")
        status = check_relevance_dev(question, search_content, context.parent_name, context.contents_length, context.search_type, context.k)
    else:
        print("==============")
        print(context.contents)
        print("==============")
        status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
    print(status)

In [None]:
# Pick the manual (topic) that matches the question
question = "Bedrock의 Custom Model 활용 방법"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Reset so a `result` left over from an earlier cell is never reused
result = None

# Walk down the header tree, following the first selected child at each level
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # traverse_child_dev reports a stop as "get_contents" (not "get_content").
    if result.next_action == "get_contents":
        break

    if result.selected_child_ids:
        target_node = [result.selected_child_ids[0]]
    else:
        break

# Fetch the documents under the header where traversal stopped
if result is None:
    print("No document was selected; skipping content retrieval.")
else:
    context = get_contents_dev(graph, result.parent_id, 5)
    print("==============")
    print("Search Type:", context.search_type)
    if context.search_type == 'node_level_search':
        # k must be passed by keyword: the fifth positional parameter is `language`.
        search_content = node_level_search_dev(question, graph, context.parent_id, context.parent_name, k=context.k)
        print("==============")
        print(search_content)
        print("==============")
        status = check_relevance_dev(question, search_content, context.parent_name, context.contents_length, context.search_type, context.k)
    else:
        print("==============")
        print(context.contents)
        print("==============")
        status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
    print(status)

In [None]:
# Pick the manual (topic) that matches the question
question = "Bedrock Agent에서 memory 기능을 활용하는 방법"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Reset so a `result` left over from an earlier cell is never reused
result = None

# Walk down the header tree, following the first selected child at each level
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # traverse_child_dev reports a stop as "get_contents" (not "get_content").
    if result.next_action == "get_contents":
        break

    if result.selected_child_ids:
        target_node = [result.selected_child_ids[0]]
    else:
        break

# Fetch the documents under the header where traversal stopped
if result is None:
    print("No document was selected; skipping content retrieval.")
else:
    context = get_contents_dev(graph, result.parent_id, 5)
    print("context:", context.parent_name)
    print("==============")
    print("Search Type:", context.search_type)
    if context.search_type == 'node_level_search':
        # k must be passed by keyword: the fifth positional parameter is `language`.
        search_content = node_level_search_dev(question, graph, context.parent_id, context.parent_name, k=context.k)
        print("==============")
        print(search_content)
        print("==============")
        status = check_relevance_dev(question, search_content, context.parent_name, context.contents_length, context.search_type, context.k)
    else:
        print("==============")
        print(context.contents)
        print("==============")
        status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
    print(status)

In [None]:
def get_sibling_contents_dev(graph, parent_id, content, k=5, trial=1):
    """Append the next page of a header's contents to *content*.

    Args:
        graph: Object exposing ``run(query, params)`` that yields records
            indexable by ``"c.text"``.
        parent_id: Id of the header node.
        content: Text already collected; it is placed first in the result.
        k: Page size (maximum number of additional items).
        trial: 1-based number of the follow-up page. Items with
            ``c.order >= k * trial`` are fetched; the default of 1 matches the
            previous hard-coded behaviour.

    Returns:
        *content* followed by the fetched texts, joined by single spaces.
    """
    content_query = """
        MATCH (n)-[:HAS_CONTENTS]->(c)
        WHERE id(n) = $parent_id AND c.order >= $order_pos
        RETURN c.text
        ORDER BY c.order
        LIMIT $k
    """
    params = {"parent_id": parent_id, "k": k, "order_pos": k * trial}

    sibling_content = [record["c.text"] for record in graph.run(content_query, params)]
    return " ".join([content, *sibling_content])

In [None]:
# Pick the manual (topic) that matches the question
question = "Bedrock에서 Knowledgebase의 소스로 웹 페이지를 사용하는 방법"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Reset so a `result` left over from an earlier cell is never reused
result = None

# Walk down the header tree, following the first selected child at each level
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # traverse_child_dev reports a stop as "get_contents" (not "get_content").
    if result.next_action == "get_contents":
        break

    if result.selected_child_ids:
        target_node = [result.selected_child_ids[0]]
    else:
        break

# Fetch the documents under the header where traversal stopped
if result is None:
    print("No document was selected; skipping content retrieval.")
else:
    context = get_contents_dev(graph, result.parent_id, 5)
    print("==============")
    print("Search Type:", context.search_type)
    if context.search_type == 'node_level_search':
        # k must be passed by keyword: the fifth positional parameter is `language`.
        context.contents = node_level_search_dev(question, graph, context.parent_id, context.parent_name, k=context.k)

    print("==============")
    print(context.contents)
    print("==============")
    status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
    print(status)

In [None]:
# Pick the manual (topic) that matches the question
question = "Bedrock에서 Converse API를 활용할 때 응답 양식"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Reset so a `result` left over from an earlier cell is never reused
result = None

# Walk down the header tree, following the first selected child at each level
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # traverse_child_dev reports a stop as "get_contents" (not "get_content").
    if result.next_action == "get_contents":
        break

    if result.selected_child_ids:
        target_node = [result.selected_child_ids[0]]
    else:
        break

# Fetch the documents under the header where traversal stopped
if result is None:
    print("No document was selected; skipping content retrieval.")
else:
    context = get_contents_dev(graph, result.parent_id, 5)
    print("==============")
    print("Search Type:", context.search_type)
    if context.search_type == 'node_level_search':
        # k must be passed by keyword: the fifth positional parameter is `language`.
        context.contents = node_level_search_dev(question, graph, context.parent_id, context.parent_name, k=context.k)

    print("==============")
    print(context.contents)
    print("==============")
    status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
    print(status)

In [None]:
# Pick the manual (topic) for a question whose answer is not in the graph
question = "Bedrock의 가격 정책"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Reset so a `result` left over from an earlier cell is never reused
result = None

# Walk down the header tree, following the first selected child at each level
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # traverse_child_dev reports a stop as "get_contents" (not "get_content").
    if result.next_action == "get_contents":
        break

    if result.selected_child_ids:
        target_node = [result.selected_child_ids[0]]
    else:
        break

# Fetch the documents under the header where traversal stopped
if result is None:
    print("No document was selected; skipping content retrieval.")
else:
    context = get_contents_dev(graph, result.parent_id, 5)
    print("==============")
    print("Search Type:", context.search_type)
    if context.search_type == 'node_level_search':
        # k must be passed by keyword: the fifth positional parameter is `language`.
        context.contents = node_level_search_dev(question, graph, context.parent_id, context.parent_name, k=context.k)

    print("==============")
    print(context.contents)
    print("==============")
    status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
    print(status)

In [None]:
from langchain_aws import BedrockEmbeddings
from langchain.vectorstores import Neo4jVector

# Search backend read by the *_search_dev functions at call time.
searching_scheme = "keyword" # full_text | keyword | vector
# Output-format instruction inserted into the keyword/query generation prompts.
csv_list_response_format = "Your response should be a list of comma separated values, eg: `foo, bar` or `foo,bar`"

def subgraph_level_search_dev(question, graph, subgraph, language = "English", k=5):
    """Search every content node reachable from one top-level manual.

    The low-level model first turns *question* into a search query (vector
    scheme) or a keyword (full_text / keyword schemes). Contents under the
    level-1 ``Title`` whose ``value`` equals *subgraph* are then ranked by score.

    Args:
        question: The user's question.
        graph: Object exposing ``run(query, params)``; used by the text schemes.
        subgraph: ``value`` of the level-1 ``Title`` node to search under.
        language: Language requested for the generated keyword/query.
        k: Maximum number of results.

    Returns:
        The matching texts, each suffixed with ``(Score: ...)``, joined by
        three newlines.

    Raises:
        ValueError: If the global ``searching_scheme`` is not one of
            ``"full_text"``, ``"keyword"`` or ``"vector"``.
    """
    # Fail early with a clear message instead of hitting an unbound
    # ``search_query`` after the LLM call has already been made.
    if searching_scheme not in ("full_text", "keyword", "vector"):
        raise ValueError(f"Unsupported searching_scheme: {searching_scheme!r}")

    if searching_scheme == "vector":
        print("vector search started")

        sys_prompt_template = """
            당신은 AWS에 정통한 전문 엔지니어입니다. 사용자의 질문을 바탕으로 매뉴얼에서 벡터 검색으로 문서를 찾아내기에 적합한 질문을 만들어주세요.({language})
            
            주의: 

            - 문서 이름과 질문을 고려하여 해당 문서 내에서 가장 관련성 높고 특징적인 질문을 생성하세요.
            - 문서의 특정 내용을 잘 나타내는 자연어 질문을 선호합니다.
        """
        usr_prompt_template = "#검색 대상 문서 이름:\n{subgraph}\n\n#질문:\n{question}\n\n #응답 형식:\n{csv_list_response_format}"

        sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, language=language, subgraph=subgraph, question=question, csv_list_response_format=csv_list_response_format)
        model_id = low_level_model
        keywords = converse_with_bedrock(sys_prompt, usr_prompt, model_id)
        print(keywords)
        index_name = "content_embedding_index"

        embeddings = BedrockEmbeddings(model_id="cohere.embed-multilingual-v3", region_name=region_name)
        question_embedding = embeddings.embed_query(keywords)

        vector_search_query = """
        MATCH (root:Title {level: "1", value: $subgraph})
        MATCH (root)-[:HAS_CHILD*0..]->(title:Title)-[:HAS_CONTENTS]->(content:Content)

        CALL db.index.vector.queryNodes($index_name, $k, $question_embedding) YIELD node, score
        WHERE node = content

        RETURN node.text AS text, score
        ORDER BY score DESC
        LIMIT $k
        """

        params = {
            "subgraph": subgraph,
            "question_embedding": question_embedding,
            "k": k,
            "index_name": index_name
        }

        vector_store = Neo4jVector.from_existing_index(
            embedding=embeddings,
            index_name=index_name,
            node_label="Content",
            text_node_property="text",
            embedding_node_property="embedding"
        )

        search_results = vector_store.query(vector_search_query, params=params)

    else:
        print("text search started")
        sys_prompt_template = """
        당신은 AWS에 정통한 전문 엔지니어입니다. 사용자의 질문을 바탕으로 매뉴얼에서 핵심 키워드를 1개 추출합니다.
        키워드는 다음 조건을 반드시 만족해야 합니다:
        1. 키워드에 '_', '-' 등의 특수 문자 포함 금지 (예: custom_model 대신 custom model로 응답)
        2. 주어진 문서 이름 내에서 질문의 맥락에 가장 적합한 단어를 선택 (문서 이름은 키워드에 포함할 필요가 없음)
        2. 문서 이름에 이미 포함된 내용보다는 검색하려는 특정 기능 및 개념을 잘 나타내는 구체적 단어를 선택
        3. {language} 키워드 제공 
        
        주의: 
        - 문서 이름과 질문을 고려하여 해당 문서 내에서 가장 관련성 높고 특징적인 단어를 선택하세요.
        - 너무 일반적인 단어보다는 문서의 특정 내용을 잘 나타내는 단어를 선호합니다.
        """
        usr_prompt_template = "#검색 대상 문서 이름:\n{subgraph}\n\n#질문:\n{question}\n\n #응답 형식:\n{csv_list_response_format}"
        sys_prompt, usr_prompt = create_prompt(sys_prompt_template, usr_prompt_template, language=language, subgraph=subgraph, question=question, csv_list_response_format=csv_list_response_format)
        model_id = low_level_model
        keywords = converse_with_bedrock(sys_prompt, usr_prompt, model_id)

        if searching_scheme == "full_text":
            # NOTE(review): other queries in this notebook read Title.value;
            # title.name below may be null — confirm against the graph schema.
            search_query = """
                MATCH (root:Title {level: "1", value: $subgraph})
                MATCH (root)-[:HAS_CHILD*0..]->(title:Title)-[:HAS_CONTENTS]->(content:Content)

                CALL db.index.fulltext.queryNodes("Search_Content_by_FullText", $keywords) YIELD node, score
                WHERE node = content

                RETURN node.text as text, score, title.name as title_name, title.level as title_level
                ORDER BY score DESC
                LIMIT $k
            """
        else:
            # "keyword" scheme: score is the number of case-insensitive occurrences.
            search_query = """
                MATCH (root:Title {level: "1", value: $subgraph})
                MATCH (root)-[:HAS_CHILD*0..]->(title:Title)-[:HAS_CONTENTS]->(content:Content)
                WITH content, title, $keywords AS keyword
                WHERE content.text CONTAINS keyword
                RETURN content.text AS text, 
                    size(split(toLower(content.text), toLower(keyword))) - 1 AS score,
                    {
                        title: title.name,
                        level: title.level,
                        value: title.value
                    } AS metadata
                ORDER BY score DESC
                LIMIT $k
            """

        params = { "subgraph": subgraph, "k": k, "keywords": keywords}
        search_results = graph.run(search_query, params)

    content = "\n\n\n".join(f"{record['text']} (Score: {record['score']})" for record in search_results)
    return content

In [None]:
# Pick the manual (topic) for a question whose answer is not in the graph
question = "Bedrock의 가격 정책"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Reset so a `result` left over from an earlier cell is never reused
result = None

# Walk down the header tree, following the first selected child at each level
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # traverse_child_dev reports a stop as "get_contents" (not "get_content").
    if result.next_action == "get_contents":
        break

    if result.selected_child_ids:
        target_node = [result.selected_child_ids[0]]
    else:
        break

# Fetch the documents under the header where traversal stopped
if result is None:
    print("No document was selected; skipping content retrieval.")
else:
    context = get_contents_dev(graph, result.parent_id, 5)
    print("==============")
    print("Search Type:", context.search_type)
    if context.search_type == 'node_level_search':
        # k must be passed by keyword: the fifth positional parameter is `language`.
        context.contents = node_level_search_dev(question, graph, context.parent_id, context.parent_name, k=context.k)

    status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
    print(status)

    # Widen the search according to the relevance verdict
    if status == 'Partial':
        context.contents = get_sibling_contents_dev(graph, context.parent_id, context.contents, context.k)
    elif status == 'None':
        context.contents = subgraph_level_search_dev(question, graph, subgraph, 'English', 5)

    print(context.contents)

In [None]:
from langchain_aws import BedrockEmbeddings
from langchain.vectorstores import Neo4jVector

# Search backend checked by global_search_dev below. node_level_search_dev and
# subgraph_level_search_dev read the same global, so this rebinding applies to them too.
searching_scheme = "vector" # full_text | keyword | vector
# Output-format instruction inserted into the keyword/query generation prompts.
csv_list_response_format = "Your response should be a list of comma separated values, eg: `foo, bar` or `foo,bar`"

def global_search_dev(question, graph, language="English", k=5):
    """Search all Content nodes for text relevant to *question*.

    The retrieval strategy is selected by the module-level ``searching_scheme``:

    * ``"vector"``: the low-level model rewrites the question into a search
      query, which is embedded and run against the
      ``content_embedding_index`` vector index.
    * ``"full_text"``: the low-level model extracts keywords, which are sent
      to the ``Search_Content_by_FullText`` full-text index.
    * ``"keyword"``: the low-level model extracts keywords, which are matched
      with a Cypher ``CONTAINS`` test on ``Content.text``.

    Args:
        question: The user's question.
        graph: Graph handle whose ``run(query, params)`` executes Cypher. It is
            used by the full-text and keyword schemes only.
        language: Language the model is asked to respond in.
        k: Maximum number of results requested from the search.

    Returns:
        One string with every hit rendered as ``"<text> (Score: <score>)"``,
        hits separated by two blank lines. Empty when nothing matched.

    Raises:
        ValueError: If ``searching_scheme`` is not a supported value.
    """
    # Validate before doing any work: previously an unknown scheme still paid
    # for a model call and then failed with UnboundLocalError on search_query.
    supported_schemes = ("vector", "full_text", "keyword")
    if searching_scheme not in supported_schemes:
        raise ValueError(
            f"Unsupported searching_scheme {searching_scheme!r}; "
            f"expected one of {', '.join(supported_schemes)}"
        )

    if searching_scheme == "vector":
        sys_prompt_template = """
            당신은 AWS에 정통한 전문 엔지니어입니다. 사용자의 질문을 바탕으로 매뉴얼에서 벡터 검색으로 문서를 찾아내기에 적합한 질문을 만들어주세요.({language})
            
            주의: 

            - 질문을 고려하여 가장 관련성 높고 특징적인 질문을 생성하세요.
            - 사용자의 질문 의도를 잘 반영하는 자연어 질문을 선호합니다.
        """
        usr_prompt_template = "#\n\n#질문:\n{question}\n\n #응답 형식:\n{csv_list_response_format}"

        sys_prompt, usr_prompt = create_prompt(
            sys_prompt_template,
            usr_prompt_template,
            language=language,
            question=question,
            csv_list_response_format=csv_list_response_format,
        )
        # The model's rewritten query (not the raw question) is what gets embedded.
        search_text = converse_with_bedrock(sys_prompt, usr_prompt, low_level_model)

        index_name = "content_embedding_index"

        embeddings = BedrockEmbeddings(model_id="cohere.embed-multilingual-v3", region_name=region_name)
        question_embedding = embeddings.embed_query(search_text)

        vector_search_query = """
        CALL db.index.vector.queryNodes($index_name, $k, $question_embedding) YIELD node, score
        WITH DISTINCT node, score
        WHERE node:Content
        RETURN node.text AS text, score
        ORDER BY score DESC
        """

        params = {
            "question_embedding": question_embedding,
            "k": k,
            "index_name": index_name,
        }

        # NOTE(review): the store is built without explicit connection details and
        # `graph` is not used here -- presumably it connects via the NEO4J_*
        # environment variables; confirm.
        vector_store = Neo4jVector.from_existing_index(
            embedding=embeddings,
            index_name=index_name,
            node_label="Content",
            text_node_property="text",
            embedding_node_property="embedding",
        )

        search_results = vector_store.query(vector_search_query, params=params)

    else:
        sys_prompt_template = """
        당신은 AWS에 정통한 전문 엔지니어입니다. 사용자의 질문을 바탕으로 매뉴얼에서 핵심 키워드를 2개 추출합니다.
        키워드는 다음 조건을 반드시 만족해야 합니다:
        1. 키워드에 '_', '-' 등의 특수 문자 포함 금지 (예: custom_model 대신 custom model로 응답)
        2. 문서 이름에 이미 포함된 내용보다는 검색하려는 특정 기능 및 개념을 잘 나타내는 구체적 단어를 선택
        3. {language} 키워드 제공 
        
        주의: 
        - 너무 일반적인 단어보다는 서비스 이름과 서비스의 특정 내용을 잘 나타내는 단어를 선호합니다.
        """
        usr_prompt_template = "#질문:\n{question}\n\n #응답 형식:\n{csv_list_response_format}"
        sys_prompt, usr_prompt = create_prompt(
            sys_prompt_template,
            usr_prompt_template,
            language=language,
            question=question,
            csv_list_response_format=csv_list_response_format,
        )
        keywords = converse_with_bedrock(sys_prompt, usr_prompt, low_level_model)
        print(keywords)

        if searching_scheme == "full_text":
            search_query ="""
            CALL db.index.fulltext.queryNodes("Search_Content_by_FullText", $keywords) YIELD node, score
            WHERE node:Content
            OPTIONAL MATCH (title:Title)-[:HAS_CONTENTS]->(node)
            RETURN node.text as text, score, title.name as title_name, title.level as title_level
            ORDER BY score DESC
            LIMIT $k
            """
        else:  # "keyword" -- the only remaining value after the validation above
            # NOTE(review): `keywords` is passed as returned by the model; since the
            # prompt requests comma separated values, CONTAINS presumably tests for
            # the whole "foo, bar" string rather than each keyword -- confirm
            # whether it should be split first.
            search_query = """
            MATCH (content:Content)
            WITH content, $keywords AS keyword
            WHERE content.text CONTAINS keyword
            OPTIONAL MATCH (title:Title)-[:HAS_CONTENTS]->(content)
            RETURN content.text AS text, 
                size(split(toLower(content.text), toLower(keyword))) - 1 AS score,
                {
                    title: title.name,
                    level: title.level,
                    value: title.value
                } AS metadata
            ORDER BY score DESC
            LIMIT $k
            """

        params = {"k": k, "keywords": keywords}
        search_results = graph.run(search_query, params)

    content = "\n\n\n".join(f"{record['text']} (Score: {record['score']})" for record in search_results)
    return content

In [None]:
# Choose the question topic (information that cannot be found)
question = "Amazon Bedrock의 가격 정책"

# Run the graph-wide search and show the joined, scored hits it returns
content = global_search_dev(question, graph, language='English', k=5)
print(content)

In [None]:
from langchain_aws import ChatBedrock
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

def generate_answer_dev(question, context):
    """Stream an answer to *question* based on *context*, printing it as it arrives.

    The system prompt instructs the model to answer only from the supplied
    context and to say it does not know when the context lacks the answer.

    Args:
        question: The user's question, substituted into the human message.
        context: Retrieved text, substituted into the human message as the
            prior information the model may use.

    Returns:
        None. The answer is written to stdout chunk by chunk.
    """
    # Prompt setting
    sys_prompt_template = "당신은 AWS에 정통한 전문 엔지니어입니다. 주어진 사전 정보만 활용하여, 사용자 질문에 답변을 생성하세요. 사전 정보로 주어지지 않은 내용에 대한 질문에는 모른다고 답변하세요."
    usr_prompt_template = "#사전 정보: {context}\n\n #사용자 질문:\n {question}"
    prompt = ChatPromptTemplate.from_messages([("system", sys_prompt_template), ("human", usr_prompt_template)])

    # Model setting
    model_kwargs = {
        "temperature": 0.5,
        "max_tokens": 4096,
    }
    # Use the notebook-wide region rather than a second hard-coded copy, so that
    # changing `region_name` moves this model along with the other Bedrock clients.
    llm = ChatBedrock(model_id=high_level_model, region_name=region_name, model_kwargs=model_kwargs, streaming=True)

    # Output setting
    parser = StrOutputParser()

    # Chain: prompt -> model -> plain-string chunks
    chain = prompt | llm | parser
    for chunk in chain.stream({"context": context, "question": question}):
        # flush so each chunk is shown immediately instead of being buffered
        print(chunk, end="", flush=True)

In [None]:
# Select the topic for the question
question = "Bedrock에서 Agent의 성능을 최적화하는 방법"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Find the most suitable header by descending from the selected node.
# `result` is reset first: if `target_node` is empty the loop never runs, and
# without the reset the code below would either hit a NameError or silently
# reuse a `result` left over from a previously executed cell.
result = None
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # Stop once content should be fetched or there is no child left to follow.
    if result.next_action == "get_content" or not result.selected_child_ids:
        break
    # Only the first selected child is followed.
    target_node = [result.selected_child_ids[0]]

if result is None:
    raise RuntimeError(f"No starting node was selected for question: {question!r}")

# Fetch the documents under the header the traversal stopped at
context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    context.contents = node_level_search_dev(question, graph, context.parent_id, context.parent_name, context.k)

status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
print(status)

# 'Partial' -> replace the contents with the get_sibling_contents_dev output;
# 'None'    -> replace them with a subgraph_level_search_dev result instead.
if status == 'Partial':
    context.contents = get_sibling_contents_dev(graph, context.parent_id, context.contents, context.k)
elif status == 'None':
    context.contents = subgraph_level_search_dev(question, graph, subgraph, 'English', 5)

generate_answer_dev(question, context.contents)

In [None]:
# Select the topic for the question
question = "Bedrock에서 Knowledge Base를 테스트하는 방법"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Find the most suitable header by descending from the selected node.
# `result` is reset first: if `target_node` is empty the loop never runs, and
# without the reset the code below would either hit a NameError or silently
# reuse a `result` left over from a previously executed cell.
result = None
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # Stop once content should be fetched or there is no child left to follow.
    if result.next_action == "get_content" or not result.selected_child_ids:
        break
    # Only the first selected child is followed.
    target_node = [result.selected_child_ids[0]]

if result is None:
    raise RuntimeError(f"No starting node was selected for question: {question!r}")

# Fetch the documents under the header the traversal stopped at
context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    context.contents = node_level_search_dev(question, graph, context.parent_id, context.parent_name, context.k)

status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
print(status)

# 'Partial' -> replace the contents with the get_sibling_contents_dev output;
# 'None'    -> replace them with a subgraph_level_search_dev result instead.
if status == 'Partial':
    context.contents = get_sibling_contents_dev(graph, context.parent_id, context.contents, context.k)
elif status == 'None':
    context.contents = subgraph_level_search_dev(question, graph, subgraph, 'English', 5)

generate_answer_dev(question, context.contents)

In [None]:
# Select the topic for the question (this one asks for information that is not in the document)
question = "Bedrock의 가격 정책"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Find the most suitable header by descending from the selected node.
# `result` is reset first: if `target_node` is empty the loop never runs, and
# without the reset the code below would either hit a NameError or silently
# reuse a `result` left over from a previously executed cell.
result = None
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # Stop once content should be fetched or there is no child left to follow.
    if result.next_action == "get_content" or not result.selected_child_ids:
        break
    # Only the first selected child is followed.
    target_node = [result.selected_child_ids[0]]

if result is None:
    raise RuntimeError(f"No starting node was selected for question: {question!r}")

# Fetch the documents under the header the traversal stopped at
context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    context.contents = node_level_search_dev(question, graph, context.parent_id, context.parent_name, context.k)

status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
print(status)

# 'Partial' -> replace the contents with the get_sibling_contents_dev output;
# 'None'    -> replace them with a subgraph_level_search_dev result instead.
if status == 'Partial':
    context.contents = get_sibling_contents_dev(graph, context.parent_id, context.contents, context.k)
elif status == 'None':
    context.contents = subgraph_level_search_dev(question, graph, subgraph, 'English', 5)

generate_answer_dev(question, context.contents)

In [None]:
# Select the topic for the question
question = "What is Amazon Bedrock Playground?"
target_node, subgraph, next_step = select_subgraph_dev(question, graph)
print(target_node, "|", subgraph, "|", next_step)

# Find the most suitable header by descending from the selected node.
# `result` is reset first: if `target_node` is empty the loop never runs, and
# without the reset the code below would either hit a NameError or silently
# reuse a `result` left over from a previously executed cell.
result = None
while target_node:
    print("target_node:", target_node)
    result = traverse_child_dev(question, subgraph, graph, target_node)
    traverse_results.append(result)

    # Stop once content should be fetched or there is no child left to follow.
    if result.next_action == "get_content" or not result.selected_child_ids:
        break
    # Only the first selected child is followed.
    target_node = [result.selected_child_ids[0]]

if result is None:
    raise RuntimeError(f"No starting node was selected for question: {question!r}")

# Fetch the documents under the header the traversal stopped at
context = get_contents_dev(graph, result.parent_id, 5)
print("==============")
print("Search Type:", context.search_type)
if context.search_type == 'node_level_search':
    context.contents = node_level_search_dev(question, graph, context.parent_id, context.parent_name, context.k)

status = check_relevance_dev(question, context.contents, context.parent_name, context.contents_length, context.search_type, context.k)
print(status)

# 'Partial' -> replace the contents with the get_sibling_contents_dev output;
# 'None'    -> replace them with a subgraph_level_search_dev result instead.
if status == 'Partial':
    context.contents = get_sibling_contents_dev(graph, context.parent_id, context.contents, context.k)
elif status == 'None':
    context.contents = subgraph_level_search_dev(question, graph, subgraph, 'English', 5)

generate_answer_dev(question, context.contents)