In [None]:
# Load the source document into `text`.
# The encoding is given explicitly: without it, open() falls back to the
# platform's locale encoding, which can fail or garble this Korean-language
# file on systems whose default is not UTF-8.
# NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open('demo/현대해상3(퇴직연금)상품약관.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# print(text[:512])

1) **Building**: RAPTOR recursively embeds, clusters, and summarizes chunks of text to construct a tree with varying levels of summarization from the bottom up. You can create a tree from the document text loaded above (the `text` variable) using `RA.add_documents(text)`.

2) **Querying**: At inference time, the RAPTOR model retrieves information from this tree, integrating data across lengthy documents at different abstraction levels. You can perform queries on the tree with `RA.answer_question`.

### Building the tree

In [None]:
from utils.llm_manager import AzureAIClientManager
from raptor.EmbeddingModels import AzureEmbeddingModel
from raptor.SummarizationModels import AzureSummarizationModel
from raptor.QAModels import AzureQAModel
from raptor import RetrievalAugmentation, RetrievalAugmentationConfig
from dotenv import load_dotenv

import os

load_dotenv()

# Endpoint and key are read from the environment (load_dotenv() is called
# earlier in this cell). Both clients share them; only the deployment differs.
_azure_credentials = {
    "endpoint": os.getenv("AZURE_OPENAI_ENDPOINT"),
    "api_key": os.getenv("AZURE_OPENAI_KEY"),
}

# 1) Embedding client and the embedding model built on it.
azure_emb_client = AzureAIClientManager(
    **_azure_credentials,
    deployment="text-embedding-3-large",  # embedding-only deployment
)
emb_model = AzureEmbeddingModel(azure_emb_client)

# 2) Chat client, shared by the summarization and QA models.
azure_chat_client = AzureAIClientManager(
    **_azure_credentials,
    deployment="gpt-4o",  # chat-only deployment
)
sum_model = AzureSummarizationModel(azure_chat_client)
qa_model = AzureQAModel(azure_chat_client)

# 3) Inject the three models into the RAPTOR configuration.
cfg = RetrievalAugmentationConfig(
    embedding_model=emb_model,
    summarization_model=sum_model,
    qa_model=qa_model,
    tb_max_tokens=512,
    tb_summarization_length=512,
)



In [None]:
# 4) Run RAPTOR: create the retrieval-augmentation object from `cfg` and
#    feed it the loaded document text (`text`) to build the tree.
RA = RetrievalAugmentation(config=cfg)
RA.add_documents(text)

In [None]:
from raptor.visualize import visualize_tree_structure
from raptor.tree_structures import Node, Tree
import random, string
from typing import List

# Take the tree held by RA and place a synthetic "Tree Root" node (index -1)
# above all of its root nodes, then pass that node and the tree to the
# visualizer.
tree = RA.tree
root_indices = [root.index for root in tree.root_nodes.values()]
root_node = Node(
    "Tree Root",
    index=-1,
    children=root_indices,
    embeddings=[],
)
visualize_tree_structure(root_node, tree)

In [None]:
# 5) Quick embedding check: embed a short string and print the vector's
#    length together with its first five values.
vec = emb_model.create_embedding("hello world")
print(f"벡터 차원: {len(vec)}  예시값: {vec[:5]}")

In [None]:
# 6) Inspect the intermediate retrieval context.
# The query is about the document loaded into the tree, so the printed
# context says something meaningful about retrieval. A question unrelated
# to the document would only return arbitrary nodes.
ctx, layers = RA.retrieve("이율적용형 이율 비율", return_layer_information=True)
# Show only the first 200 characters on one line to keep the output short.
print("선택된 컨텍스트:", ctx[:200].replace("\n"," "))
print("ctx length:", len(ctx))
print("레이어 정보:", layers)
print("레이어 정보 길이:", len(layers))

In [None]:
# Retrieve context for a query and redraw the tree with the nodes listed in
# the returned layer information highlighted. The synthetic root node
# (index -1) carries the query text.
query = "이율적용형 이율 비율"
root_child_indices = list(map(lambda root: root.index, tree.root_nodes.values()))
root_node = Node(
    query,
    index=-1,
    children=root_child_indices,
    embeddings=[],
)

ctx, layers = RA.retrieve(query, return_layer_information=True)
highlight = list(map(lambda layer_info: layer_info["node_index"], layers))

visualize_tree_structure(
    root_node,
    tree,
    highlight_node_indices=highlight,
    highlight_color="#e74c3c",  # red highlight
)

In [None]:
from raptor.visualize import visualize_tree_structure_pyvis

# Optional call to the pyvis-based visualizer, left commented out.
# It reuses `root_node`, `tree` and `highlight` from the previous cell.
# visualize_tree_structure_pyvis(
#     root_node,
#     tree,
#     highlight_node_indices=highlight,
#     highlight_color="#e74c3c",   # red
#     output_file="tree.html",     # generated HTML file
# )

In [None]:
# 7) Run the actual question answering and print the answer along with the
#    layer information returned for it.
question = "이율적용형 이율 비율"
answer, layers = RA.answer_question(question, return_layer_information=True)
print("Answer:", answer)
print("레이어 정보:", layers)