# Document Summary Index を試す

### 1. セットアップ

In [1]:
import sys
from pprint import pprint
sys.path.append("/home/paper_translator/")
pprint(sys.path)

['/home/paper_translator/lib',
 '/home/paper_translator',
 '/usr/lib/python311.zip',
 '/usr/lib/python3.11',
 '/usr/lib/python3.11/lib-dynload',
 '',
 '/home/paper_translator/.venv/lib/python3.11/site-packages',
 '/home/paper_translator/']


In [2]:
import nest_asyncio

# 非同期処理の有効化
nest_asyncio.apply()

In [3]:
import logging
import sys

# ログレベルの設定
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True)

In [13]:
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
llmama_debug_handler = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llmama_debug_handler])

### 2. ドキュメントの準備

In [43]:
from src.XMLUtils import DocumentReader

In [44]:
base_path = "/home/paper_translator/data"
document_name = (
    "Learning_Transferable_Visual_Models_From_Natural_Language_Supervision"
)
document_path = f"{base_path}/documents/{document_name}"
xml_path = f"{document_path}/{document_name}.tei.xml"

In [45]:
reader = DocumentReader()
docs = reader.load_data(xml_path=xml_path)

### 3. Context の設定

In [18]:
from llama_index import ServiceContext
from langchain.embeddings import HuggingFaceEmbeddings
from src.translator.llama_cpp import create_llama_cpp_model

In [15]:
model_path = "/home/paper_translator/data/models/ELYZA-japanese-Llama-2-7b-fast-instruct-q4_K_M.gguf"
llm = create_llama_cpp_model(package_name="llama_index", model_path=model_path)

ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA GeForce GTX 1660 Ti, compute capability 7.5
llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from /home/paper_translator/data/models/ELYZA-japanese-Llama-2-7b-fast-instruct-q4_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_K     [  4096, 45043,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q6_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q4_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    6:            blk.0.f

In [19]:
model_name = "sentence-transformers/all-MiniLM-l6-v2"
embed_model = HuggingFaceEmbeddings(model_name=model_name)

service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    callback_manager=callback_manager
)

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-l6-v2
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/sentence-transformers/all-MiniLM-l6-v2 HTTP/1.1" 307 85
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/sentence-transformers/all-MiniLM-L6-v2 HTTP/1.1" 200 9633
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/.gitattributes HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/.gitattributes HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140234683686288 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/.gitattributes.lock
DEBUG:fil

Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234683686288 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/.gitattributes.lock
DEBUG:filelock:Lock 140234683686288 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/.gitattributes.lock
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/1_Pooling/config.json HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/1_Pooling/config.json HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140234694907344 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/1_Pooling/config.json.lock
DEBUG:filelock:Lock 140234694907344 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/1_Pooling/config.

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234694907344 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/1_Pooling/config.json.lock
DEBUG:filelock:Lock 140234694907344 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/1_Pooling/config.json.lock
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/README.md HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/README.md HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140234694923920 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/README.md.lock
DEBUG:filelock:Lock 140234694923920 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/README.md.lock
DEBUG:urllib3.connection

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234694923920 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/README.md.lock
DEBUG:filelock:Lock 140234694923920 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/README.md.lock
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/config.json HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/config.json HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140234695064272 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/config.json.lock
DEBUG:filelock:Lock 140234695064272 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/config.json.lock
DEBUG:urllib3.connectionpool:https://hug

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234695064272 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/config.json.lock
DEBUG:filelock:Lock 140234695064272 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/config.json.lock
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/config_sentence_transformers.json HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/config_sentence_transformers.json HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140234694725328 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/config_sentence_transformers.json.lock
DEBUG:filelock:Lock 140234694725328 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers_all-

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234694725328 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/config_sentence_transformers.json.lock
DEBUG:filelock:Lock 140234694725328 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/config_sentence_transformers.json.lock
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/data_config.json HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/data_config.json HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140234694835920 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/data_config.json.lock
DEBUG:filelock:Lock 140234694835920 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234694835920 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/data_config.json.lock
DEBUG:filelock:Lock 140234694835920 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/data_config.json.lock
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/pytorch_model.bin HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/pytorch_model.bin HTTP/1.1" 302 0
DEBUG:filelock:Attempting to acquire lock 140234694754512 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/pytorch_model.bin.lock
DEBUG:filelock:Lock 140234694754512 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/pytorch_model.bin.lock
DE

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234694754512 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/pytorch_model.bin.lock
DEBUG:filelock:Lock 140234694754512 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/pytorch_model.bin.lock
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/sentence_bert_config.json HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/sentence_bert_config.json HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140234682117200 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/sentence_bert_config.json.lock
DEBUG:filelock:Lock 140234682117200 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234682117200 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/sentence_bert_config.json.lock
DEBUG:filelock:Lock 140234682117200 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/sentence_bert_config.json.lock
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/special_tokens_map.json HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/special_tokens_map.json HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140234694569936 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/special_tokens_map.json.lock
DEBUG:filelock:Lock 140234694569936 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers_all-Mi

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234694569936 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/special_tokens_map.json.lock
DEBUG:filelock:Lock 140234694569936 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/special_tokens_map.json.lock
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/tokenizer.json HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/tokenizer.json HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140234682042896 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/tokenizer.json.lock
DEBUG:filelock:Lock 140234682042896 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/tokenizer.json.lock


Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234682042896 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/tokenizer.json.lock
DEBUG:filelock:Lock 140234682042896 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/tokenizer.json.lock
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/tokenizer_config.json HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/tokenizer_config.json HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140234694769424 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/tokenizer_config.json.lock
DEBUG:filelock:Lock 140234694769424 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/tokenizer_config.

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234694769424 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/tokenizer_config.json.lock
DEBUG:filelock:Lock 140234694769424 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/tokenizer_config.json.lock
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/train_script.py HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/train_script.py HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140234694830800 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/train_script.py.lock
DEBUG:filelock:Lock 140234694830800 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/train_script.py.lock


Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234694830800 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/train_script.py.lock
DEBUG:filelock:Lock 140234694830800 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/train_script.py.lock
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/vocab.txt HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/vocab.txt HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140234694473744 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/vocab.txt.lock
DEBUG:filelock:Lock 140234694473744 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/vocab.txt.lock
DEBUG:urllib3.connectionpool:https:/

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234694473744 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/vocab.txt.lock
DEBUG:filelock:Lock 140234694473744 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/vocab.txt.lock
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-l6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/modules.json HTTP/1.1" 307 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /sentence-transformers/all-MiniLM-L6-v2/resolve/7dbbc90392e2f80f3d3c277d6e90027e55de9125/modules.json HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140234694552720 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/modules.json.lock
DEBUG:filelock:Lock 140234694552720 acquired on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/modules.json.lock
DEBUG:urllib3.connectionpool:https:/

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

DEBUG:filelock:Attempting to release lock 140234694552720 on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/modules.json.lock
DEBUG:filelock:Lock 140234694552720 released on /root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-l6-v2/modules.json.lock
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cuda


[nltk_data] Downloading package punkt to /tmp/llama_index...
[nltk_data]   Unzipping tokenizers/punkt.zip.


### 4. DocumentSummaryIndex の作成

In [9]:
from llama_index.llms.base import ChatMessage, MessageRole
from llama_index.prompts import ChatPromptTemplate

In [20]:
# (1) QAプロンプトの定義。
# QAシステムプロンプト
TEXT_QA_SYSTEM_PROMPT = ChatMessage(
    content=(
        "あなたは世界中で信頼されているQAシステムです。\n"
        "事前知識ではなく、常に提供されたコンテキスト情報を使用してクエリに回答してください。\n"
        "従うべきいくつかのルール:\n"
        "1. 回答内で指定されたコンテキストを直接参照しないでください。\n"
        "2. 「コンテキストに基づいて、...」や「コンテキスト情報は...」、またはそれに類するような記述は避けてください。"
    ),
    role=MessageRole.SYSTEM,
)

# QAプロンプトテンプレートメッセージ
TEXT_QA_PROMPT_TMPL_MSGS = [
    TEXT_QA_SYSTEM_PROMPT,
    ChatMessage(
        content=(
            "コンテキスト情報は以下のとおりです。\n"
            "---------------------\n"
            "{context_str}\n"
            "---------------------\n"
            "事前知識ではなくコンテキスト情報を考慮して、クエリに答えます。\n"
            "Query: {query_str}\n"
            "Answer: "
        ),
        role=MessageRole.USER,
    ),
]

# チャットQAプロンプト
CHAT_TEXT_QA_PROMPT = ChatPromptTemplate(message_templates=TEXT_QA_PROMPT_TMPL_MSGS)

In [21]:
# (2) TreeSummarizeプロンプトの定義。
# QAシステムプロンプト
TEXT_QA_SYSTEM_PROMPT = ChatMessage(
    content=(
        "あなたは世界中で信頼されているQAシステムです。\n"
        "事前知識ではなく、常に提供されたコンテキスト情報を使用してクエリに回答してください。\n"
        "従うべきいくつかのルール:\n"
        "1. 回答内で指定されたコンテキストを直接参照しないでください。\n"
        "2. 「コンテキストに基づいて、...」や「コンテキスト情報は...」、またはそれに類するような記述は避けてください。"
    ),
    role=MessageRole.SYSTEM,
)

# ツリー要約プロンプトメッセージ
TREE_SUMMARIZE_PROMPT_TMPL_MSGS = [
    TEXT_QA_SYSTEM_PROMPT,
    ChatMessage(
        content=(
            "複数のソースからのコンテキスト情報を以下に示します。\n"
            "---------------------\n"
            "{context_str}\n"
            "---------------------\n"
            "予備知識ではなく、複数のソースからの情報を考慮して、質問に答えます。\n"
            "疑問がある場合は、「情報無し」と答えてください。\n"
            "Query: {query_str}\n"
            "Answer: "
        ),
        role=MessageRole.USER,
    ),
]

# ツリー要約プロンプト
CHAT_TREE_SUMMARIZE_PROMPT = ChatPromptTemplate(
    message_templates=TREE_SUMMARIZE_PROMPT_TMPL_MSGS
)

In [36]:
# Summaryクエリ
SUMMARY_QUERY = "提供されたテキストの内容を要約してください。"

In [46]:
from llama_index import get_response_synthesizer
from llama_index.indices.document_summary import DocumentSummaryIndex

# サービスコンテキストの準備
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    callback_manager=callback_manager,
    chunk_size=3072,
)

# レスポンスシンセサイザーの準備
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
    use_async=True,
    text_qa_template=CHAT_TEXT_QA_PROMPT,  # QAプロンプト
    summary_template=CHAT_TREE_SUMMARIZE_PROMPT,  # TreeSummarizeプロンプト
)

# DocumentSummaryIndexの準備
doc_summary_index = DocumentSummaryIndex.from_documents(
    docs,
    service_context=service_context,
    response_synthesizer=response_synthesizer,
    summary_query=SUMMARY_QUERY,  # 要約クエリ
)

DEBUG:llama_index.node_parser.node_utils:> Adding chunk: Pre-training methods which learn directly from ...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: At the core of our approach is the idea of lear...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: At the core of our approach is the idea of lear...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: Existing work has mainly used three datasets, M...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: State-of-the-art computer vision systems use ve...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: We consider two different architectures for the...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: We train a series of 5 ResNets and 3 Vision Tra...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: 3.1. Zero-Shot Transfer
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: In computer vision, zero-shot learning usually ...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk

In [47]:
print(doc_summary_index.get_document_summary("Introduction and Motivating Work"))

提供されたテキストは、論文「Learning Transferable Visual Models From Natural Language Supervision」に関する情報を含んでいます。この論文は、直接生のテキストから学習する事前学習手法が、最近の数年間で自然言語処理（NLP）に革命をもたらしていることを述べています。この論文は2021年2月26日に公開され、著者にはAlec Radford、Jong Wook Kim、Chris Hallacy、Aditya Ramesh、Gabriel Goh、Sandhini Agarwal、Girish Sastry、Amanda Askell、Pamela Mishkin、Jack Clark、Gretchen Krueger、Ilya Sutskeverが含まれています。


: 