In [2]:
import dotenv

# Load environment variables from a .env file (the call returned True in the
# recorded run). Presumably this supplies the OpenAI API key needed by the
# later cells that call the OpenAI API — confirm against the project's .env.
dotenv.load_dotenv()

True

In [4]:
import logging
import sys

# Route log records to stdout so they appear in the notebook cell output.
# level=DEBUG emits verbose, debugging-level detail; force=True removes any
# handlers already attached to the root logger before applying this setup.
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout, force=True)

In [5]:
from llama_index import SimpleDirectoryReader

# Load the documents: read every file found under the data directory.
path = "./data"
documents = SimpleDirectoryReader(input_dir=path).load_data()

DEBUG:llama_index.readers.file.base:> [SimpleDirectoryReader] Total files added: 2


In [8]:
from llama_index import GPTVectorStoreIndex

# Index: a lookup structure for efficiently retrieving the wanted information from a data store.
# GPTVectorStoreIndex: per the original note, an index that keeps its data in a
# dict (presumably the default in-memory store — TODO confirm).
# The recorded DEBUG output shows the documents being split into chunks and
# sent to the OpenAI embeddings endpoint while the index is built.
index = GPTVectorStoreIndex.from_documents(documents)

DEBUG:llama_index.node_parser.node_utils:> Adding chunk: This article is available at 5 reading levels a...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: This article is available at 5 reading levels a...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: This article is available at 5 reading levels a...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: This article is available at 5 reading levels a...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: This article is available at 5 reading levels a...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: This article is available at 5 reading levels a...
DEBUG:openai:message='Request to OpenAI API' method=post path=https://api.openai.com/v1/embeddings
DEBUG:openai:api_version=None data='{"input": ["page_label: 1 file_name: clever-seals-dive-deep-2001041616-article_only.pdf  This article is available at 5 reading levels at https://newsela.com.How deep is the sea? These clever seals know and are happil

### Memo
LLMには最大プロンプト長があるため、長いテキストはそのままでは処理できません。<br>
そこで、テキストは「チャンク」と呼ばれる短い単位に分割されて処理されます。

In [9]:
# Query engine: retrieves information relevant to the user's input from the index,
# then generates a response from the input together with the retrieved information.
query_engine = index.as_query_engine()

In [12]:
# Ask the question, then print a separator line followed by the answer text.
response = query_engine.query("What is Hamilton? Please explain it to me shortly.")
print('---------------------------------', response, sep='\n')

DEBUG:openai:message='Request to OpenAI API' method=post path=https://api.openai.com/v1/embeddings
DEBUG:openai:api_version=None data='{"input": ["What is Hamilton? Please explain it to me shortly."], "model": "text-embedding-ada-002", "encoding_format": "base64"}' message='Post details'
DEBUG:urllib3.connectionpool:https://api.openai.com:443 "POST /v1/embeddings HTTP/1.1" 200 None
DEBUG:openai:message='OpenAI API response' path=https://api.openai.com/v1/embeddings processing_ms=25 request_id=ebb51f3e44f0c2779295b8fa3751df24 response_code=200
DEBUG:llama_index.indices.utils:> Top 2 nodes:
> [Node b92fbcdf-69e9-43b7-8cc8-7204307bbed4] [Similarity score:             0.797915] This article is available at 5 reading levels at https://newsela.com.Edelman says the simulator w...
> [Node 3313fc6f-4723-40a6-ac99-9c4084c4ac1d] [Similarity score:             0.796045] This article is available at 5 reading levels at https://newsela.com.If you’ve ever wanted to be ...
DEBUG:openai:message='Reques

In [13]:
# Save the index to disk.
# The target directory is passed explicitly (instead of relying on the library
# default) so it visibly matches the "./storage/" path that the index is later
# loaded from; the recorded run wrote docstore.json, index_store.json,
# vector_store.json and graph_store.json into that same storage directory.
index.storage_context.persist(persist_dir="./storage/")

DEBUG:fsspec.local:open file: d:/LlamaIndex_tutorial/storage/docstore.json
DEBUG:fsspec.local:open file: d:/LlamaIndex_tutorial/storage/index_store.json
DEBUG:fsspec.local:open file: d:/LlamaIndex_tutorial/storage/vector_store.json
DEBUG:fsspec.local:open file: d:/LlamaIndex_tutorial/storage/graph_store.json


In [14]:
from llama_index import StorageContext, load_index_from_storage

# Load the index back from disk.
# Build a storage context over the persisted files, then rebuild the index from
# it. Note that this rebinds `index` to the loaded instance.
persist_dir = "./storage/"
storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
index = load_index_from_storage(storage_context=storage_context)

DEBUG:llama_index.storage.kvstore.simple_kvstore:Loading llama_index.storage.kvstore.simple_kvstore from ./storage/docstore.json.
DEBUG:fsspec.local:open file: d:/LlamaIndex_tutorial/storage/docstore.json
DEBUG:llama_index.storage.kvstore.simple_kvstore:Loading llama_index.storage.kvstore.simple_kvstore from ./storage/index_store.json.
DEBUG:fsspec.local:open file: d:/LlamaIndex_tutorial/storage/index_store.json
DEBUG:llama_index.vector_stores.simple:Loading llama_index.vector_stores.simple from ./storage/vector_store.json.
DEBUG:fsspec.local:open file: d:/LlamaIndex_tutorial/storage/vector_store.json
DEBUG:llama_index.graph_stores.simple:Loading llama_index.graph_stores.simple from ./storage/graph_store.json.
DEBUG:fsspec.local:open file: d:/LlamaIndex_tutorial/storage/graph_store.json
INFO:llama_index.indices.loading:Loading all indices.
