#### データソースを準備

In [1]:
from trafilatura import fetch_url, extract

url = 'https://www.shugiin.go.jp/internet/itdb_annai.nsf/html/statics/shiryo/dl-constitution.htm'
filename = 'kenpo.txt'

document = fetch_url(url)
text = extract(document)

if text is not None:
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(text)
else:
    print("No text could be extracted from the document.")


#### チャンク分割

In [2]:
from langchain.document_loaders import TextLoader

loader = TextLoader(filename, encoding='utf-8')
raw_docs = loader.load()

from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=100,
    chunk_overlap=0,
)

docs = text_splitter.split_documents(raw_docs)

print(len(docs))
print(docs[0])


Created a chunk of size 172, which is longer than the specified 100
Created a chunk of size 435, which is longer than the specified 100
Created a chunk of size 332, which is longer than the specified 100
Created a chunk of size 165, which is longer than the specified 100
Created a chunk of size 116, which is longer than the specified 100
Created a chunk of size 147, which is longer than the specified 100
Created a chunk of size 124, which is longer than the specified 100
Created a chunk of size 154, which is longer than the specified 100
Created a chunk of size 135, which is longer than the specified 100
Created a chunk of size 118, which is longer than the specified 100
Created a chunk of size 105, which is longer than the specified 100
Created a chunk of size 156, which is longer than the specified 100
Created a chunk of size 107, which is longer than the specified 100
Created a chunk of size 108, which is longer than the specified 100
Created a chunk of size 147, which is longer tha

227
page_content='朕は、日本国民の総意に基いて、新日本建設の礎が、定まるに至つたことを、深くよろこび、枢密顧問の諮詢及び帝国憲法第七十三条による帝国議会の議決を経た帝国憲法の改正を裁可し、ここにこれを公布せしめる。' metadata={'source': 'kenpo.txt'}


#### embedding

In [3]:
import os
from dotenv import load_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings

load_dotenv()

# OpenAIのEmbeddingモデルを読み込む
embeddings = OpenAIEmbeddings(deployment=os.environ["DEPLOYMENT_NAME_EMBEDDINGS"])

from langchain.vectorstores import Chroma

db = Chroma.from_documents(docs, embeddings)

#### 回答

In [6]:
from langchain.chains import  RetrievalQA
from langchain.chat_models import AzureChatOpenAI
from langchain.schema import HumanMessage

retriever = db.as_retriever()

chat = AzureChatOpenAI(
    deployment_name=os.environ["DEPLOYMENT_NAME"],
    temperature=0
)

rag_qa = RetrievalQA.from_chain_type(
    llm = chat,
    chain_type= "stuff",
    retriever = retriever,
)

query = "日本国憲法では投票は国民の義務ですか？"

result = chat([HumanMessage(content=query)])
rag_result = rag_qa.run(query)

print("RAGなしの回答")
print(result.content)
print("RAGを用いた回答")
print(rag_result)

RAGなしの回答
いいえ、日本国憲法では投票は国民の義務ではありません。ただし、国民は自由に選挙権を行使することができます。憲法第15条には、「公務員の選挙に関し、選挙人は、その選挙において自由に選択する権利を有する」と規定されています。また、憲法第42条には、「すべて国民は、公共の福祉に反しない限り、その思想、良心及び信教の自由を保障される」とあり、投票しないことも含めて、自由な思想・良心の表現として認められています。
RAGを用いた回答
日本国憲法では、投票は国民の義務ではありません。ただし、国民の権利として投票が保障されており、自由に行使することができます。
