In [16]:
import os

from haystack import Pipeline, Document
from haystack.utils import Secret
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack_integrations.components.generators.ollama import OllamaGenerator

In [17]:
# Load the corpus: every non-empty line of the input file becomes one document.
# Alternative input file kept for reference:
# with open("input_52_ch_en.txt", "r", encoding="utf-8") as f:
with open("input.txt", "r", encoding="utf-8") as f:
    stripped_lines = [line.strip() for line in f]

# Remove duplicates while keeping first-seen file order. dict.fromkeys() is
# used instead of set() because set iteration order over strings changes
# between kernel restarts (hash randomization), which made the order of the
# documents written to the store non-reproducible from run to run.
# Blank lines are skipped so that no empty-content Document is created.
train_data_hava_dups = list(
    dict.fromkeys(text for text in stripped_lines if text)
)

# One Document per unique line, in file order.
train_data = [Document(content=text) for text in train_data_hava_dups]

In [18]:
# Build a fresh in-memory store each time this cell runs, then index the
# prepared documents. The write call is deliberately the last expression so
# the notebook displays the value it returns.
document_store = InMemoryDocumentStore()
document_store.write_documents(train_data)

62

In [19]:
# Read the API key from the first line of api_key.txt and export it as
# GROQ_API_KEY for the rest of the notebook.
# "utf-8-sig" transparently drops a UTF-8 byte-order mark if the file was
# saved with one, so the BOM cannot end up inside the key.
with open("api_key.txt", "r", encoding="utf-8-sig") as f:
    api_key = f.readline().strip()

# Fail here with a clear message rather than exporting an empty key and
# getting an opaque authentication error on the first request.
if not api_key:
    raise ValueError(
        "api_key.txt is empty: expected the API key on its first line"
    )

os.environ['GROQ_API_KEY'] = api_key

In [20]:
# --- Components -----------------------------------------------------------
# BM25 keyword retriever over the in-memory document store.
retriever = InMemoryBM25Retriever(document_store=document_store)

# Jinja template: lists the content of every retrieved document, then asks
# the question. `documents` and `question` are the template's two inputs.
prompt_template = """
According to the contents of this website:
{% for document in documents %}
  {{document.content}}
{% endfor %}
Answer the given question: {{question}}
Answer:
"""
prompt_builder = PromptBuilder(template=prompt_template)

# OpenAIGenerator pointed at Groq's OpenAI-compatible endpoint; the key is
# resolved from the GROQ_API_KEY environment variable.
llm = OpenAIGenerator(
    api_key=Secret.from_env_var("GROQ_API_KEY"),
    api_base_url="https://api.groq.com/openai/v1",
    model="llama3-70b-8192",
    generation_kwargs={"max_tokens": 1024},
)

# --- Wiring ---------------------------------------------------------------
# retriever -> prompt_builder.documents, prompt_builder -> llm
rag_pipeline = Pipeline()
rag_pipeline.add_component("retriever", retriever)
rag_pipeline.add_component("prompt_builder", prompt_builder)
rag_pipeline.add_component("llm", llm)
rag_pipeline.connect("retriever", "prompt_builder.documents")
# Left as the last expression so the notebook displays the pipeline summary.
rag_pipeline.connect("prompt_builder", "llm")


<haystack.core.pipeline.pipeline.Pipeline object at 0x000001551B1265F0>
🚅 Components
  - retriever: InMemoryBM25Retriever
  - prompt_builder: PromptBuilder
  - llm: OpenAIGenerator
🛤️ Connections
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)

In [27]:
# English variant of the question, kept for reference:
# question = "Based on the documents, are the author named 陳鼎 of 滇黔土司㛰禮記 and the author of 滇黔紀游 the same person? Please provide your reasons."
question = "用中文回答。根據這些文件，特別是他們生平，譬如 courtesy name, style name、著述，哪裡人的資訊告訴我，滇黔土司㛰禮記的作者陳鼎和滇黔紀遊的作者是同一個人嗎？請提供你的理由。"

# The same question text is used twice: as the retrieval query and as the
# `question` variable of the prompt template.
run_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(run_inputs)

# Show only the first reply produced by the generator.
first_reply = results["llm"]["replies"][0]
print(first_reply)

根據網站的內容，滇黔土司㛰禮記的作者陳鼎和滇黔紀遊的作者是同一个人。

理由如下：

1. 陳鼎是滇黔土司㛰禮記的作者，這可以在該書的作者欄中找到。
2. 滇黔紀遊的作者也是一位名叫陳鼎的人，這可以在該書的作者欄中找到。
3. 對比兩本書的作者生平，發現兩人的birthday、籍貫、著述等資訊完全相同。
4. 由於兩人的生平資料完全相同，因此可以斷定滇黔土司㛰禮記的作者陳鼎和滇黔紀遊的作者是同一個人。

因此，根據以上理由，我們可以斷定滇黔土司㛰禮記的作者陳鼎和滇黔紀遊的作者是同一個人。
