# In [None]:
# version 5 (final version) - gradio 設定累積歷史提問與答案

# pip3 install langchain langchain-community langchain-ollama langchain-chroma gradio pypdf chromadb
# ollama pull nomic-embed-text

# pip3 install ipywidgets
# jupyter nbextension enable --py widgetsnbextension
# pip3 install -U langchain-ollama

# In [23]:
import os, sys
sys.path.append('.')


from langchain_ollama import OllamaLLM
from langchain_ollama import OllamaEmbeddings
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
# from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings

from langchain_chroma import Chroma


# Module-level question/answer history; every call to the Gradio callback
# appends to this single list.
chat_history = []

# Single-file variant, kept for reference:
# file_path = './linux-0.11_source.pdf'
# loader = PyPDFLoader(file_path)
# docs = loader.load()

# PDF files to index; add more paths here to extend the corpus.
file_paths = ['./unix6.pdf', './xv6-rev7.pdf']

# Load every PDF and collect the resulting documents in one flat list.
docs = []
for file_path in file_paths:
    loader = PyPDFLoader(file_path)
    docs.extend(loader.load())

# Split the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Create the Ollama embeddings and open the on-disk Chroma vector store.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
vectorstore = Chroma(embedding_function=embeddings, persist_directory='./chroma_db_xv6')

# The store is persisted in ./chroma_db_xv6, so unconditionally calling
# add_documents() on every run would embed and append the same chunks again,
# filling retrieval results with duplicates. Ingest only when the collection
# is still empty, and skip the call entirely when there is nothing to add.
# NOTE: after changing file_paths, delete ./chroma_db_xv6 to force a re-index.
if splits and not vectorstore.get(limit=1)["ids"]:
    vectorstore.add_documents(documents=splits)

def ollama_llm(question, context):
    """Ask the local Ollama model to answer ``question`` given ``context``.

    The prompt starts with an instruction (in Chinese) to always answer in
    Traditional Chinese, followed by the question and the context text.

    Args:
        question: The user's question, interpolated into the prompt.
        context: Supporting text, interpolated into the prompt after the question.

    Returns:
        The text of the first generation returned by the model, or a string
        starting with "An error occurred: " if anything raises while
        generating or reading the result.
    """
    prompt_text = (
        "總是用繁體中文回答！"
        f"\n\nQuestion: {question}"
        f"\n\nContext: {context}"
    )
    model = OllamaLLM(model="llama3.1:70b", base_url="http://localhost:11434")

    try:
        # One prompt is submitted, so the answer is the first generation of
        # the first (and only) prompt.
        first_generation = model.generate(prompts=[prompt_text]).generations[0][0]
        return first_generation.text
    except Exception as err:
        # Report the failure as the answer text instead of raising.
        return "An error occurred: " + str(err)

# Retriever over the vector store; supplies the context for each question.
retriever = vectorstore.as_retriever()


def rag_chain(question):
    """Answer ``question`` using documents retrieved from the vector store.

    Retrieves documents for the question, joins their ``page_content`` with
    blank lines, and passes the question plus that context to ``ollama_llm``.

    Args:
        question: The user's question.

    Returns:
        Whatever ``ollama_llm`` returns for the question and joined context.
    """
    matches = retriever.invoke(question)
    context_parts = [match.page_content for match in matches]
    return ollama_llm(question, "\n\n".join(context_parts))

# Define the Gradio interface
# def get_important_facts(question):
#    return rag_chain(question)

def get_important_facts(question):
    """Gradio callback: answer a question and return the full chat transcript.

    A non-blank question is answered via ``rag_chain`` and the
    (question, answer) pair is appended to the module-level ``chat_history``.
    A blank or whitespace-only question is ignored, so it neither triggers a
    model call nor adds an empty entry to the history.

    Args:
        question: The text submitted from the Gradio input box.

    Returns:
        One string containing every recorded question/answer pair, oldest
        first, each formatted as a "**問題**: ... / **回答**: ..." section.
    """
    # Only query the model for a real question.
    if question and question.strip():
        chat_history.append((question, rag_chain(question)))

    # Build the transcript with a single join instead of repeated `+=`.
    return "".join(
        f"**問題**: {q}\n\n**回答**: {a}\n\n" for q, a in chat_history
    )

# Build the Gradio UI: a two-line question box wired to get_important_facts,
# with the returned transcript shown as plain text.
_question_input = gr.Textbox(
    lines=2,
    placeholder="Please input your question, eg Please summarize in 500 words",
)
iface = gr.Interface(
    fn=get_important_facts,
    inputs=_question_input,
    outputs="text",
    title="RAG with Llama3",
    description="Ask questions about the provided context",
)

# Start serving the app.
iface.launch()