# RAG 進階測試
參考教學：[LangChain 官方 RAG 教學](https://python.langchain.com/docs/tutorials/rag/)

## 載入套件

In [1]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback
from langchain_openai import ChatOpenAI

## 讀取資料檔

In [6]:
from langchain_community.document_loaders import DirectoryLoader

# Load every file under ./rag_data that matches the glob pattern **/*.txt.
loader = DirectoryLoader(path="./rag_data", glob="**/*.txt")
docs = loader.load()
# Display the number of loaded documents as the cell output.
len(docs)

libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.


2

## 資料前置處理

In [7]:
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

# Split the loaded documents on newlines, targeting 500-character chunks
# (measured with len) with a 20-character overlap between adjacent chunks.
text_splitter = CharacterTextSplitter(
    separator="\n", chunk_size=500, chunk_overlap=20, length_function=len
)
chunks = text_splitter.split_documents(docs)

# Create the embedding model and an in-memory vector store that uses it.
embeddings = OpenAIEmbeddings()
vector_store = InMemoryVectorStore(embedding=embeddings)

# Index the chunks; the return value is bound to `_` so the cell shows no output.
_ = vector_store.add_documents(chunks)

In [4]:
from langchain import hub
from langgraph.graph import START, StateGraph
from langchain_core.documents import Document
from typing_extensions import List, TypedDict

# Define prompt for question-answering: pull the "rlm/rag-prompt" template
# from the hub. generate() fills it with the keys "question" and "context".
prompt = hub.pull("rlm/rag-prompt")

# Define state for application
class State(TypedDict):
    """State schema handed to StateGraph and passed to each graph step."""
    # The user's question; supplied in the dict given to graph.invoke().
    question: str
    # Documents written by retrieve() and read by generate().
    context: List[Document]
    # Answer text written by generate().
    answer: str

# Define application steps
def retrieve(state: State):
    """Retrieval step: find documents similar to the question.

    Runs a similarity search on the module-level `vector_store` with
    `state["question"]` and returns the partial state update
    `{"context": <search results>}`.
    """
    question = state["question"]
    matching_docs = vector_store.similarity_search(question)
    return dict(context=matching_docs)

def generate(state: State):
    """Generation step: answer the question from the retrieved context.

    Joins the `page_content` of every document in `state["context"]` with
    blank lines, fills the module-level `prompt` with that text and the
    question, sends the result to the module-level `llm`, and returns the
    partial state update `{"answer": <response content>}`.

    Note: `llm` is looked up when this function runs, so it must be defined
    before the graph is invoked.
    """
    context_text = "\n\n".join([doc.page_content for doc in state["context"]])
    prompt_input = {"question": state["question"], "context": context_text}
    reply = llm.invoke(prompt.invoke(prompt_input))
    return {"answer": reply.content}

# Compile application and test
# Wire the steps into a linear graph: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()



## 測試

In [5]:
from langchain_openai import ChatOpenAI

# Chat model that generate() reads as a module-level name; it must exist
# before the graph is invoked.
llm = ChatOpenAI(model="gpt-4o-mini")

# Run the retrieve -> generate pipeline for one question and print the answer.
initial_state = {"question": "小明住哪裡"}
response = graph.invoke(initial_state)
print(response["answer"])

小明住在台北。


In [8]:
# Ask a second question through the same compiled graph and print the answer.
initial_state = {"question": "小明去哪裡"}
response = graph.invoke(initial_state)
print(response["answer"])

小明今天去找小春。小春住在台南。小明住在台北。
