# 02 – Đánh giá RAG nâng cao

Notebook này xây dựng một hệ thống RAG (Retrieval-Augmented Generation) đơn giản bằng LangChain và đánh giá chất lượng của nó bằng DeepEval.

In [2]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

from deepeval import assert_test
from deepeval.test_case import LLMTestCase

from deepeval.metrics import ContextualPrecisionMetric, ContextualRecallMetric, ContextualRelevancyMetric, FaithfulnessMetric

# Build a minimal RAG pipeline over one text file, ask it a single question,
# and score the answer with DeepEval's retrieval/faithfulness metrics.
#
# NOTE(review): the recorded run of this cell failed at import time with
# "No module named 'langchain.embeddings'"; the legacy `langchain.*` import
# paths used by this cell depend on the installed LangChain version — confirm
# the environment before re-running.

# Load the source document and split it into overlapping chunks for indexing.
with open('data/rag_document.txt', encoding='utf-8') as f:
    doc = f.read()

splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
texts = splitter.split_text(doc)

# Embed every chunk and index the vectors in an in-memory FAISS store.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_texts(texts, embeddings)
retriever = vectorstore.as_retriever()

llm = ChatOpenAI(model_name="gpt-3.5-turbo")
# RetrievalQA has no `llm` constructor argument; `from_chain_type` is the
# factory that wires the LLM into a document-combining chain.
# `return_source_documents=True` exposes the chunks that were actually
# retrieved so they can be evaluated below.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

question = "Dân số TP HCM khoảng bao nhiêu?"
response = qa_chain.invoke({"query": question})
result = response["result"]
# The metrics must judge what the retriever really returned for this
# question, not an arbitrary slice of the document's chunks.
retrieved_chunks = [d.page_content for d in response["source_documents"]]
print("KQ:", result)

# Reference ("gold") answer for `question`.
# TODO(review): fill this in from data/rag_document.txt. Until it is set,
# the two metrics that compare against a reference answer are skipped.
expected_output = None

# These metrics only need input, actual_output and retrieval_context.
metrics = [ContextualRelevancyMetric(), FaithfulnessMetric()]
if expected_output is not None:
    # Contextual precision/recall additionally require expected_output.
    metrics += [ContextualPrecisionMetric(), ContextualRecallMetric()]

case = LLMTestCase(
    input=question,
    actual_output=result,
    expected_output=expected_output,
    retrieval_context=retrieved_chunks,
)
assert_test(case, metrics)

ModuleNotFoundError: No module named 'langchain.embeddings'

## Synthesizer – Tạo bộ dữ liệu tự động

In [None]:
# Generate a small synthetic evaluation set ("goldens") from the same document
# that backs the RAG pipeline.
# `deepeval.synthesizer` exposes `Synthesizer`; it has no `RAGSynthesizer`.
from deepeval.synthesizer import Synthesizer

synth = Synthesizer()
# NOTE(review): the knobs that limit how many goldens are generated differ
# between DeepEval versions, so the result is simply trimmed to three here —
# confirm the preferred limit parameter for the installed version.
goldens = synth.generate_goldens_from_docs(
    document_paths=['data/rag_document.txt'],
)
testset = goldens[:3]
if testset:
    print(testset[0])
else:
    print("No goldens were generated from data/rag_document.txt")