# RAG – LangChain (EN/VN)
**Objective/Mục tiêu**: Build a minimal RAG pipeline over local docs: load → split → embedding → vector store → retriever → LLM. The LLM used here can be replaced with any other local or API-backed model.



# !pip install -q langchain langchain-community langchain-text-splitters faiss-cpu sentence-transformers transformers pypdf


In [None]:

import os, glob
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

# Prepare corpus folder and seed it with one sample document.
os.makedirs("docs", exist_ok=True)
# Explicit UTF-8: the platform default encoding varies (e.g. cp1252 on
# Windows) and would break as soon as the corpus contains non-ASCII text.
with open("docs/sample.txt", "w", encoding="utf-8") as f:
    f.write("Retrieval-augmented generation (RAG) combines retrieval with generation to reduce hallucination.")

# Load files. sorted() makes the load order (and so the index order)
# reproducible, since glob.glob returns paths in arbitrary order.
docs = []
for path in sorted(glob.glob("docs/*.txt")):
    # Read with the same encoding the sample file was written with.
    docs.extend(TextLoader(path, encoding="utf-8").load())

# Split the loaded documents into overlapping chunks for embedding.
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=60)
chunks = splitter.split_documents(docs)

# Embeddings + Vector store: embed every chunk and index it in FAISS.
emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vs = FAISS.from_documents(chunks, emb)

# Retriever: request up to 4 chunks per query.
retriever = vs.as_retriever(search_kwargs={"k": 4})

# Local text-generation backend built on a transformers pipeline.
# Swap this section out for an API client or a vLLM server later.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "Qwen/Qwen2.5-0.5B-Instruct"

# Tokenizer and weights come from the same checkpoint id.
tok = AutoTokenizer.from_pretrained(model_id, use_fast=True)
mdl = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
)

# Generation is capped at 256 new tokens per call.
gen = pipeline(
    "text-generation",
    model=mdl,
    tokenizer=tok,
    max_new_tokens=256,
)

def answer(query):
    """Answer ``query`` with the LLM, using retrieved chunks as context.

    Fetches documents for the query from the module-level ``retriever``,
    joins their ``page_content`` with blank lines into a CONTEXT section,
    and sends the assembled prompt to the module-level ``gen`` pipeline.

    Args:
        query: The user question, as a string.

    Returns:
        The ``generated_text`` field of the first pipeline result.
        NOTE(review): with the pipeline's default settings this probably
        includes the prompt itself, not only the completion - confirm.
    """
    rel_docs = retriever.get_relevant_documents(query)
    # Newlines must be written as escapes: a raw line break inside a
    # single-quoted literal is a SyntaxError (unterminated string).
    context = "\n\n".join(d.page_content for d in rel_docs)
    # Adjacent literals are concatenated at compile time, so the prompt
    # layout stays readable while each literal remains on one line.
    prompt = (
        "Use the CONTEXT to answer. Be concise.\n"
        f"CONTEXT:\n{context}\n\n"
        f"QUESTION: {query}\n"
        "ANSWER:"
    )
    out = gen(prompt)[0]["generated_text"]
    return out

# Smoke test: run one question through the pipeline and print the result.
print(answer("What is RAG?"))
