## Step 1: Import & Load Documents with Metadata

In [7]:
from langchain_core.documents import Document

# Hand-written FAQ snippets, each paired with the category stored in its metadata.
faq_entries = [
    ("You can reset your password from the account settings page.", "account"),
    ("Refunds are processed within 5–7 business days after request.", "billing"),
    ("To reach support, please visit the Help Center page.", "support"),
]

# Wrap every snippet in a Document; all of them share the same "source" value.
docs = [
    Document(
        page_content=text,
        metadata={"category": category, "source": "faq.txt"},
    )
    for text, category in faq_entries
]

## Step 2: Embed with HuggingFace + Store in FAISS

In [8]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Identifier of the HuggingFace embedding model used for the documents.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Build the FAISS vector store from `docs`, embedding them with the model above.
vectorstore = FAISS.from_documents(documents=docs, embedding=embedding_model)

## Step 3: Filtered Retrieval by Metadata

In [9]:
# Metadata constraint passed to the search; change the category value to test
# retrieval against a different group of documents.
metadata_filter = {"category": "billing"}

# Retriever over the vector store, configured with k=3 and the filter above.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 3, "filter": metadata_filter}
)

## Step 4: Load HuggingFace LLM and Create QA Chain

In [10]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_huggingface import HuggingFacePipeline
from langchain.chains import RetrievalQA

# One checkpoint name is used for both the tokenizer and the seq2seq model.
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Text-to-text generation pipeline around the loaded model, then the LangChain
# wrapper that exposes it as an LLM.
pipe = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100,
)
llm = HuggingFacePipeline(pipeline=pipe)

# Question-answering chain that combines the LLM with the filtered retriever.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

Device set to use mps:0


## Step 5: Ask a Filtered Question

In [11]:
# Question to run through the QA chain; the chain is invoked with a "query" key.
query = "How long does it take to get a refund?"
chain_input = {"query": query}
response = qa_chain.invoke(chain_input)

# Show the question, then the text stored under the response's "result" key.
print("🔍 Question:", query)
answer = response["result"]
print("📘 Answer:", answer)

🔍 Question: How long does it take to get a refund?
📘 Answer: 5–7 business days
