# Added category

# In [None]:
import os
import streamlit as st
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain.chains import RetrievalQA

# --- Setup ---

# Pinecone API key: keep whatever is already exported in the environment and
# only fall back to the placeholder below when the variable is unset, so a
# real key is never overwritten by this script.
# NOTE: prefer exporting PINECONE_API_KEY over committing a real key here.
os.environ.setdefault("PINECONE_API_KEY", "your_pinecone_api_key")  # Replace this

# Pinecone client (constructed without arguments, so credentials are expected
# to come from the PINECONE_API_KEY environment variable).
pc = Pinecone()
index_name = "rag-index"


# Streamlit re-executes this whole script on every widget interaction, so the
# heavyweight objects below are built through st.cache_resource and reused
# across reruns. show_spinner=False keeps the loaders from rendering anything
# before st.set_page_config is called further down.
@st.cache_resource(show_spinner=False)
def _load_embedding_model(name: str) -> HuggingFaceEmbeddings:
    """Return the HuggingFace embedding model called ``name`` (cached)."""
    return HuggingFaceEmbeddings(model_name=name)


@st.cache_resource(show_spinner=False)
def _load_vectorstore(index: str, embedding_name: str) -> PineconeVectorStore:
    """Return a vector store bound to Pinecone index ``index`` (cached).

    The store embeds queries with the cached model ``embedding_name``.
    """
    return PineconeVectorStore(
        index_name=index,
        embedding=_load_embedding_model(embedding_name),
    )


# Embedding model
embedding_model = _load_embedding_model("all-MiniLM-L6-v2")

# Load vectorstore
vectorstore = _load_vectorstore(index_name, "all-MiniLM-L6-v2")

# Load HuggingFace LLM
@st.cache_resource(show_spinner=False)
def _load_llm(name: str, max_length: int = 256):
    """Build the text2text generation stack for model ``name`` once.

    Streamlit re-runs the script on every interaction; caching avoids
    reloading the tokenizer and model weights on each rerun.
    show_spinner=False keeps this loader from rendering a Streamlit element
    before st.set_page_config runs.

    Returns:
        A ``(tokenizer, model, pipeline, llm)`` tuple, where ``llm`` is the
        LangChain ``HuggingFacePipeline`` wrapper around the pipeline.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(name)
    generator = pipeline(
        "text2text-generation",
        model=loaded_model,
        tokenizer=loaded_tokenizer,
        max_length=max_length,
    )
    return loaded_tokenizer, loaded_model, generator, HuggingFacePipeline(pipeline=generator)


model_name = "google/flan-t5-base"
# Keep the same module-level names the uncached version exposed.
tokenizer, model, pipe, llm = _load_llm(model_name)

# --- Streamlit UI ---

st.set_page_config(page_icon="📚", page_title="RAG Assistant")
st.title("📚 RAG Assistant with Pinecone + HuggingFace")

# Category selector; the first entry, "All", is the default selection.
_category_choices = ["All", "billing", "account", "support"]
category_filter = st.selectbox(
    label="📂 Filter by document category (optional):",
    options=_category_choices,
)

# Free-text question typed by the user.
query = st.text_input(label="Ask me a question from your documents:")

# Retriever arguments: nothing extra when "All" is selected, otherwise a
# metadata filter that matches the chosen category.
_retriever_options = (
    {}
    if category_filter == "All"
    else {"search_kwargs": {"filter": {"category": category_filter}}}
)
retriever = vectorstore.as_retriever(**_retriever_options)

# "stuff"-type RetrievalQA chain that also returns the retrieved documents.
qa_chain = RetrievalQA.from_chain_type(
    chain_type="stuff",
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

# Run query. Whitespace-only input is treated as "no question" so that the
# retriever and the LLM are never invoked on an empty prompt.
question = query.strip() if query else ""
if question:
    response = qa_chain.invoke({"query": question})

    st.markdown("### 🤖 Answer:")
    st.write(response["result"])

    st.markdown("### 📂 Sources:")
    sources = response["source_documents"]
    if not sources:
        # Make an empty result explicit instead of showing a bare heading.
        st.info("No matching documents were found for this question.")
    for position, doc in enumerate(sources, start=1):
        # Documents without a 'category' metadata entry render as None.
        st.markdown(f"**🔹 Source {position} — Category:** `{doc.metadata.get('category')}`")
        with st.expander("🔍 View Matched Text"):
            st.write(doc.page_content.strip())