In [1]:
from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

def build_faiss(path, index_path="faiss_index"):
    """Build a FAISS index from a PDF and save it to disk.

    The PDF is turned into per-page records by ``pdf_utils.pdf_to_docs``.
    Each record's text is split into overlapping chunks, the chunks are
    embedded with the ``all-MiniLM-L6-v2`` sentence-transformer, and the
    resulting vector store is written to ``index_path``.

    Args:
        path: Location of the PDF; passed unchanged to ``pdf_to_docs``.
        index_path: Destination handed to ``save_local``.

    Returns:
        The in-memory FAISS vector store that was just saved.

    Raises:
        ValueError: If the PDF yields no text chunks at all.
    """
    # Local import kept from the original implementation.
    from pdf_utils import pdf_to_docs

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts, metadatas = [], []
    for doc in pdf_to_docs(path):
        # Each record is indexed by "text" and "page". A missing/None text
        # (presumably possible for image-only pages -- TODO confirm against
        # pdf_to_docs) is treated as an empty page instead of crashing.
        page_text = doc["text"] or ""
        for chunk in text_splitter.split_text(page_text):
            texts.append(chunk)
            # Keep the page number next to every chunk so answers can cite it.
            metadatas.append({"page": doc["page"]})

    if not texts:
        # Building a FAISS store from zero texts fails deep inside the
        # library with an unhelpful error; report the real cause instead.
        raise ValueError(
            f"No extractable text found in {path!r}; cannot build a FAISS index."
        )

    emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(texts, emb, metadatas=metadatas)
    vector_store.save_local(index_path)
    return vector_store

def load_faiss(index_path="faiss_index"):
    """Load a FAISS vector store previously saved at ``index_path``.

    Args:
        index_path: Location passed to ``FAISS.load_local``.

    Returns:
        The loaded FAISS vector store, bound to the same MiniLM embedding
        model name that ``build_faiss`` uses.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    # NOTE(security): this flag opts in to deserialization that LangChain
    # itself labels dangerous. Only load index directories this application
    # wrote; never point this at an index from an untrusted source.
    store = FAISS.load_local(
        index_path,
        embeddings=embedder,
        allow_dangerous_deserialization=True,
    )
    return store


In [2]:
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.schema import HumanMessage
from vector_store import load_faiss

def get_llm():
    """Create the Groq chat model used for answering and reframing.

    The API key is read from the ``GROQ_API_KEY`` environment variable so
    that no credential lives in source control. The key that was previously
    hard-coded here must be considered leaked and should be revoked.

    Returns:
        A ``ChatGroq`` client for ``llama-3.1-8b-instant`` with temperature
        0.0 and a 512-token completion limit.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is unset or empty.
    """
    # Function-scope import: this module does not import os at the top.
    import os

    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GROQ_API_KEY environment variable is not set; "
            "export it before starting the application."
        )

    return ChatGroq(
        groq_api_key=api_key,
        model="llama-3.1-8b-instant",
        temperature=0.0,
        max_tokens=512
    )

def load_qa(index_path="faiss_index"):
    """Assemble a retrieval QA chain over a saved FAISS index.

    Args:
        index_path: Location of the index, forwarded to ``load_faiss``.

    Returns:
        A ``RetrievalQA`` chain of type "stuff" that retrieves the 4 nearest
        chunks per query and also returns the source documents it used.
    """
    vector_store = load_faiss(index_path)
    chain_options = {
        # Top-4 similarity search over the loaded index.
        "retriever": vector_store.as_retriever(search_kwargs={"k": 4}),
        "llm": get_llm(),
        "chain_type": "stuff",
        "return_source_documents": True,
    }
    return RetrievalQA.from_chain_type(**chain_options)

def ask_question(qa, question, role_prompt, q=None):
    """Answer a question from the index, then reframe the answer via the LLM.

    The chain ``qa`` is queried with ``question``. Its raw answer is then
    sent back to the LLM together with ``role_prompt`` and the question text
    so the final wording follows the requested role.

    Args:
        qa: Retrieval chain whose result exposes "result" and
            "source_documents" (as produced by ``load_qa``).
        question: Query used for retrieval.
        role_prompt: Instruction text placed before the Q/A pair in the
            reframing prompt.
        q: Question text shown in the reframing prompt. Defaults to
            ``question`` so callers need not pass the same string twice.

    Returns:
        A ``(answer, docs)`` tuple: the reframed answer text (or the raw LLM
        response object if it has no ``content`` attribute) and the source
        documents returned by the chain.
    """
    if q is None:
        q = question

    # invoke() replaces the deprecated direct call on the chain and matches
    # the llm.invoke() call below.
    answer_pack = qa.invoke({"query": question})
    raw_answer = answer_pack["result"]
    docs = answer_pack["source_documents"]

    llm = get_llm()
    reframed = llm.invoke([HumanMessage(content=f"{role_prompt}\n\nQ: {q}\n\nA: {raw_answer}")])
    # Fall back to the response object itself when it carries no .content.
    return getattr(reframed, "content", reframed), docs


In [3]:
import streamlit as st
import os
from vector_store import build_faiss, load_faiss
from qa_engine import get_llm
from langchain.chains import RetrievalQA

# Working directories (relative to the current working directory) for the
# uploaded PDFs and for the FAISS index built from each of them.
UPLOAD_DIR = "uploads"
INDEX_DIR = "indexes"

# Create both directories up front; exist_ok makes this safe on every rerun.
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(INDEX_DIR, exist_ok=True)

st.set_page_config(page_title="Policy Chatbot", layout="centered")
# The title emoji was stored as mis-decoded UTF-8 bytes; restored here.
st.title("📑 Policy Chatbot")

# ---- File Upload ----
uploaded = st.file_uploader("Upload a PDF", type=["pdf"])
if uploaded:
    # The file name comes from the client; keep only its final component so
    # it cannot steer the write outside UPLOAD_DIR.
    safe_name = os.path.basename(uploaded.name)

    # Streamlit reruns this whole script on every interaction, and the
    # uploader keeps returning the same file. Remember which upload was
    # already indexed so the PDF is not re-saved and re-embedded on every
    # chat message.
    upload_key = (safe_name, uploaded.size)
    if st.session_state.get("indexed_upload") != upload_key:
        file_path = os.path.join(UPLOAD_DIR, safe_name)
        with open(file_path, "wb") as f:
            f.write(uploaded.getbuffer())
        st.success(f"Saved {safe_name}")

        # Build FAISS index for this file. splitext strips only the real
        # extension (any case), unlike replacing every ".pdf" substring.
        index_path = os.path.join(INDEX_DIR, os.path.splitext(safe_name)[0])
        build_faiss(file_path, index_path=index_path)
        st.session_state["active_index"] = index_path
        st.session_state["indexed_upload"] = upload_key
        st.success("Index built ✅ — Start chatting below!")

# ---- Chat Window ----
# Conversation history lives in session state so it survives script reruns.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Replay the stored conversation on every rerun.
for msg in st.session_state["messages"]:
    with st.chat_message(msg["role"]):
        st.write(msg["content"])

if prompt := st.chat_input("Ask me anything about the policy..."):
    # Show user message
    st.session_state["messages"].append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

    # Load retriever
    if "active_index" in st.session_state:
        index = load_faiss(st.session_state["active_index"])
        retriever = index.as_retriever(search_kwargs={"k": 4})
        llm = get_llm()
        qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

        # invoke() replaces the deprecated Chain.run(); the chain takes its
        # input under "query" and returns the answer text under "result".
        answer = qa.invoke({"query": prompt})["result"]

        # Bot answer
        st.session_state["messages"].append({"role": "assistant", "content": answer})
        with st.chat_message("assistant"):
            st.write(answer)
    else:
        # No index has been built in this session yet.
        with st.chat_message("assistant"):
            st.write("⚠️ Please upload a PDF first.")


2025-09-08 10:43:38.660 
  command:

    streamlit run C:\Users\tanis\AppData\Roaming\Python\Python313\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-09-08 10:43:38.683 Session state does not function when running a script without `streamlit run`


In [1]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS