# Council HR Bot (Denbighshire) — MVP Clean Notebook

This notebook builds a **Router + Leave/Travel/Pay RAG agents + Web Scout** using **Groq + Chroma + LangGraph**.

**Run cells from top → bottom.**


In [None]:
!pip -q install -U langchain langchain-core langchain-community langchain-groq langgraph chromadb sentence-transformers pypdf duckduckgo-search gradio


## 1) Download PDFs (Brain)
These are the 3 policy PDFs used by the specialist agents.


In [None]:
import os, requests

# Local file name -> download URL for each policy PDF.
PDFS = {
    "annual_leave_guidance.pdf": "https://www.denbighshire.gov.uk/en/documents/hr-policies/my-employment/leave-and-attendance/annual-leave-guidance-for-managers-and-employees-v5-9-1.pdf",
    "travel_and_subsistence.pdf": "https://www.denbighshire.gov.uk/en/documents/hr-policies/my-employment/pay-and-benefits/travel-and-subsistence-v-2-2.pdf",
    "pay_policy_2025_2026.pdf": "https://www.denbighshire.gov.uk/en/documents/hr-policies/my-employment/pay-and-benefits/pay-policy-2025-2026.pdf",
}

# Download target; created on first run, left alone if it already exists.
out_dir = "data/pdfs"
os.makedirs(out_dir, exist_ok=True)

# Fetch every PDF on each run (an existing file of the same name is overwritten).
for name, url in PDFS.items():
    path = os.path.join(out_dir, name)
    r = requests.get(url, timeout=60)
    # Stop on any HTTP error status instead of saving an error page as a .pdf.
    r.raise_for_status()
    with open(path, "wb") as f:
        f.write(r.content)
    print("Downloaded:", path)


## 2) Groq API key + LLM
Paste your Groq key when prompted.


In [None]:
import os, getpass
# Prompt for the key (getpass does not echo the input) and store it in the process environment.
# Any GROQ_API_KEY value already present is replaced.
os.environ["GROQ_API_KEY"] = getpass.getpass("Paste your GROQ API key here: ")


In [None]:
from langchain_groq import ChatGroq

# Model used for every call made through `llm`.
# Fast model option: llama-3.1-8b-instant (faster)
# Quality model option: llama-3.3-70b-versatile (better answers, slower)
MODEL_NAME = "llama-3.1-8b-instant"  # change to "llama-3.3-70b-versatile" if you want higher quality

# No API key is passed explicitly; presumably the client reads GROQ_API_KEY from the environment — TODO confirm.
llm = ChatGroq(model=MODEL_NAME, temperature=0)
# Smoke test: one request to the model; the reply text is printed.
print(llm.invoke("Reply exactly: GROQ_OK").content)


## 3) Build persistent Vector DBs (Leave / Travel / Pay)
We chunk PDFs and store embeddings in Chroma so we don't rebuild every time.


In [None]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Root folder for the on-disk Chroma data; created if missing.
PERSIST_ROOT = "chroma_db"
os.makedirs(PERSIST_ROOT, exist_ok=True)

# A single embedding model and a single splitter are shared by build_store().
emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# chunk_size / chunk_overlap are measured by the splitter's length function
# (characters by default — TODO confirm if a custom length function is ever passed).
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)

def build_store(pdf_path: str, collection: str, persist_dir: str) -> Chroma:
    """Return the Chroma store for one policy PDF, building it only when needed.

    If ``persist_dir`` already holds a non-empty ``collection``, that store is
    reopened and returned unchanged. Otherwise the PDF is loaded, split with
    the module-level ``splitter``, embedded with ``emb`` and written to
    ``persist_dir``.

    Reopening matters because ``Chroma.from_documents`` adds to whatever the
    persisted collection already contains: re-running the cell would embed the
    PDF again and store every chunk a second time, so retrieval would start
    returning duplicate passages. To force a rebuild (for example after the
    PDF changed), delete ``persist_dir`` first.

    Args:
        pdf_path: Path of the PDF to index.
        collection: Chroma collection name.
        persist_dir: Directory in which Chroma persists this collection.

    Returns:
        The populated Chroma vector store.
    """
    existing = Chroma(
        collection_name=collection,
        embedding_function=emb,
        persist_directory=persist_dir,
    )
    # Asking for a single id is enough to tell "already built" from "empty".
    if existing.get(limit=1)["ids"]:
        print(f"Loaded {collection}: reusing persisted store -> {persist_dir}")
        return existing

    docs = PyPDFLoader(pdf_path).load()
    # Replace the loader's source value with the bare file name so citations stay short.
    fname = os.path.basename(pdf_path)
    for d in docs:
        d.metadata["source"] = fname
    chunks = splitter.split_documents(docs)
    vs = Chroma.from_documents(
        documents=chunks,
        embedding=emb,
        persist_directory=persist_dir,
        collection_name=collection,
    )
    print(f"Built {collection}: chunks={len(chunks)} -> {persist_dir}")
    return vs

# One vector store per policy PDF, each with its own collection name and its own folder under PERSIST_ROOT.
leave_vs  = build_store("data/pdfs/annual_leave_guidance.pdf", "leave",  os.path.join(PERSIST_ROOT, "leave_store"))
travel_vs = build_store("data/pdfs/travel_and_subsistence.pdf", "travel", os.path.join(PERSIST_ROOT, "travel_store"))
pay_vs    = build_store("data/pdfs/pay_policy_2025_2026.pdf", "pay",     os.path.join(PERSIST_ROOT, "pay_store"))


## 4) Generic RAG helper (returns answer + sources)
This reduces hallucinations by grounding answers in retrieved chunks.


In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Prompt shared by all policy agents. It has two placeholders, {context} and {question},
# and instructs the model to answer only from the context or reply "I don't know."
rag_prompt = ChatPromptTemplate.from_template("""
You are an HR policy assistant.
Answer using ONLY the context.
If the answer is not in the context, say: I don't know.

Context:
{context}

Question: {question}

Answer:
""")

def _get_docs(vs, question: str, k: int = 6):
    retriever = vs.as_retriever(search_kwargs={"k": k})
    try:
        return retriever.invoke(question)
    except Exception:
        return retriever.get_relevant_documents(question)

def _format_context(docs):
    return "\n\n---\n\n".join(
        f"[{d.metadata.get('source','doc')} | page {d.metadata.get('page','?')}]\n{d.page_content}"
        for d in docs
    )

def ask_rag(vs, question: str) -> str:
    """Answer ``question`` from the policy store ``vs`` and append its sources.

    Retrieves up to 6 chunks, passes them to the LLM through ``rag_prompt``
    and returns the stripped answer followed by a ``Sources:`` list with one
    ``- file (page N)`` line per distinct (file, page) pair, in retrieval
    order.

    Differences from a plain "retrieve then ask" flow:
      * When nothing is retrieved, the LLM is not called; the prompt's own
        fallback wording ("I don't know.") is returned with "(no sources)".
        An empty context gives the model nothing to ground on.
      * Integer ``page`` metadata is cited as ``page + 1`` because the PDF
        loader used in this notebook numbers pages from 0.

    Args:
        vs: Vector store to retrieve from.
        question: The user's question.

    Returns:
        Answer text plus the sources block.
    """
    docs = _get_docs(vs, question, k=6)
    if not docs:
        return "I don't know.\n\nSources:\n- (no sources)"

    context = _format_context(docs)
    chain = rag_prompt | llm | StrOutputParser()
    answer = chain.invoke({"context": context, "question": question})

    def _citation(doc):
        page = doc.metadata.get("page", "?")
        if isinstance(page, int):
            page += 1  # 0-based loader index -> human page number
        return (doc.metadata.get("source", "doc"), page)

    # dict.fromkeys keeps first-seen order while dropping repeated citations.
    uniq = list(dict.fromkeys(_citation(d) for d in docs))
    src = "\n".join(f"- {s} (page {p})" for s, p in uniq)
    return answer.strip() + "\n\nSources:\n" + src


## 5) Web Scout tool (live website)
Used for job vacancies, news, contacts.


In [None]:
from duckduckgo_search import DDGS

def web_scout(query: str, max_results: int = 3) -> str:
    """Search denbighshire.gov.uk through DuckDuckGo and format the hits.

    The query is prefixed with ``site:denbighshire.gov.uk``. Each hit becomes
    a three-line bullet: title, URL, snippet.

    Args:
        query: Free-text search terms (the user's question).
        max_results: Maximum number of hits to request.

    Returns:
        The formatted hits, ``"No results found."`` when the search returns
        nothing, or a ``"Web search failed ..."`` message when the search
        itself raises.
    """
    q = f"site:denbighshire.gov.uk {query}"
    try:
        with DDGS() as ddgs:
            hits = ddgs.text(q, max_results=max_results) or []
            results = [
                f"- {r.get('title','')}\n  {r.get('href','')}\n  {r.get('body','')}"
                for r in hits
            ]
    except Exception as exc:
        # This is the boundary to a third-party network service. A rate limit
        # or timeout should reach the user as an answer, not abort the whole
        # request; the exception type is kept in the text so the failure
        # stays diagnosable.
        return f"Web search failed ({type(exc).__name__}): {exc}"
    return "\n".join(results) if results else "No results found."


## 6) Router + LangGraph (MVP)
Routes to Leave/Travel/Pay policy agents or Web Scout.


In [None]:
from typing import TypedDict
from langgraph.graph import StateGraph, END

class State(TypedDict):
    """Dictionary passed between the graph nodes."""
    question: str  # the user's question, read by every node
    route: str     # written by router_node: "leave", "travel", "pay" or "web"
    answer: str    # written by whichever agent node runs

def router_node(state: State) -> State:
    """Pick a route for the question by keyword lookup and store it on the state.

    The lower-cased question is checked against the keyword groups in the
    order web, travel, pay, leave; the first group with any keyword occurring
    as a substring decides the route. With no hit at all the route is "leave".
    The incoming ``state`` dict is updated in place and returned.
    """
    text = state["question"].lower()

    # Order matters: earlier rows take precedence over later ones.
    routing_table = (
        ("web",    ("job","jobs","vacancy","vacancies","news","contact","email","phone","address")),
        ("travel", ("travel","subsistence","mileage","hire car","hotel","claim","allowance","parking","public transport")),
        ("pay",    ("pay","salary","grade","honoraria","honorarium","redundancy","overtime","allowance","pay policy")),
        ("leave",  ("annual leave","holiday","holidays","carry over","carryover","sick","sickness","bank holiday","leave year","book leave")),
    )

    chosen = "leave"  # fallback
    for label, keywords in routing_table:
        if any(word in text for word in keywords):
            chosen = label
            break

    state["route"] = chosen
    return state

def leave_node(state: State) -> State:
    """Answer the question from the annual-leave store and save it under "answer"."""
    reply = ask_rag(leave_vs, state["question"])
    state["answer"] = reply
    return state

def travel_node(state: State) -> State:
    """Answer the question from the travel-and-subsistence store and save it under "answer"."""
    reply = ask_rag(travel_vs, state["question"])
    state["answer"] = reply
    return state

def pay_node(state: State) -> State:
    """Answer the question from the pay-policy store and save it under "answer"."""
    reply = ask_rag(pay_vs, state["question"])
    state["answer"] = reply
    return state

def web_node(state: State) -> State:
    """Run a site search for the question (at most 3 hits) and save it under "answer"."""
    hits = web_scout(state["question"], max_results=3)
    state["answer"] = hits
    return state

# Graph over the State dict: one router node plus four agent nodes.
g = StateGraph(State)
g.add_node("router", router_node)
g.add_node("leave_agent", leave_node)
g.add_node("travel_agent", travel_node)
g.add_node("pay_agent", pay_node)
g.add_node("web_agent", web_node)

# Every run starts at the router.
g.set_entry_point("router")

def route_to_next(state: State) -> str:
    """Translate the route label on the state into the name of the agent node.

    Raises KeyError when ``state["route"]`` is not one of
    "leave", "travel", "pay" or "web".
    """
    agent_by_route = {
        "leave": "leave_agent",
        "travel": "travel_agent",
        "pay": "pay_agent",
        "web": "web_agent",
    }
    selected = state["route"]
    return agent_by_route[selected]

# After the router, route_to_next() returns a key of this mapping; the value is the node to run next.
g.add_conditional_edges("router", route_to_next, {
    "leave_agent": "leave_agent",
    "travel_agent": "travel_agent",
    "pay_agent": "pay_agent",
    "web_agent": "web_agent",
})

# Each agent is a terminal step: its output is the final state.
g.add_edge("leave_agent", END)
g.add_edge("travel_agent", END)
g.add_edge("pay_agent", END)
g.add_edge("web_agent", END)

# Compile the graph into the object used via app.invoke(...) in later cells.
app = g.compile()
print("✅ app ready:", "app" in globals())


## 7) Test prompts (assignment self-check)
Run these and keep the output as evidence for submission.


In [None]:
# (label, question) pairs — one question per route.
tests = [
    ("Leave",  "How many days of annual leave am I entitled to?"),
    ("Travel", "When am I required to use a hire car instead of my own vehicle?"),
    ("Pay",    "How are honoraria payments calculated?"),
    ("Web",    "Are there any IT jobs available right now?"),
]

# Run each question through the compiled graph and print the chosen route and the answer.
for label, q in tests:
    out = app.invoke({"question": q, "route": "", "answer": ""})
    print("\n===", label, "===")
    print("Q:", q)
    print("Route:", out["route"])
    # Only the first 1500 characters of the answer are printed.
    print(out["answer"][:1500])


## 8) Simple end-user UI (Gradio)
This creates a textbox where users type questions and get answers.


In [None]:
import gradio as gr

def hr_bot(question: str) -> str:
    """Run one question through the compiled graph and format the reply.

    Blank input (``None``, empty or whitespace-only) is answered directly with
    a short hint instead of being sent through the graph, where it would be
    routed to the fallback agent and cost an LLM call for no useful answer.
    Surrounding whitespace is stripped before the question is passed on.

    Args:
        question: Text typed by the user.

    Returns:
        ``"ROUTE: <route>"``, a blank line, then the answer — or the hint
        ``"Please enter a question."`` for blank input.
    """
    cleaned = (question or "").strip()
    if not cleaned:
        return "Please enter a question."
    out = app.invoke({"question": cleaned, "route": "", "answer": ""})
    return f"ROUTE: {out['route']}\n\n{out['answer']}"

# Single-textbox UI: the typed question goes to hr_bot() and its return value is shown as plain text.
demo = gr.Interface(
    fn=hr_bot,
    inputs=gr.Textbox(lines=2, placeholder="Ask the Council HR Bot..."),
    outputs="text",
    title="Council HR Bot (Router + Policy RAG + Web Scout)",
    description="Ask about Leave / Travel / Pay policies, or jobs/news/contact on denbighshire.gov.uk."
)

# NOTE(review): share=True requests a public Gradio share link in addition to the local server —
# confirm that exposing the bot outside the local machine is intended.
demo.launch(share=True)
