# AI-Powered HR Assistant (RAG + Gradio)

This notebook builds an **HR Policy Q&A assistant** using:
- PDF loading + chunking
- Embeddings (SentenceTransformers)
- FAISS vector search
- Optional OpenAI chat model for final answers
- Gradio UI

**Input PDF:** `nestle_hr_policy.pdf` — upload it to the runtime (using the upload cell below) before running the PDF-loading cell.


In [None]:
# ✅ 1) Install dependencies
!pip -q install -U gradio pypdf sentence-transformers faiss-cpu openai


In [None]:
# ✅ 2) Upload PDF (Colab only)
from google.colab import files
# Ask the user to pick file(s) to upload, then list the names received.
# The result is used as a mapping keyed by file name.
uploaded = files.upload()
print('Uploaded:', list(uploaded))


In [None]:
# ✅ 3) (Optional) Set OpenAI credentials
import os

# If using Vocareum-like OpenAI proxy:
# os.environ["OPENAI_API_KEY"] = "YOUR_KEY"
# os.environ["OPENAI_BASE_URL"] = "https://openai.vocareum.com/v1"

# If using standard OpenAI:
# os.environ["OPENAI_API_KEY"] = "YOUR_KEY"

# Optional model override:
# os.environ["OPENAI_MODEL"] = "gpt-4o-mini"


In [None]:
# ✅ 4) Load + chunk the PDF
from pypdf import PdfReader
import re

PDF_PATH = "nestle_hr_policy.pdf"

reader = PdfReader(PDF_PATH)

# One record per page: a 1-based page number plus the page text with every
# run of whitespace collapsed to a single space. Pages for which
# extract_text() returns nothing are kept with an empty string.
raw_pages = [
    {
        "page": page_number,
        "text": re.sub(r"\s+", " ", page.extract_text() or "").strip(),
    }
    for page_number, page in enumerate(reader.pages, start=1)
]

print("Pages:", len(raw_pages))
print("Sample (page 1):", raw_pages[0]["text"][:300], "...")

def chunk_text(text: str, chunk_size: int = 900, overlap: int = 150):
    """Split *text* into fixed-size character windows that overlap.

    Args:
        text: The string to split. Empty input yields an empty list.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared between consecutive chunks;
            must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of substrings. Every chunk except possibly the last has
        exactly ``chunk_size`` characters, and the last chunk ends at the
        end of *text*.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is
            outside ``[0, chunk_size)``. Without this check the window
            would never advance (an endless loop) or would skip text.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, got {overlap}"
        )
    if not text:
        return []

    # Each window starts `step` characters after the previous one, so
    # consecutive windows share exactly `overlap` characters.
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(text), step):
        chunks.append(text[start:start + chunk_size])
        # Stop once a window reaches the end, so no redundant tail chunk
        # made only of already-covered overlap is emitted.
        if start + chunk_size >= len(text):
            break
    return chunks

# Flatten the per-page chunks into two parallel lists: chunks[i] holds the
# chunk text and metas[i] records the page it was taken from.
chunks = []
metas = []
for p in raw_pages:
    for ch in chunk_text(p["text"]):
        if ch.strip():
            chunks.append(ch)
            metas.append({"page": p["page"]})

print("Total chunks:", len(chunks))
if not chunks:
    # Fail with an actionable message instead of an IndexError on chunks[0];
    # this happens when no page yielded any text (for example an image-only
    # PDF), and nothing downstream can work without chunks.
    raise ValueError(
        "No text chunks were produced from the PDF; "
        "check that the file contains extractable text."
    )
print("Sample chunk:", chunks[0][:200], "...")


In [None]:
# ✅ 5) Build embeddings + FAISS (cosine similarity)
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embed_model = SentenceTransformer(EMBED_MODEL_NAME)

# Encode every chunk. The vectors are normalised at encode time so that the
# inner product used by the index below equals cosine similarity.
emb = embed_model.encode(
    chunks,
    convert_to_numpy=True,
    normalize_embeddings=True,
)

# Inner-product index sized to the embedding width (second axis of `emb`).
dim = emb.shape[1]
index = faiss.IndexFlatIP(dim)
index.add(emb)

print("Index size:", index.ntotal)


In [None]:
# ✅ 6) Retrieval helper
def retrieve(query: str, k: int = 4):
    """Return up to *k* indexed chunks most similar to *query*.

    The query is embedded with the same model and normalisation settings
    used for the chunks, then searched against the module-level ``index``.

    Returns:
        A list of dicts with keys ``score`` (float), ``text`` (chunk text),
        ``page`` (source page number) and ``chunk_id`` (position in
        ``chunks``), in the order the index returned them.
    """
    query_vec = embed_model.encode(
        [query], convert_to_numpy=True, normalize_embeddings=True
    )
    scores, ids = index.search(query_vec, k)

    # Only one query was submitted, so row 0 holds all of its hits.
    hits = ((float(score), int(pos)) for score, pos in zip(scores[0], ids[0]))
    return [
        {
            "score": score,
            "text": chunks[pos],
            "page": metas[pos]["page"],
            "chunk_id": pos,
        }
        for score, pos in hits
        if pos != -1  # an id of -1 marks an unfilled result slot
    ]

# Smoke test: run one sample query and print a one-line summary per hit.
test_q = "What does the policy say about harassment?"
res = retrieve(test_q, k=3)
for hit in res:
    print(f"Page {hit['page']} | score={hit['score']:.3f} | {hit['text'][:120]}...")


In [None]:
# ✅ 7) (Optional) LLM answer with citations
from openai import OpenAI

# Create an API client only when a key is configured; a value of None
# signals "no LLM available" to the code that uses `client`.
if os.getenv("OPENAI_API_KEY"):
    client = OpenAI()
else:
    client = None

# System prompt: restrict the model to the supplied context and ask for
# page citations.
SYSTEM = " ".join(
    [
        "You are an HR policy assistant.",
        "Answer ONLY using the provided context.",
        "If the answer is not in the context, say you don't know.",
        "Keep answers clear and concise, and cite page numbers.",
    ]
)

def answer_with_llm(question: str, k: int = 4):
    """Answer *question* from the top-*k* retrieved chunks.

    Args:
        question: The user's question.
        k: Number of chunks to retrieve and pass as context.

    Returns:
        A ``(answer, retrieved)`` tuple. ``answer`` is always a string:
        the model's reply, or, when no client is configured, a notice
        followed by the first 2500 characters of the retrieved context.
        ``retrieved`` is the list returned by ``retrieve``.
    """
    retrieved = retrieve(question, k=k)
    # Prefix each chunk with its page number so the model can cite it.
    context = "\n\n".join(f"(Page {r['page']}) {r['text']}" for r in retrieved)

    if client is None:
        # Retrieval-only fallback
        return (
            "LLM is disabled (no OPENAI_API_KEY set).\n\n"
            "Top retrieved context:\n" + context[:2500],
            retrieved,
        )

    resp = client.chat.completions.create(
        model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
        temperature=0.2,
        messages=[
            {"role": "system", "content": SYSTEM},
            {"role": "user", "content": f"Question: {question}\n\nContext:\n{context}"},
        ],
    )
    # The SDK types message.content as optional; substitute a placeholder so
    # callers that concatenate the answer with other text never get None.
    answer = resp.choices[0].message.content or "The model returned an empty response."
    return answer, retrieved

# Smoke test: ask one sample question, then list the distinct source pages.
demo_question = "What is the policy position on discrimination?"
ans, retrieved = answer_with_llm(demo_question, k=4)
print(ans)
cited_pages = sorted(set(hit["page"] for hit in retrieved))
print("Sources:", cited_pages)


In [None]:
# ✅ 8) Gradio UI
import gradio as gr

def chat_fn(message, history, top_k):
    """Build the chat reply for *message*: the answer plus a sources list.

    ``history`` is accepted but not used. ``top_k`` is converted to ``int``
    before being passed on as the number of chunks to retrieve.
    """
    answer, retrieved = answer_with_llm(message, k=int(top_k))
    source_lines = [
        f"- Page {hit['page']} (score {hit['score']:.3f})" for hit in retrieved
    ]
    footer = "\n\n---\n**Sources (retrieved):**\n" + "\n".join(source_lines)
    return answer + footer

with gr.Blocks() as demo:
    gr.Markdown("# 📘 HR Policy Assistant (RAG)")
    gr.Markdown("Ask questions about the uploaded HR policy PDF. The app retrieves relevant sections and (optionally) uses an LLM to answer.")

    top_k = gr.Slider(1, 8, value=4, step=1, label="Top-K chunks to retrieve")
    # Register the slider as an additional input so its *current* value is
    # passed to chat_fn(message, history, top_k) on every message. Reading
    # `top_k.value` inside a lambda only yields the default set above, so
    # moving the slider would have no effect.
    chat = gr.ChatInterface(fn=chat_fn, additional_inputs=[top_k])

# NOTE(review): share=True requests a publicly reachable link; confirm that
# is acceptable for the HR document being served.
demo.launch(share=True)
