<a href="https://colab.research.google.com/github/tiashamondal29/Hr-policy-rag-Gradio-assistant/blob/main/Crafting_an_AI_Powered_HR_Assistant.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ==============================
# STEP 1: Install Required Libraries
# ==============================
!pip install -q chromadb langchain pypdf gradio langchain-community
!pip install -q google-generativeai langchain-google-genai
!pip install -q sentence-transformers langchain-text-splitters

# ==============================
# STEP 2: Import Libraries
# ==============================
import os
from datetime import datetime
import gradio as gr

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# ==============================
# STEP 3: Setup Google Gemini API Key
# ==============================
# Read the Gemini key from Colab's secret store when running inside Colab.
# Outside Colab the google.colab package is not importable, so fall back to a
# GOOGLE_API_KEY that is already present in the environment and fail early with
# a clear message when neither source provides one.
try:
    from google.colab import userdata
except ImportError:
    userdata = None

if userdata is not None:
    os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")
elif not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to Colab secrets, or export it as an "
        "environment variable when running outside Colab."
    )




In [2]:
# ==============================
# STEP 4: Initialize Components
# ==============================
# Shared model/tooling objects used by the upload and chat callbacks below.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash-lite", temperature=0)
# Chunks are at most 1000 characters with a 200-character overlap between neighbours.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Create directory for vector database.
# Defaults to the Colab path used originally; set VECTOR_DB_DIR to relocate it
# when the notebook runs somewhere /content does not exist or is not writable.
PERSIST_DIR = os.environ.get("VECTOR_DB_DIR", "/content/vector_db")
os.makedirs(PERSIST_DIR, exist_ok=True)

# Mutable module state, filled in by process_new_pdf() after a successful upload.
vectorstore = None
retriever = None
rag_chain = None     # replaces qa_chain
feedback_log = []    # in-memory only; appended to by save_feedback()

# Helper: format retrieved docs into a single context string
def format_docs(docs):
    """Concatenate the text of retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        str: Every ``page_content`` value, in order, separated by a
        ``---`` rule surrounded by blank lines. Empty input gives ``""``.
    """
    separator = "\n\n---\n\n"
    return separator.join(doc.page_content for doc in docs)

# ==============================
# STEP 5: Chatbot Function
# ==============================
def chatbot(message, history):
    """Answer a single user question with the current RAG chain.

    Args:
        message: The question text typed by the user.
        history: Previous conversation turns. Accepted so the function matches
            the chat-callback signature; it is not used when answering.

    Returns:
        str: The model's answer, or a user-facing warning/error string when the
        message is blank, no document has been indexed yet, or the chain fails.
        Exceptions raised while invoking the chain are reported in the returned
        string instead of propagating, so the UI always gets something to show.
    """
    # Reject blank input up front so no retrieval/LLM call is spent on it.
    if not message or not str(message).strip():
        return "‚ö†Ô∏è Please type a question first."

    try:
        # rag_chain is only read here, so no `global` declaration is needed.
        if rag_chain is None:
            return "‚ö†Ô∏è Please upload a PDF document first using the 'Upload Document' tab."

        # LCEL: invoke the RAG chain with the question
        response = rag_chain.invoke(message)

        # ChatGoogleGenerativeAI returns an AIMessage with .content
        return response.content if hasattr(response, "content") else str(response)

    except Exception as e:
        return f"‚ùå Error: {str(e)}"

# ==============================
# STEP 6: Upload PDF Function
# ==============================
def process_new_pdf(file):
    """Index an uploaded PDF and rebuild the global RAG chain from it.

    The PDF is loaded, split with the module-level ``splitter``, embedded with
    ``embeddings`` and stored in a Chroma collection under ``PERSIST_DIR``.
    Any collection left over from a previous upload is dropped first, so the
    assistant only ever answers from the most recently processed document.

    Args:
        file: Path of the uploaded PDF, or ``None`` when nothing was uploaded.

    Returns:
        str: A user-facing status message (success summary, warning, or error).
        Failures are reported in the returned string rather than raised.

    Side effects:
        Rebinds the module globals ``vectorstore``, ``retriever`` and
        ``rag_chain``.
    """
    global vectorstore, retriever, rag_chain

    if file is None:
        return "‚ö†Ô∏è Please upload a PDF file first."

    try:
        # Load PDF
        documents = PyPDFLoader(file).load()

        # Split into chunks
        docs = splitter.split_documents(documents)

        # A PDF without extractable text yields no chunks; say so explicitly
        # instead of handing an empty chunk list to the vector store.
        if not docs:
            return "‚ö†Ô∏è No readable text was found in this PDF. Please upload a text-based PDF."

        collection_name = "hr_policy_hf_embeddings"

        # The old index is about to be discarded, so the old chain must not be
        # used any more (even if the rebuild below fails half-way).
        vectorstore = None
        retriever = None
        rag_chain = None

        # Drop the existing collection before re-indexing. from_documents adds
        # to a collection of the same name in the same directory, so without
        # this every upload would pile new chunks on top of the old ones.
        Chroma(
            collection_name=collection_name,
            embedding_function=embeddings,
            persist_directory=PERSIST_DIR,
        ).delete_collection()

        # Create vector database
        vectorstore = Chroma.from_documents(
            docs,
            embeddings,
            collection_name=collection_name,
            persist_directory=PERSIST_DIR
        )
        # Keep the explicit flush where the wrapper still offers it; skip it on
        # versions that have removed the method.
        if hasattr(vectorstore, "persist"):
            vectorstore.persist()

        # Setup retriever
        retriever = vectorstore.as_retriever()

        # Define RAG prompt
        prompt = ChatPromptTemplate.from_template(
            """
You are a helpful HR assistant. Use ONLY the following HR policy context to answer.
If the answer is not clearly present, say you don't know.

Context:
{context}

Question:
{question}

Answer in very simple language so that any employee can understand.
"""
        )

        # The question is fed both to the retriever (to build the context) and
        # straight through to the prompt.
        rag_chain = (
            RunnableParallel(
                context=retriever | format_docs,
                question=RunnablePassthrough(),
            )
            | prompt
            | llm
        )

        return f"""‚úÖ Successfully processed!
üìÑ Total chunks: {len(docs)}
üìÇ Saved to: {PERSIST_DIR}

üëâ Go to 'Chat' tab to ask questions!"""

    except Exception as e:
        return f"‚ùå Error: {str(e)}"

# ==============================
# STEP 7: Feedback Function
# ==============================
def save_feedback(feedback_text):
    """Record one piece of user feedback in the in-memory ``feedback_log``.

    Args:
        feedback_text: Text entered in the feedback box.

    Returns:
        str: A thank-you message once the entry is stored, or a warning when
        the text is missing or contains only whitespace (nothing is stored).
    """
    has_content = bool(feedback_text) and feedback_text.strip() != ""
    if not has_content:
        return "‚ö†Ô∏è Please enter feedback before submitting."

    # Timestamp is local time, second resolution.
    entry = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "feedback": feedback_text,
    }
    feedback_log.append(entry)
    return "‚úÖ Thank you for your feedback!"

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 252b5ea5-5d00-442b-aa71-8404282a36b6)')' thrown while requesting HEAD https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/./sentence_bert_config.json
Retrying in 1s [Retry 1/5].
'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 5f8cf787-5fed-4308-8ee5-d54c22ebe9aa)')' thrown

In [3]:
# ==============================
# STEP 8: Create Gradio Interface (Updated UI)
# ==============================
import gradio as gr

# Custom stylesheet handed to gr.Blocks(css=...) below.
#   #app-header : the banner rendered at the top of the app
#   .badge      : the small pill labels inside the banner
#   .kpanel     : the bordered "Knowledge Panel" group on the Chat tab
#   .small      : reduced-size helper text
APP_CSS = """
:root { --radius: 14px; }

#app-header {
  padding: 14px 18px;
  border-radius: var(--radius);
  border: 1px solid rgba(255,255,255,0.08);
  background: linear-gradient(135deg, rgba(255,255,255,0.06), rgba(255,255,255,0.02));
  margin-bottom: 14px;
}

.badge {
  display: inline-flex;
  gap: 8px;
  align-items: center;
  padding: 6px 10px;
  border-radius: 999px;
  border: 1px solid rgba(255,255,255,0.10);
  background: rgba(255,255,255,0.04);
  font-size: 12px;
  margin-right: 8px;
}

.kpanel {
  border-radius: var(--radius);
  border: 1px solid rgba(255,255,255,0.10);
  background: rgba(255,255,255,0.03);
  padding: 14px;
}

.small {
  font-size: 12px;
  opacity: 0.9;
}
"""

def get_system_status():
    """Report whether a document has been indexed, for the status textboxes.

    Readiness is derived solely from whether the global ``rag_chain`` has been
    built; the RAG pipeline itself is not touched.

    Returns:
        tuple[str, str]: ``(vector_db_status, chunk_status)`` display strings.
    """
    if rag_chain is None:
        return "Not ready ‚ùå", "‚Äî"
    return "Ready ‚úÖ", "Loaded ‚úÖ"

def set_example(q):
    """Return the suggested question unchanged so it can be written to the textbox.

    Args:
        q: The suggested question text.

    Returns:
        The same value that was passed in.
    """
    suggested_question = q
    return suggested_question

# Build the three-tab UI (Chat / Upload Document / Feedback). The resulting
# Blocks app is bound to `demo` and launched in a later cell.
with gr.Blocks(theme=gr.themes.Soft(), title="AI HR Assistant", css=APP_CSS) as demo:

    # --- Header ---
    gr.Markdown(
        """
<div id="app-header">
  <div style="display:flex;justify-content:space-between;align-items:flex-start;gap:12px;flex-wrap:wrap;">
    <div>
      <h2 style="margin:0;">ü§ñ Nestl√© HR Policy Assistant</h2>
      <div class="small">Ask questions about HR policies and get answers grounded in the uploaded PDF.</div>
    </div>
    <div style="text-align:right;">
      <span class="badge">üîé RAG + Vector Search</span>
      <span class="badge">üìÑ PDF-grounded</span>
      <span class="badge">üß© Gradio UI</span>
    </div>
  </div>
</div>
        """,
    )

    with gr.Tabs():

        # =======================
        # TAB 1: CHAT (Revamped)
        # =======================
        with gr.Tab("üí¨ Chat"):
            with gr.Row(equal_height=True):
                # Left: Chat
                with gr.Column(scale=7):
                    gr.Markdown("### Chat with your HR Assistant")
                    # NOTE(review): history is handled below as (user, bot) tuples;
                    # confirm the installed Gradio version still accepts that format.
                    chat = gr.Chatbot(label="Conversation", height=430)

                    with gr.Row():
                        msg = gr.Textbox(
                            label="Type your question",
                            placeholder="e.g., What is the annual leave policy?",
                            lines=2
                        )
                    with gr.Row():
                        send = gr.Button("Send ‚û§", variant="primary")
                        clear = gr.Button("Clear", variant="secondary")

                    # Use the same backend chatbot(message, history)
                    def chat_submit(user_message, history):
                        """Append one question/answer turn to the conversation.

                        Args:
                            user_message: Text from the question textbox.
                            history: Current conversation as a list of
                                (user, bot) tuples; may be None before the
                                first turn.

                        Returns:
                            tuple: (updated history, "") - the empty string
                            clears the question textbox.
                        """
                        history = history or []
                        # Nothing to ask: leave the conversation as it is.
                        if not user_message or not user_message.strip():
                            return history, ""
                        bot_reply = chatbot(user_message, history)
                        return history + [(user_message, bot_reply)], ""

                    # Button click and Enter in the textbox trigger the same handler.
                    send.click(fn=chat_submit, inputs=[msg, chat], outputs=[chat, msg])
                    msg.submit(fn=chat_submit, inputs=[msg, chat], outputs=[chat, msg])

                    clear.click(lambda: [], outputs=chat)

                # Right: Knowledge Panel
                with gr.Column(scale=4):
                    gr.Markdown("### Knowledge Panel")
                    with gr.Group(elem_classes=["kpanel"]):
                        gr.Markdown(
                            """
**Quick tips**
- Upload the HR policy PDF in the **Upload Document** tab first.
- Ask short, specific questions for best results.
- If the answer isn't in the document, the bot should say *I don't know*.
                            """
                        )

                        db_status = gr.Textbox(label="Vector DB", interactive=False)
                        chunk_status = gr.Textbox(label="Document Chunks", interactive=False)
                        refresh = gr.Button("üîÑ Refresh status")

                        refresh.click(fn=get_system_status, inputs=[], outputs=[db_status, chunk_status])

                        gr.Markdown("**Suggested questions**")
                        ex1 = gr.Button("What is the leave policy?")
                        ex2 = gr.Button("How do I request time off?")
                        ex3 = gr.Button("What are employee benefits?")
                        ex4 = gr.Button("What is the work from home policy?")
                        ex5 = gr.Button("How do I submit expense claims?")

                        # set_example(q) needs the question text. Passing the
                        # button itself as the input supplies its label, so a
                        # click copies that suggested question into the textbox.
                        # (Previously inputs=[] meant the function received no
                        # argument at all.)
                        for example_btn in (ex1, ex2, ex3, ex4, ex5):
                            example_btn.click(
                                fn=set_example,
                                inputs=[example_btn],
                                outputs=msg,
                                api_name=False,
                            )

            gr.Markdown(
                "<div class='small'>Note: Answers are generated from the uploaded policy document. If you upload a new PDF, refresh status.</div>"
            )

        # =======================
        # TAB 2: UPLOAD DOCUMENT
        # =======================
        with gr.Tab("üì§ Upload Document"):
            gr.Markdown("### Upload and index your HR policy PDF")

            with gr.Row():
                with gr.Column(scale=6):
                    # type="filepath" hands process_new_pdf a path string.
                    file_input = gr.File(
                        label="Upload HR Policy PDF",
                        file_types=[".pdf"],
                        type="filepath"
                    )
                    upload_btn = gr.Button("üîÑ Process Document", variant="primary")

                with gr.Column(scale=6):
                    status_output = gr.Textbox(label="Processing Status", lines=10, interactive=False)

            upload_btn.click(
                fn=process_new_pdf,
                inputs=[file_input],
                outputs=[status_output]
            )

            gr.Markdown(
                """
**How it works**
1. Upload PDF
2. We split it into chunks
3. Create embeddings + store in Chroma
4. Chat tab will retrieve relevant chunks and answer
                """
            )

        # =======================
        # TAB 3: FEEDBACK
        # =======================
        with gr.Tab("üí≠ Feedback"):
            gr.Markdown("### Help us improve")

            feedback_text = gr.Textbox(
                label="Your feedback",
                lines=6,
                placeholder="What worked well? What felt wrong or missing?"
            )
            with gr.Row():
                feedback_btn = gr.Button("üì® Submit Feedback", variant="primary")
                feedback_clear = gr.Button("Clear")

            feedback_status = gr.Textbox(label="Status", interactive=False)

            feedback_btn.click(
                fn=save_feedback,
                inputs=[feedback_text],
                outputs=[feedback_status]
            )
            feedback_clear.click(lambda: "", outputs=feedback_text)




  with gr.Blocks(theme=gr.themes.Soft(), title="AI HR Assistant", css=APP_CSS) as demo:
  with gr.Blocks(theme=gr.themes.Soft(), title="AI HR Assistant", css=APP_CSS) as demo:
  chat = gr.Chatbot(label="Conversation", height=430)
  chat = gr.Chatbot(label="Conversation", height=430)
  ex1.click(fn=set_example, inputs=[], outputs=msg, api_name=False).then(None, None, None)
  ex2.click(fn=set_example, inputs=[], outputs=msg, api_name=False).then(None, None, None)
  ex3.click(fn=set_example, inputs=[], outputs=msg, api_name=False).then(None, None, None)
  ex4.click(fn=set_example, inputs=[], outputs=msg, api_name=False).then(None, None, None)
  ex5.click(fn=set_example, inputs=[], outputs=msg, api_name=False).then(None, None, None)


In [6]:
import gradio as gr
# Close any Gradio servers still running in this session (the recorded output
# shows the one on port 7860 being closed) - presumably so the launch cell
# below can bind the same port again.
gr.close_all()

Closing server running on port: 7860


In [7]:
import os

# Port comes from the PORT environment variable when set, otherwise 7860.
port = int(os.environ.get("PORT", 7860))
# 0.0.0.0 listens on all network interfaces rather than only localhost.
host = "0.0.0.0"
print(f"üöÄ Starting Gradio on {host}:{port}")

# share=True also requests a public *.gradio.live URL (see the recorded output).
# NOTE(review): anyone holding that link can upload documents and query the
# assistant - confirm this exposure is intended for HR policy content.
demo.launch(
    server_name=host,
    server_port=port,
    share=True,
)


üöÄ Starting Gradio on 0.0.0.0:7860
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://eacc9b99458b1cdb50.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


