In [1]:
# ==============================
# STEP 1: Install Required Libraries
# ==============================
!pip install -q chromadb langchain pypdf gradio langchain-community
!pip install -q google-generativeai langchain-google-genai

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.8/19.8 MB[0m [31m71.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.5/322.5 kB[0m [31m28.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m91.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m87.0 MB/s[0m eta [36m0:00:0

In [2]:
# ==============================
# STEP 2: Import Libraries
# ==============================
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
import gradio as gr

In [7]:
# ==============================
# STEP 3: Setup Google Gemini API Key
# ==============================
# The key is read from a Colab secret named GEMINI_API_KEY (add it in Colab's
# "Secrets" panel and grant this notebook access to it), then exported as the
# GOOGLE_API_KEY environment variable for the Gemini client libraries.
from google.colab import userdata

_gemini_api_key = userdata.get("GEMINI_API_KEY")
if not _gemini_api_key:
    # Fail here with a clear message rather than later with an opaque auth error.
    raise ValueError(
        'Colab secret "GEMINI_API_KEY" is empty or missing; add it before running this cell.'
    )
os.environ["GOOGLE_API_KEY"] = _gemini_api_key
del _gemini_api_key  # do not keep the raw key around in the notebook namespace

In [8]:
# ==============================
# STEP 4: Load and Split PDF
# ==============================
pdf_path = "/content/hr_policy.pdf"  # Upload your PDF to this path
loader = PyPDFLoader(pdf_path)
documents = loader.load()

# Break the loaded pages into overlapping chunks so each piece is small enough to
# embed while the overlap preserves context across chunk boundaries.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = splitter.split_documents(documents)

# An empty or image-only (scanned) PDF yields no text; stop here with a clear
# message instead of failing later when the vector store is built from nothing.
if not docs:
    raise ValueError(
        f"No text chunks were produced from {pdf_path}; "
        "the PDF may be empty or contain only scanned images."
    )

In [9]:
# ==============================
# STEP 5: Create Embeddings + Vector Store (Gemini)
# ==============================
# Gemini embedding model used to vectorise every chunk.
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")

# A dedicated collection name keeps these vectors apart from any collection built
# with a different embedding model (which would cause a dimension mismatch).
#
# Stable per-chunk ids make this cell safe to re-run: the same ids are written
# again instead of a second copy of every chunk being appended under new ids.
# NOTE(review): assumes the default in-memory Chroma client is shared between
# calls within one kernel session — confirm for the installed chromadb version.
chunk_ids = [f"hr_policy-{i}" for i in range(len(docs))]
vectorstore = Chroma.from_documents(
    docs,
    embeddings,
    ids=chunk_ids,
    collection_name="hr_policy_gemini_embeddings",
)

In [12]:
# ==============================
# STEP 6: Create QA Chain (Gemini LLM)
# ==============================
# Retriever over the vector store built in step 5, using its default search settings.
retriever = vectorstore.as_retriever()

# Gemini chat model that writes the answers; temperature=0 keeps sampling
# randomness to a minimum.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro", temperature=0)

# Retrieval-augmented QA chain: fetch relevant chunks, then let the LLM answer.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

In [13]:
# ==============================
# STEP 7: Gradio Chatbot (Bigger Textboxes)
# ==============================
def chatbot(query):
    """Answer one HR question using the retrieval QA chain.

    Args:
        query: Question text as typed into the Gradio textbox.

    Returns:
        The chain's answer string, a short prompt when the question is blank,
        or "Error: <message>" if the chain raises.
    """
    # Do not spend a retrieval + LLM call on blank input.
    if not query or not query.strip():
        return "Please enter a question."
    try:
        # invoke() replaces the deprecated Chain.run(); RetrievalQA takes the
        # question under "query" and returns the answer under "result".
        return qa_chain.invoke({"query": query})["result"]
    except Exception as e:
        # Show the failure in the UI instead of crashing the Gradio handler.
        return f"Error: {e}"

# Multi-line widgets: a 3-line box for the question, a 12-line box for the answer.
question_box = gr.Textbox(
    label="Ask HR Assistant a question",
    lines=3,
    placeholder="Type your HR question here...",
)
answer_box = gr.Textbox(label="Answer", lines=12)

# Wire the chatbot function to the widgets as a single-input, single-output app.
demo = gr.Interface(
    fn=chatbot,
    inputs=question_box,
    outputs=answer_box,
    title="AI-Powered HR Assistant (Gemini)",
)

# share=True publishes the app on a public gradio.live URL.
# NOTE(review): anyone holding that link can query the assistant — confirm this
# exposure is acceptable for the HR document being served.
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1bbad6f6f8f53bfe4b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


