In [None]:
!pip install pypdf chromadb google-generativeai langchain-community


In [None]:
!pip install -U google-genai


In [None]:
import os
from google.genai import Client
from google.genai.types import Content, Part


In [None]:
# -------------------------------------------------------------------------
# 🔑 SECURITY STEP: SAFE API KEY HANDLING (Run this first)
# -------------------------------------------------------------------------
import os
import google.generativeai as genai
from google.genai import Client # This handles the 'client = Client()' part
from google.colab import userdata

# 1. Fetch the key safely
from getpass import getpass  # local import so this cell stays self-contained

try:
    # Try to load from Colab Secrets
    SECRET_KEY = userdata.get('GOOGLE_API_KEY')
except Exception:
    # Any failure reading the secret falls back to a manual prompt.
    print("‚ö†Ô∏è Key not found in Secrets.")
    # getpass hides the typed value, so the key is not echoed into the
    # cell output (and therefore not saved with the notebook).
    SECRET_KEY = getpass("Please enter your Google Gemini API Key: ")

# Fail early with a clear message instead of configuring both clients
# with an empty key and hitting an opaque error on the first API call.
SECRET_KEY = (SECRET_KEY or "").strip()
if not SECRET_KEY:
    raise ValueError("No Google Gemini API key was provided.")

# 2. Configure the FIRST library (google.generativeai)
genai.configure(api_key=SECRET_KEY)

# 3. Configure the SECOND library (google.genai)
client = Client(api_key=SECRET_KEY)

print("‚úÖ Security Check: API Key configured for both libraries!")

In [None]:
from pypdf import PdfReader

def extract_text_from_pdf(file):
    """Return the text of every page of a PDF joined into one string.

    Args:
        file: Whatever ``PdfReader`` accepts as its source; it is passed
            through unchanged.

    Returns:
        str: The page texts concatenated in page order with no separator.
        A page whose ``extract_text()`` result is falsy contributes an
        empty string.
    """
    pdf = PdfReader(file)
    return "".join(page.extract_text() or "" for page in pdf.pages)


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def chunk_text(text, chunk_size=800, chunk_overlap=150):
    """Split ``text`` into chunks using ``RecursiveCharacterTextSplitter``.

    Args:
        text: The string to split.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        Whatever the splitter's ``split_text`` returns for ``text``.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)


In [None]:
def get_embedding_new(text):
    """Embed ``text`` with the ``models/text-embedding-004`` model.

    Calls ``genai.embed_content`` (the ``google.generativeai`` module) and
    returns the value stored under the ``'embedding'`` key of its result.

    Args:
        text: The content to embed; passed as ``content``.

    Returns:
        The ``'embedding'`` entry of the API response.
    """
    response = genai.embed_content(
        content=text,
        model="models/text-embedding-004",
    )
    return response['embedding']

In [None]:
import chromadb
from chromadb.config import Settings

# Create a ChromaDB client with default settings. The intent here is an
# in-memory store with no persistence; the storage backend behind the
# default client depends on the installed chromadb version.
chroma_client = chromadb.Client()

# Reuse the "pdf_rag" collection if it already exists, otherwise create it.
collection = chroma_client.get_or_create_collection(name="pdf_rag")

In [None]:
from google.colab import files

# Ask the user to upload a file; `uploaded` is keyed by file name.
uploaded = files.upload()
if not uploaded:
    # Without this guard a cancelled upload surfaces as a bare StopIteration.
    raise ValueError("No file was uploaded.")
file_name = next(iter(uploaded))

# Extract -> chunk -> embed.
pdf_text = extract_text_from_pdf(file_name)
chunks = chunk_text(pdf_text)
if not chunks:
    raise ValueError(f"No extractable text found in {file_name!r}.")

# The original called `embed_chunks`, which is not defined anywhere in the
# visible notebook (NameError). Embed each chunk with the helper that is.
embeddings = [get_embedding_new(chunk) for chunk in chunks]

# add to Chroma
# NOTE(review): ids restart at "0" on every run, so re-running this cell
# against the same collection reuses ids -- confirm that is intended.
ids = [str(i) for i in range(len(chunks))]
collection.add(documents=chunks, embeddings=embeddings, ids=ids)

len(chunks)


In [None]:
def answer_question(question, top_k=3):
    """Answer ``question`` from the chunks stored in the global ``collection``.

    Steps: embed the question with ``get_embedding_new``, retrieve the
    ``top_k`` nearest chunks from ``collection``, build a context-only prompt
    and send it to the Gemini model.

    Args:
        question: The user's question.
        top_k: Number of chunks requested from the collection.

    Returns:
        str: ``response.text`` from the model, or a fixed "nothing found"
        message when retrieval returns no documents (same behaviour and
        wording as ``answer_question_real``).
    """
    # 1) embed question
    q_emb = get_embedding_new(question)

    # 2) retrieve chunks
    results = collection.query(
        query_embeddings=[q_emb],
        n_results=top_k
    )

    # Do not ask the model to answer from an empty context.
    documents = results["documents"]
    if not documents or not documents[0]:
        return "I couldn't find any relevant information in the PDF."

    retrieved_docs = documents[0]
    context = "\n\n".join(retrieved_docs)

    # 3) build prompt
    prompt = f"""
    Answer the following question using ONLY the context below.

    CONTEXT:
    {context}

    QUESTION:
    {question}

    ANSWER:
    """

    # 4) generate answer
    model = genai.GenerativeModel('models/gemini-pro-latest')
    response = model.generate_content(
        contents=prompt
    )

    return response.text

In [None]:
# Print the name of every model whose supported generation methods
# include 'generateContent'.
print("Listing available models:")
for m in genai.list_models():
    if "generateContent" not in m.supported_generation_methods:
        continue
    print(m.name)

In [None]:
# Smoke-test the pipeline with a fixed sample question and print the answer.
# Requires the earlier cells (collection populated, answer_question defined).
question = 'what skills does this candidate have?'
answer = answer_question(question)
print(answer)

In [None]:
# Interactive query: read a question from the user and print the answer
# produced by answer_question.
question = input("Ask something: ")
print(answer_question(question))


In [None]:
!pip install gradio


In [None]:
import gradio as gr
import time
import google.generativeai as genai
import chromadb
from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# --- 1. SETUP & CONFIGURATION ---
# (Assumes you have already run genai.configure(api_key=...) in previous cells)

# Initialize ChromaDB
chroma_client = chromadb.Client()
COLLECTION_NAME = "gradio_pdf_rag"

# Ensure clean start: drop any collection left from an earlier run of this cell.
try:
    chroma_client.delete_collection(COLLECTION_NAME)
except Exception:
    # Best-effort delete: a failure here (e.g. the collection does not exist
    # yet) is ignored. Catching Exception instead of using a bare `except`
    # lets KeyboardInterrupt and SystemExit propagate.
    pass
collection = chroma_client.create_collection(COLLECTION_NAME)

# --- 2. REAL BACKEND FUNCTIONS (Adapted from your Notebook) ---

def extract_text_from_pdf_real(file_path):
    """Read a PDF with ``PdfReader`` and return all page text as one string.

    Args:
        file_path: Source handed to ``PdfReader`` unchanged.

    Returns:
        str: Page texts concatenated in order without a separator; a page
        whose ``extract_text()`` result is falsy adds nothing.
    """
    page_texts = [page.extract_text() or "" for page in PdfReader(file_path).pages]
    return "".join(page_texts)

def chunk_text_real(text, chunk_size=800, chunk_overlap=150):
    """Split ``text`` with a ``RecursiveCharacterTextSplitter``.

    Args:
        text: The string to split.
        chunk_size: Passed to the splitter as ``chunk_size``.
        chunk_overlap: Passed to the splitter as ``chunk_overlap``.

    Returns:
        The result of the splitter's ``split_text(text)``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )
    pieces = text_splitter.split_text(text)
    return pieces

def get_embedding_real(text):
    """Return the embedding of ``text`` from ``models/text-embedding-004``.

    Args:
        text: Content sent to ``genai.embed_content`` as ``content``.

    Returns:
        The value under the ``'embedding'`` key of the API response.
    """
    embed_kwargs = {"model": "models/text-embedding-004", "content": text}
    return genai.embed_content(**embed_kwargs)['embedding']

def answer_question_real(question, top_k=5):
    """Answer ``question`` using chunks retrieved from the global ``collection``.

    Args:
        question: The user's question.
        top_k: Number of chunks requested from the collection.

    Returns:
        str: The model's ``response.text``, or a fixed message when the
        query returns no documents.
    """
    # Embed the question and look up the nearest stored chunks.
    question_vector = get_embedding_real(question)
    hits = collection.query(query_embeddings=[question_vector], n_results=top_k)

    # Nothing retrieved: answer without calling the model.
    documents = hits['documents']
    if not (documents and documents[0]):
        return "I couldn't find any relevant information in the PDF."

    # Retrieved chunks become the context, separated by blank lines.
    context = "\n\n".join(documents[0])

    prompt = f"""
    Answer the following question using ONLY the context provided below.
    If the answer is not in the context, state that you don't know.

    CONTEXT:
    {context}

    QUESTION:
    {question}

    ANSWER:
    """

    # Generate the answer with Gemini.
    gemini = genai.GenerativeModel('models/gemini-pro-latest')
    return gemini.generate_content(prompt).text

# --- 3. GRADIO LOGIC (Connecting UI to Backend) ---

def process_pdf_ui(file):
    """Index an uploaded PDF into a fresh collection for the Gradio app.

    Recreates the global ``collection``, extracts the PDF text, chunks it,
    embeds every chunk and stores the chunks with their embeddings.

    Args:
        file: The value from the file input. Either an object exposing the
            path as ``.name`` or a plain path string is accepted; ``None``
            means nothing was uploaded.

    Returns:
        tuple: ``(ok, status)`` where ``ok`` is ``True`` only when indexing
        succeeded and ``status`` is whatever ``gr.Info(...)`` returns for the
        corresponding message. Failures are reported through the status
        message rather than raised.
    """
    global collection

    if file is None:
        return False, gr.Info("‚ö†Ô∏è Please upload a PDF file first.")

    try:
        # Reset collection for new file
        try:
            chroma_client.delete_collection(COLLECTION_NAME)
        except Exception:
            # Best-effort delete; Exception (not a bare except) so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            pass
        collection = chroma_client.create_collection(COLLECTION_NAME)

        # 1. Extract -- accept both file-like wrappers (.name) and path strings.
        pdf_path = getattr(file, "name", file)
        text = extract_text_from_pdf_real(pdf_path)

        # 2. Chunk
        chunks = chunk_text_real(text)
        if not chunks:
            # Report a readable reason instead of passing empty lists to the
            # vector store; handled by the except clause below.
            raise ValueError("No extractable text found in the PDF.")

        # 3. Embed & Store
        # Note: In production, batching is better, but this works for notebook use
        embeddings = [get_embedding_real(chunk) for chunk in chunks]
        ids = [str(i) for i in range(len(chunks))]

        collection.add(documents=chunks, embeddings=embeddings, ids=ids)

        return True, gr.Info("‚úÖ PDF Processed Successfully! Ask away.")

    except Exception as e:
        return False, gr.Info(f"‚ùå Error: {str(e)}")

def chat_ui(message, history, is_processed):
    """Chat callback: answer ``message`` once a PDF has been processed.

    Args:
        message: The user's latest chat message.
        history: Chat history supplied by the interface; not used here.
        is_processed: Truthy when a PDF has been indexed.

    Returns:
        str: The result of ``answer_question_real(message)`` when
        ``is_processed`` is truthy, otherwise a reminder to upload and
        process a PDF first.
    """
    if is_processed:
        # Delegate to the retrieval + generation pipeline.
        return answer_question_real(message)
    return "‚ö†Ô∏è Please upload and process a PDF using the sidebar first."

# --- 4. UI LAYOUT ---
# Soft theme with a blue/slate palette and large text; passed to gr.Blocks below.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
    text_size="lg"
)

# Components are created inside nested context managers, so the order of the
# statements below determines the page layout.
with gr.Blocks(theme=theme, title="Gemini RAG Analyst") as demo:

    # State to track if PDF is ready (starts False; written by process_pdf_ui,
    # read by chat_ui via additional_inputs).
    pdf_state = gr.State(False)

    with gr.Row():
        # --- Sidebar --- (upload + process controls)
        with gr.Column(scale=1, variant="panel"):
            gr.Markdown("## üìÇ Document Hub")
            file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            process_btn = gr.Button("üöÄ Process PDF", variant="primary")

            # Invisible element to catch output updates: it is the second
            # output of process_pdf_ui, which returns the result of gr.Info(...).
            status_txt = gr.Markdown(visible=False)

            gr.Markdown("---")
            gr.Markdown("**Instructions:**\n1. Upload PDF\n2. Click Process\n3. Wait for Success\n4. Chat")

        # --- Chat Area ---
        with gr.Column(scale=3):
            gr.Markdown("## ü§ñ AI Research Assistant")

            chatbot = gr.Chatbot(height=600, type="messages", show_copy_button=True)

            # chat_ui receives (message, history) plus the current pdf_state value.
            chat_int = gr.ChatInterface(
                fn=chat_ui,
                chatbot=chatbot,
                additional_inputs=[pdf_state],
                textbox=gr.Textbox(placeholder="Ask a question about the uploaded PDF..."),
                theme="soft"
            )

    # Event Listener: clicking the button runs process_pdf_ui on the uploaded
    # file and writes its two return values to pdf_state and status_txt.
    process_btn.click(
        fn=process_pdf_ui,
        inputs=[file_input],
        outputs=[pdf_state, status_txt]
    )

# Launch the app when this code runs as the main module.
if __name__ == "__main__":
    demo.launch(debug=True)