In [5]:
import fitz  # PyMuPDF for PDF processing
import faiss
import numpy as np
import google.generativeai as genai

# 🔑 Set up Gemini API Key
GEMINI_API_KEY = ""
# Replace with your API key
genai.configure(api_key=GEMINI_API_KEY)

# ✅ Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = "\n".join([page.get_text("text") for page in doc])
    return text

# ✅ Function to split text into smaller chunks
def split_text(text, chunk_size=500, overlap=50):
    chunks = []
    for i in range(0, len(text), chunk_size - overlap):
        chunks.append(text[i : i + chunk_size])
    return chunks

# ✅ Function to generate embeddings using Gemini API
def get_gemini_embeddings(texts):
    model = "models/text-embedding-004"  # Free embedding model
    embeddings = []
    for text in texts:
        response = genai.embed_content(model=model, content=text, task_type="retrieval_document")
        embeddings.append(response["embedding"])
    return np.array(embeddings, dtype="float32")

# ✅ Function to create FAISS vector store
def create_faiss_index(embeddings):
    dim = embeddings.shape[1]  # Get embedding dimension
    index = faiss.IndexFlatL2(dim)  # L2 distance-based FAISS index
    index.add(embeddings)
    return index

# ✅ Function to query FAISS and get relevant chunks
def query_faiss(index, text_chunks, query, top_k=3):
    query_embedding = get_gemini_embeddings([query])  # Get query embedding
    distances, indices = index.search(query_embedding, top_k)  # Search in FAISS

    results = [text_chunks[i] for i in indices[0]]
    return results

# ✅ Function to generate a final answer using Gemini
def generate_answer(query, retrieved_text):
    prompt = f"""
    You are an AI assistant. The user asked the question:
    "{query}"

    Below is the most relevant information extracted from a document:
    {retrieved_text}

    Please provide a **concise and direct answer** based only on the given text.
    """

    model = genai.GenerativeModel("gemini-1.5-pro")  # ✅ Correct way to call Gemini
    response = model.generate_content(prompt)

    return response.text.strip()

# ✅ Main Execution
if __name__ == "__main__":
    pdf_path = "/content/constitution_of_india_summary.pdf"# 🔹 Replace with your PDF file path
    text = extract_text_from_pdf(pdf_path)  # Extract text
    text_chunks = split_text(text)  # Split into chunks
    embeddings = get_gemini_embeddings(text_chunks)  # Generate embeddings

    index = create_faiss_index(embeddings)  # Create FAISS index

    while True:
        query = input("\n🔹 Ask a question about the document (or type 'exit' to quit): ")
        if query.lower() == "exit":
            break

        retrieved_text = " ".join(query_faiss(index, text_chunks, query))  # Combine retrieved chunks
        answer = generate_answer(query, retrieved_text)  # Process with Gemini

        print("\n📌 Answer:")
        print(answer)




🔹 Ask a question about the document (or type 'exit' to quit): quit

📌 Answer:
The provided text does not contain information related to quitting.

🔹 Ask a question about the document (or type 'exit' to quit): exit
