In [None]:
!pip install -q google-generativeai faiss-cpu PyPDF2 langchain

import os
import numpy as np
import faiss
import google.generativeai as genai
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document


In [134]:
# Set up Gemini API.
# The key is read from the environment only. A literal key must never be used
# as a fallback here: anything written in a notebook cell is saved and shared
# together with the file.
# NOTE(review): an earlier revision of this cell embedded a key literal; treat
# that key as leaked and rotate it.
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    raise RuntimeError(
        "GEMINI_API_KEY is not set. Export it in the environment before "
        "starting the kernel."
    )
genai.configure(api_key=api_key)

# Gemini interaction wrapper
class GoogleAI:
    """Small wrapper around the Gemini chat and embedding calls used here.

    Args:
        chat_model_name: Model name passed to ``genai.GenerativeModel``.
        embedding_model_name: Model name passed to ``genai.embed_content``.

    Both arguments default to the values that were previously hard-coded, so
    ``GoogleAI()`` behaves as before.
    """

    def __init__(self, chat_model_name="gemini-1.5-flash",
                 embedding_model_name="models/embedding-001"):
        self.chat_model = genai.GenerativeModel(chat_model_name)
        self.embedding_model_name = embedding_model_name

    def _embed(self, text, task_type):
        """Embed one text with the given task type and return its vector."""
        result = genai.embed_content(
            model=self.embedding_model_name,
            content=text,
            task_type=task_type,
        )
        return result["embedding"]

    def embed_query(self, text):
        """Return the embedding of ``text`` using the ``retrieval_query`` task type."""
        return self._embed(text, "retrieval_query")

    def embed_documents(self, texts):
        """Return one embedding per item of ``texts``, in the same order.

        Each text is embedded with a separate call using the
        ``retrieval_document`` task type. An empty ``texts`` yields ``[]``.
        """
        return [self._embed(text, "retrieval_document") for text in texts]

    def chat(self, prompt):
        """Send ``prompt`` to the chat model and return the response's ``text``."""
        response = self.chat_model.generate_content(prompt)
        return response.text


In [None]:
def extract_text_from_pdf(pdf_path):
    """Concatenate the extracted text of every page of the PDF at ``pdf_path``.

    Pages whose extraction result is empty or ``None`` are skipped; every
    kept page is followed by a newline. Returns ``""`` when no page yields
    text.
    """
    extracted = (page.extract_text() for page in PdfReader(pdf_path).pages)
    return "".join(body + "\n" for body in extracted if body)

def text_split(raw_text, chunk_size=3000, chunk_overlap=100):
    """Split ``raw_text`` into chunks and wrap each one in a ``Document``.

    Args:
        raw_text: The text to split.
        chunk_size: Passed to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to the previously hard-coded 3000.
        chunk_overlap: Passed to the splitter as ``chunk_overlap``.
            Defaults to the previously hard-coded 100.

    Returns:
        A list of ``Document`` objects, one per chunk, in splitter order.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        # Separators are tried in this order: paragraph break, line break,
        # space, then full stop.
        separators=["\n\n", "\n", " ", "."],
    )
    return [
        Document(page_content=chunk)
        for chunk in text_splitter.split_text(raw_text)
    ]


In [138]:
def create_faiss_index(embeddings_list):
    """Build a flat L2 FAISS index holding the given embedding vectors.

    Args:
        embeddings_list: Non-empty sequence of equal-length numeric vectors.

    Returns:
        A ``faiss.IndexFlatL2`` to which all vectors have been added, in
        input order.

    Raises:
        ValueError: If the input is empty, is not a 2-D collection of
            vectors, or contains zero-length vectors.
    """
    # Convert once, directly to float32, instead of building a float64
    # array first and copying it again with astype.
    vectors = np.array(embeddings_list, dtype="float32")
    if vectors.ndim != 2 or 0 in vectors.shape:
        raise ValueError(
            "embeddings_list must be a non-empty sequence of equal-length, "
            f"non-empty vectors; got array of shape {vectors.shape}"
        )
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index


In [None]:
# Build the retrieval index: PDF -> text -> chunks -> embeddings -> FAISS.
google_ai = GoogleAI()

pdf_path = "../Data/ReferenceBook.pdf"  # change path if needed
raw_text = extract_text_from_pdf(pdf_path)

documents = text_split(raw_text)
chunk_texts = [doc.page_content for doc in documents]

# A PDF with no extractable text produces no chunks. Stop here with a clear
# message rather than failing later inside the embedding/index steps.
if not chunk_texts:
    raise ValueError(
        f"No extractable text found in {pdf_path!r}; nothing to index."
    )

print("Embedding chunks...")
chunk_embeddings = google_ai.embed_documents(chunk_texts)

print("Creating FAISS index...")
faiss_index = create_faiss_index(chunk_embeddings)


Embedding chunks...


In [None]:
def query_bot(query, k=5):
    """Answer ``query`` using the ``k`` nearest indexed chunks as context.

    Reads the module-level ``google_ai``, ``faiss_index`` and ``chunk_texts``.

    Args:
        query: The user's question; surrounding whitespace is stripped.
        k: Number of neighbours requested from the FAISS index.

    Returns:
        The chat model's answer, or one of these fixed strings:
        a prompt to enter a valid question when ``query`` is blank, a
        "couldn't find relevant context" message when no chunk is retrieved,
        or ``"Error: ..."`` when any step raises.
    """
    query = query.strip()
    if not query:
        return "Please enter a valid question."

    try:
        query_emb = google_ai.embed_query(query)
        # Only the neighbour ids are needed; the distances are discarded.
        _, neighbour_ids = faiss_index.search(
            np.array([query_emb]).astype("float32"), k
        )
        # FAISS reports missing neighbours (fewer than k vectors indexed) as
        # -1. Without the lower bound, -1 would index the last chunk from the
        # end of the list and add unrelated text to the context.
        retrieved_chunks = [
            chunk_texts[i]
            for i in neighbour_ids[0]
            if 0 <= i < len(chunk_texts)
        ]

        if not retrieved_chunks:
            return "Sorry, I couldn't find relevant context."

        context = "\n\n".join(retrieved_chunks).strip()

        prompt = f"""You are a knowledgeable and concise medical assistant. Use the information below to answer the question clearly. If the context lacks enough information, reply with 'Context not sufficient'.

Context:
{context}

Question: {query}

Answer:"""

        return google_ai.chat(prompt)

    except Exception as e:
        # Deliberate catch-all: the interactive loop shows the failure as the
        # bot's reply instead of terminating.
        return f"Error: {e}"


In [None]:
# Interactive question loop: runs until the user types 'exit'.
while True:
    try:
        query = input("Ask your medical question (or type 'exit'): ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input or an interrupt at the prompt ends the session
        # cleanly instead of surfacing a traceback.
        print()
        print("Goodbye!")
        break
    if query.lower() == "exit":
        print("Goodbye!")
        break
    response = query_bot(query)
    print("Bot:", response, "\n")
