In [1]:

# Setup Environment

import os, warnings
warnings.filterwarnings('ignore')

# Project directories
os.makedirs('medical_rag_bot/docs', exist_ok=True)
os.chdir('/content/medical_rag_bot')

# Install required packages
!pip install -q google-generativeai langchain langchain-google-genai chromadb PyMuPDF langchain-community

print("✅ Environment ready! Docs folder: medical_rag_bot/docs")


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.8/19.8 MB[0m [31m79.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m38.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m54.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m16.5 MB/s[0m eta [36m0:00:00

In [2]:
import google.generativeai as genai
from langchain.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from getpass import getpass


In [3]:
# Secure Gemini API Key Input
# ==========================
# The key is read with getpass and then exposed to both client libraries:
# through the GOOGLE_API_KEY environment variable and through genai.configure().
# Surrounding whitespace from a copy/paste is removed, and an empty entry is
# rejected here instead of being reported as a successful configuration.
API_KEY = getpass("Enter your Gemini API key: ").strip()
if not API_KEY:
    raise ValueError("No Gemini API key was entered; re-run this cell and provide a key.")
os.environ["GOOGLE_API_KEY"] = API_KEY
genai.configure(api_key=API_KEY)
print("✅ Gemini API configured!")

Enter your Gemini API key: ··········
✅ Gemini API configured!


In [4]:
# Document Upload
# ==========================
from google.colab import files
import shutil

# Make sure the destination exists even if the setup cell was not re-run.
os.makedirs('docs', exist_ok=True)

print("📋 Upload your medical PDFs")
uploaded = files.upload()
if not uploaded:
    print("⚠️ No files were uploaded.")

# Each uploaded name is moved from the working directory into docs/.
# Anything that does not end in '.pdf' is left where it is and reported,
# rather than being ignored silently.
for filename in uploaded.keys():
    if filename.endswith('.pdf'):
        shutil.move(filename, f'docs/{filename}')
        print(f"✅ Uploaded: {filename}")
    else:
        print(f"⚠️ Skipped (not a .pdf file): {filename}")

📋 Upload your medical PDFs


Saving Medical_book.pdf to Medical_book.pdf
✅ Uploaded: Medical_book.pdf


In [5]:
#  Document Processing
print("📖 Processing documents...")

documents = []
# Reset on every run so that the "no documents" path below cannot leave
# `chunks` undefined (or holding chunks from an earlier run) for later cells.
chunks = []

# sorted() keeps the load order stable between runs; the extension check is
# case-insensitive so files named e.g. "Book.PDF" are also picked up.
for filename in sorted(os.listdir("docs")):
    if filename.lower().endswith(".pdf"):
        loader = PyMuPDFLoader(os.path.join("docs", filename))
        docs = loader.load()
        documents.extend(docs)
        print(f"📄 {filename}: {len(docs)} pages")

if documents:
    # Chunks of up to 1200 characters with a 200-character overlap.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=200)
    chunks = splitter.split_documents(documents)
    print(f"✅ Split into {len(chunks)} chunks")
else:
    print("❌ No documents found!")


📖 Processing documents...
📄 Medical_book.pdf: 637 pages
✅ Split into 2784 chunks


In [6]:
# Vector Database Setup
print("🧬 Creating Chroma Vector DB...")

PERSIST_DIR = "medical_db"

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Open the persisted store first. Calling Chroma.from_documents() against a
# directory that already holds the collection embeds every chunk again and
# appends it, so each re-run of this cell would duplicate the stored entries.
vectordb = Chroma(persist_directory=PERSIST_DIR, embedding_function=embeddings)
if vectordb.get(limit=1)["ids"]:
    # NOTE: delete the PERSIST_DIR folder to force a rebuild after the PDFs change.
    print(f"♻️ Reusing existing vector DB in '{PERSIST_DIR}'")
else:
    vectordb = Chroma.from_documents(chunks, embeddings, persist_directory=PERSIST_DIR)

# Return the 3 best-matching chunks for each query.
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.2)

print("✅ Vector DB + LLM ready!")


🧬 Creating Chroma Vector DB...
✅ Vector DB + LLM ready!


In [7]:
#  Hybrid Q&A Functions
# caching
import hashlib
cache = {}  # In-memory answer store, keyed by the normalised question text

def cached_hybrid_query(query: str):
    """Answer `query`, reusing a stored answer when the same question was asked before.

    Questions are compared after lower-casing and trimming surrounding
    whitespace. A question that has not been seen is passed unchanged to
    hybrid_query() and its answer is stored before being returned.
    """
    lookup = query.lower().strip()

    if lookup not in cache:
        # First time this question is seen: compute and remember the answer
        print("🔄 Processing new query...")
        cache[lookup] = hybrid_query(query)
        return cache[lookup]

    # Seen before: serve the stored answer
    print("💡 Using cached response!")
    return cache[lookup]

# RetrievalQA chain: answers a query from the chunks returned by `retriever`
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",            # alternative: "map_reduce" for large docs
    retriever=retriever,
    return_source_documents=True,  # include the source docs in the result for reference
)

def ask_llm_first(query: str):
    """Ask the LLM directly, without retrieval.

    The prompt instructs the model to reply "I don't know." when it is not
    confident.

    Returns:
        The model's reply text, or None when the reply contains that
        "I don't know" marker (compared case-insensitively).
    """
    system_prompt = f"""
    You are a medical AI assistant. Answer only if you are confident.
    If you don’t know, reply exactly: "I don't know."

    Question: {query}
    """
    response = llm.invoke(system_prompt)
    content = response.content if hasattr(response, "content") else str(response)

    # The reply may spell the marker with a typographic apostrophe (the prompt
    # above contains one itself), so fold those to a plain "'" before matching.
    # Otherwise "I don’t know." would be returned as a confident answer.
    normalised = content.lower().replace("\u2019", "'").replace("\u2018", "'")
    if "i don't know" in normalised:
        return None
    return content

def ask_with_rag(query: str):
    """Answer `query` through the RetrievalQA chain.

    Returns:
        A tuple (answer, sources). `sources` lists each distinct "source"
        metadata value of the returned documents once, in first-seen order;
        documents without that key contribute "Unknown".
    """
    response = qa_chain.invoke({"query": query})
    answer = response["result"]

    seen = set()
    unique_sources = []
    for document in response["source_documents"]:
        origin = document.metadata.get("source", "Unknown")
        if origin in seen:
            continue  # already listed
        seen.add(origin)
        unique_sources.append(origin)

    return answer, unique_sources

def hybrid_query(query: str):
    """Answer `query` with the direct LLM reply if there is one, else via RAG.

    Returns a display string: the direct answer prefixed with "🧠 DIRECT: ",
    or the retrieval answer prefixed with "📚 RAG: " followed by its sources.
    """
    direct = ask_llm_first(query)
    if not direct:
        # No usable direct answer: fall back to the retrieval chain
        grounded, refs = ask_with_rag(query)
        return f"📚 RAG: {grounded}\n\n📎 Sources: {refs}"
    return f"🧠 DIRECT: {direct}"


In [8]:
# Interactive Chat

def start_chat():
    """Run a console question/answer loop until the user types 'exit'.

    Each non-empty line is answered through cached_hybrid_query() and the
    answer is printed. Blank lines are ignored rather than being sent to the
    model. The loop also ends cleanly if the input stream is closed or the
    prompt is interrupted.
    """
    print("🏥 Medical Chatbot (Type 'exit' to quit)\n")
    while True:
        try:
            user_q = input("💬 You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # No more input can be read; leave the loop instead of raising
            print("\n👋 Goodbye!")
            break
        if not user_q:
            # Nothing was typed: ask again without spending an API call
            continue
        if user_q.lower() == "exit":
            print("👋 Goodbye!")
            break
        answer = cached_hybrid_query(user_q)
        print(f"\n🤖 {answer}\n")

# Start chatbot: runs the console input loop defined in start_chat() in this cell
start_chat()



🏥 Medical Chatbot (Type 'exit' to quit)

💬 You: how much percentage is reported in excitation of people using SSRIs
🔄 Processing new query...

🤖 📚 RAG: Over 20% of patients reported excitation.

📎 Sources: ['docs/Medical_book.pdf']

💬 You: In the large majority of how many cases, the facial weakness or paralysis is temporary?
🔄 Processing new query...

🤖 📚 RAG: In the large majority of cases (80-85%), the facial weakness or paralysis is temporary.

📎 Sources: ['docs/Medical_book.pdf']

💬 You: In the large majority of how many cases, the facial weakness or paralysis is temporary?
💡 Using cached response!

🤖 📚 RAG: In the large majority of cases (80-85%), the facial weakness or paralysis is temporary.

📎 Sources: ['docs/Medical_book.pdf']

💬 You: breast feeding increases the transmission of aids by how much percentage
🔄 Processing new query...

🤖 📚 RAG: Breastfeeding increases the risk of HIV transmission by 10-20%.

📎 Sources: ['docs/Medical_book.pdf']

💬 You: exit
👋 Goodbye!


In [9]:
import gradio as gr

# Gradio function for chat
def chat_ui(user_input, history=None):
    """Handle one chat turn for the Gradio UI.

    Args:
        user_input: Text entered by the user.
        history: Earlier (question, answer) pairs; None or empty for a new chat.

    Returns:
        The updated history twice, one value per output component wired to
        this handler. The list passed in is never modified. A blank message
        returns the history unchanged without querying the model.
    """
    # Copy so neither a shared default nor the caller's list is ever mutated;
    # this also accepts None for an empty chat.
    history = list(history) if history else []

    if not user_input or not user_input.strip():
        return history, history

    # Get answer from the cached hybrid pipeline
    answer = cached_hybrid_query(user_input)

    # Append to history
    history = history + [(user_input, answer)]
    return history, history

# Build the Gradio UI: a chat display, a text box and a send button
with gr.Blocks() as demo:
    gr.Markdown("## 🏥 Medical Chatbot (RAG + LLM)")
    chatbot = gr.Chatbot(label="Medical Chatbot")  # chat-style message display
    user_input = gr.Textbox(
        show_label=False,
        placeholder="Ask your medical question here...",
    )
    submit_btn = gr.Button("Send")

    # Clicking the button and pressing Enter in the text box both run chat_ui
    # with identical input/output wiring.
    for _bind in (submit_btn.click, user_input.submit):
        _bind(chat_ui, inputs=[user_input, chatbot], outputs=[chatbot, chatbot])

demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://644f8872758c1aefd0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


