<a href="https://colab.research.google.com/github/thineshnatarajan/Mr.-Medio-AI-medpub/blob/main/MR_Medico_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
#  Install dependencies
!pip install -q langchain langchain-community langchain-google-genai faiss-cpu biopython gradio google-generativeai

#  Import necessary libraries
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from Bio import Entrez

#  API keys
# SECURITY: a real API key must never be written into the notebook source.
# The Gemini key is taken from the GOOGLE_API_KEY environment variable when it
# is already set; otherwise it is requested interactively (input is hidden) so
# it never ends up in a saved or shared copy of this file. A key that has been
# published in a notebook must be treated as compromised and rotated.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Gemini API key: ")
Entrez.email = "thineshnatarajan515@gmail.com"  # Your email for PubMed

# Fetch articles from PubMed
def fetch_pubmed_articles(query, max_results=5):
    """Search PubMed for *query* and return the matching records as text.

    Args:
        query: PubMed search term.
        max_results: Maximum number of PubMed IDs to request from the search
            (passed to ``Entrez.esearch`` as ``retmax``).

    Returns:
        A list of strings, one per PubMed ID returned by the search, each
        holding the text fetched with ``rettype="abstract"`` and
        ``retmode="text"``. Records whose fetched text is empty are skipped.
        An empty list is returned for a blank query or when the search
        yields no IDs.
    """
    # A blank term cannot produce a meaningful search; skip the network call.
    if not query or not query.strip():
        return []

    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(search_handle)
    finally:
        # Always release the HTTP connection, even if parsing fails.
        search_handle.close()

    abstracts = []
    for pmid in record["IdList"]:
        fetch_handle = Entrez.efetch(
            db="pubmed", id=pmid, rettype="abstract", retmode="text"
        )
        try:
            abstract_text = fetch_handle.read()
        finally:
            fetch_handle.close()
        # Empty responses would only add empty documents downstream.
        if abstract_text.strip():
            abstracts.append(abstract_text)
    return abstracts

# Build vector store
def build_vectorstore_from_articles(articles):
    """Split *articles* into overlapping chunks and index them in FAISS.

    Args:
        articles: Texts handed to the splitter's ``create_documents``.

    Returns:
        The FAISS vector store built from the chunked documents, embedded
        with the ``models/embedding-001`` Google Generative AI model.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.create_documents(articles)
    return FAISS.from_documents(
        chunks,
        GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
    )

# Create Gemini-based QA system
def create_qa_chain(vectorstore):
    """Build a RetrievalQA chain that answers from *vectorstore* using Gemini.

    Args:
        vectorstore: Object exposing ``as_retriever()``; its retriever feeds
            the chain.

    Returns:
        A ``RetrievalQA`` chain configured to also return the source
        documents it retrieved.
    """
    return RetrievalQA.from_chain_type(
        llm=ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest", temperature=0.2),
        retriever=vectorstore.as_retriever(),
        return_source_documents=True,
    )

# Gradio interface
def gradio_interface(user_query):
    """Answer *user_query* from freshly fetched PubMed articles.

    Fetches up to five articles for the question, indexes them, runs the
    retrieval QA chain, and formats the retrieved source chunks.

    Args:
        user_query: The question typed into the Gradio textbox.

    Returns:
        A ``(answer, sources)`` tuple of strings. When the question is blank
        or no articles are found, ``answer`` is a user-facing message and
        ``sources`` is the empty string.
    """
    # Reject blank input up front instead of sending an empty search/prompt.
    if not user_query or not user_query.strip():
        return "❌ Please enter a question.", ""

    articles = fetch_pubmed_articles(user_query, max_results=5)
    if not articles:
        return "❌ No articles found on this topic. Try a different question.", ""

    vectorstore = build_vectorstore_from_articles(articles)
    qa_chain = create_qa_chain(vectorstore)

    # Calling the chain object directly is deprecated in LangChain; invoke()
    # is the supported entry point and RetrievalQA reads its input from "query".
    result = qa_chain.invoke({"query": user_query})
    answer = result["result"]
    # Show at most the first 500 characters of each retrieved chunk.
    sources = "\n\n".join(
        f"Source {i}:\n{doc.page_content[:500]}..."
        for i, doc in enumerate(result["source_documents"], start=1)
    )

    return answer, sources

# Launch Gradio UI
# Declare the widgets first, then wire them to gradio_interface and start the app.
question_box = gr.Textbox(label="💬 Enter your medical/healthcare question")
answer_box = gr.Textbox(label="🩺 Answer")
sources_box = gr.Textbox(label="📚 Sources", lines=10)

demo = gr.Interface(
    fn=gradio_interface,
    inputs=question_box,
    outputs=[answer_box, sources_box],
    title="PubMed Health QA with Gemini",
    description="Ask a medical question and get an AI-generated answer based on real PubMed articles using Google's Gemini.",
)
demo.launch()

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.9/46.9 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.2/322.2 kB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m80.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

