<a href="https://colab.research.google.com/github/keerthana179-keke/2022305018/blob/main/Healthcarebot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# Step 1: Install required packages
# Run these in your terminal or notebook if not already installed:
# pip install langchain langchain-google-genai biopython

# Step 2: Import libraries
import os
from Bio import Entrez
from langchain.chat_models import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document

# Step 3: Set up Gemini API key (get it from https://makersuite.google.com/app)
# Ask for the key at run time (input is not echoed) unless the environment
# already provides it, so a real key never has to be typed into the notebook.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini API key: ")

# Step 4: Set email for PubMed API
# Taken from ENTREZ_EMAIL when set, otherwise requested interactively.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or input("Contact email for NCBI Entrez: ")

# Step 5: Function to fetch abstracts from PubMed
def fetch_pubmed_abstracts(query, max_results=3):
    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    record = Entrez.read(handle)
    ids = record["IdList"]

    if not ids:
        return ["No relevant articles found."]

    fetch_handle = Entrez.efetch(db="pubmed", id=",".join(ids), rettype="abstract", retmode="text_

SyntaxError: unterminated string literal (detected at line 27) (<ipython-input-1-31d5dec35fdf>, line 27)

In [None]:
import os
from Bio import Entrez
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import GoogleGenerativeAIEmbeddings
from langchain.chat_models import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA

# 1. SETUP API KEYS
# Key from makersuite.google.com/app: read from the environment, or prompted
# for without echo, instead of being written into the notebook source.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini API key: ")
# Required for PubMed access; taken from ENTREZ_EMAIL when set.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or input("Contact email for NCBI Entrez: ")

# 2. FETCH ARTICLES FROM PUBMED
def fetch_pubmed_abstracts(query, max_results=5):
    """Search PubMed for ``query`` and return the fetched text split into fragments.

    Args:
        query: Search term passed to ``Entrez.esearch``.
        max_results: Upper bound on the number of IDs requested (``retmax``).

    Returns:
        A list of stripped, non-empty fragments produced by splitting the
        plain-text ``efetch`` response on blank lines; ``[]`` when the search
        returns no IDs.
    """
    search = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(search)
    finally:
        # Close the handle even when parsing fails so it is not leaked.
        search.close()

    ids = record["IdList"]
    if not ids:
        return []

    fetch = Entrez.efetch(db="pubmed", id=",".join(ids), rettype="abstract", retmode="text")
    try:
        text = fetch.read()
    finally:
        fetch.close()

    # NOTE(review): a blank-line split yields one fragment per paragraph of the
    # response, not necessarily one per article — confirm this is intended.
    return [part.strip() for part in text.split("\n\n") if part.strip()]

# 3. CREATE DOCUMENT OBJECTS & SPLIT INTO CHUNKS
def prepare_documents(abstracts):
    """Wrap each abstract in a ``Document`` and split the documents into chunks.

    The splitter is configured with ``chunk_size=1000`` and ``chunk_overlap=100``;
    the return value is whatever ``split_documents`` produces.
    """
    wrapped = []
    for body in abstracts:
        wrapped.append(Document(page_content=body))
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    return chunker.split_documents(wrapped)

# 4. BUILD VECTORSTORE FROM EMBEDDINGS
def build_vectorstore(chunks):
    """Embed ``chunks`` with ``models/embedding-001`` and return the FAISS store."""
    return FAISS.from_documents(
        chunks,
        GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
    )

# 5. SETUP RETRIEVAL QA CHAIN WITH GEMINI
def build_rag_chain(vectorstore, model_name="gemini-1.5-pro-latest"):
    """Build a ``RetrievalQA`` chain over ``vectorstore`` using a Gemini chat model.

    Args:
        vectorstore: Object exposing ``as_retriever``; ``k=3`` is passed as
            search kwargs.
        model_name: Gemini model identifier. The previously hard-coded
            "gemini-pro" is rejected by the API with a 404 (see the NotFound
            output recorded in this notebook), so the default is the model
            name used by the cell that ran successfully.

    Returns:
        The chain created by ``RetrievalQA.from_chain_type`` with the "stuff"
        chain type and source documents enabled.
    """
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    llm = ChatGoogleGenerativeAI(model=model_name, temperature=0)
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        return_source_documents=True,
    )

# 6. MAIN FUNCTION
def run_pubmed_rag_pipeline(user_query):
    """Answer ``user_query`` from PubMed text via retrieval plus Gemini.

    Args:
        user_query: Text used both as the PubMed search term and as the
            question sent to the QA chain.

    Returns:
        The chain's ``"result"`` entry, or a fixed message when
        ``fetch_pubmed_abstracts`` returns nothing.
    """
    print(f"\n[+] Searching PubMed for: {user_query}")
    abstracts = fetch_pubmed_abstracts(user_query)
    if not abstracts:
        return "No relevant articles found on PubMed."

    chunks = prepare_documents(abstracts)
    vectordb = build_vectorstore(chunks)
    qa_chain = build_rag_chain(vectordb)

    print("[+] Generating answer using Gemini + RAG...")
    # Calling the chain object directly is deprecated in LangChain; ``invoke``
    # takes the inputs as a mapping keyed by "query".
    result = qa_chain.invoke({"query": user_query})
    return result["result"]

# 7. RUN
if __name__ == "__main__":
    query = input("Enter your health

SyntaxError: unterminated string literal (detected at line 69) (<ipython-input-2-43d5f62ceff3>, line 69)

In [None]:
# STEP 1: Install dependencies
# Run this in terminal or notebook if not installed
# pip install langchain langchain-google-genai biopython faiss-cpu

import os
from Bio import Entrez
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import GoogleGenerativeAIEmbeddings
from langchain.chat_models import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA

# STEP 2: Set Gemini API key and Entrez email
# SECURITY: the key that was hard-coded on this line is exposed to anyone who
# can read the notebook and should be revoked. The key is now read from the
# environment, or prompted for without echo.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini API key: ")
# NCBI requires a real contact address; taken from ENTREZ_EMAIL when set.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or input("Contact email for NCBI Entrez: ")

# STEP 3: Fetch abstracts from PubMed
def fetch_pubmed_abstracts(query, max_results=5):
    """Look up ``query`` on PubMed and return the retrieved text as fragments.

    Args:
        query: Term handed to ``Entrez.esearch``.
        max_results: Value used for ``retmax`` in the search.

    Returns:
        Stripped, non-empty pieces of the plain-text ``efetch`` response split
        on blank lines, or ``[]`` if the search finds no IDs.
    """
    search = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(search)
    finally:
        # The handle is closed on every path so it cannot leak.
        search.close()

    ids = record["IdList"]
    if not ids:
        return []

    fetch = Entrez.efetch(db="pubmed", id=",".join(ids), rettype="abstract", retmode="text")
    try:
        text = fetch.read()
    finally:
        fetch.close()

    # NOTE(review): splitting on blank lines gives paragraph-level fragments,
    # not one entry per article — verify that downstream code expects this.
    return [piece.strip() for piece in text.split("\n\n") if piece.strip()]

# STEP 4: Chunk abstracts into smaller documents
def prepare_documents(abstracts):
    """Turn abstracts into ``Document`` objects and return them split into chunks.

    Uses ``RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)``.
    """
    as_documents = list(map(lambda body: Document(page_content=body), abstracts))
    return RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=100
    ).split_documents(as_documents)

# STEP 5: Build FAISS vectorstore with Gemini embeddings
def build_vectorstore(chunks):
    """Return a FAISS store built from ``chunks`` using Gemini embeddings.

    The embedding model is ``models/embedding-001``.
    """
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    return FAISS.from_documents(chunks, embedder)

# STEP 6: RAG pipeline with Gemini
def build_rag_chain(vectorstore, model_name="gemini-1.5-pro-latest"):
    """Create the retrieval QA chain that answers from ``vectorstore``.

    Args:
        vectorstore: Store whose ``as_retriever`` is called with ``k=3``.
        model_name: Gemini chat model to use. "gemini-pro", the value that was
            hard-coded here, fails with a 404 NotFound (the error is recorded
            in this notebook's output); the default is the model name from the
            cell that completed successfully.

    Returns:
        A ``RetrievalQA`` chain ("stuff" chain type) that also returns its
        source documents.
    """
    llm = ChatGoogleGenerativeAI(model=model_name, temperature=0)
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
        chain_type="stuff",
        return_source_documents=True,
    )

# STEP 7: Run everything
def run_pubmed_rag_pipeline(user_query):
    """Run the full search → chunk → embed → answer pipeline for ``user_query``.

    Returns:
        The ``"result"`` value produced by the QA chain, or
        "No relevant articles found on PubMed." when the search yields nothing.
    """
    print(f"\n[+] Searching PubMed for: {user_query}")
    abstracts = fetch_pubmed_abstracts(user_query)
    if not abstracts:
        return "No relevant articles found on PubMed."

    qa_chain = build_rag_chain(build_vectorstore(prepare_documents(abstracts)))

    print("[+] Generating answer using Gemini + RAG...")
    # ``invoke`` replaces the deprecated direct call on the chain object; the
    # question is supplied under the "query" key.
    return qa_chain.invoke({"query": user_query})["result"]

# STEP 8: Start
if __name__ ==

SyntaxError: invalid syntax (<ipython-input-4-8ed8c60516a7>, line 72)

In [None]:
# Install dependencies first:
# pip install langchain langchain-google-genai biopython faiss-cpu

import os
from Bio import Entrez
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import GoogleGenerativeAIEmbeddings
from langchain.chat_models import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA

# STEP 1: Set your Gemini API Key and PubMed email
# SECURITY: a live Gemini key was committed on this line; treat it as
# compromised and revoke it. The key now comes from the environment or from a
# no-echo prompt.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini API key: ")
# Required by the PubMed API; taken from ENTREZ_EMAIL when set.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or input("Contact email for NCBI Entrez: ")

# STEP 2: Fetch abstracts from PubMed
def fetch_pubmed_abstracts(query, max_results=5):
    """Search PubMed and return the plain-text response split on blank lines.

    Args:
        query: Search term for ``Entrez.esearch``.
        max_results: ``retmax`` value for the search.

    Returns:
        A list of stripped, non-empty fragments; ``[]`` when the search record
        has no ``"IdList"`` entries.
    """
    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(search_handle)
    finally:
        # Always close the handle so it is not leaked.
        search_handle.close()

    ids = record.get("IdList", [])
    if not ids:
        return []

    fetch_handle = Entrez.efetch(db="pubmed", id=",".join(ids), rettype="abstract", retmode="text")
    try:
        raw_text = fetch_handle.read()
    finally:
        fetch_handle.close()

    # NOTE(review): this yields paragraph-level fragments of the response, not
    # necessarily one entry per article — confirm against the caller's needs.
    return [ab.strip() for ab in raw_text.strip().split("\n\n") if ab.strip()]

# STEP 3: Split abstracts into smaller chunks
def split_abstracts_to_chunks(abstracts):
    """Convert abstracts to ``Document`` objects and split them into chunks.

    The splitter uses ``chunk_size=1000`` and ``chunk_overlap=100``.
    """
    docs = []
    for text in abstracts:
        docs.append(Document(page_content=text))
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    return chunker.split_documents(docs)

# STEP 4: Create FAISS vectorstore with Gemini Embeddings
def create_vectorstore(chunks):
    """Index ``chunks`` in FAISS using ``models/embedding-001`` embeddings."""
    return FAISS.from_documents(
        chunks, GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    )

# STEP 5: Create RAG chain using Gemini LLM and Retriever
def create_rag_chain(vectordb, model_name="gemini-1.5-pro-latest"):
    """Create a ``RetrievalQA`` chain that answers from ``vectordb``.

    Args:
        vectordb: Store whose ``as_retriever`` is called with ``k=3``.
        model_name: Gemini chat model. The formerly hard-coded "gemini-pro" is
            rejected with a 404 NotFound (recorded in this notebook's output),
            so the default is the model name from the cell that ran
            successfully.

    Returns:
        The chain, configured to return source documents with its answer.
    """
    retriever = vectordb.as_retriever(search_kwargs={"k": 3})
    llm = ChatGoogleGenerativeAI(model=model_name, temperature=0)
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)

# STEP 6: Combine steps into a full pipeline
def run_rag_pipeline(question):
    print(f"\n[+] Searching PubMed for: '{question}'")
    abstracts = fetch_pubmed_abstracts(question)

    if not abstracts:
        return "No relevant PubMed articles found for the given query."

    chunks = split_abstracts_to_chunks(abstracts)
    vectordb = create_vectorstore(chunks)
    rag_chain_

ModuleNotFoundError: No module named 'Bio'

In [None]:
# STEP 1: Install dependencies
# Install BioPython, LangChain, FAISS, and Google Gemini SDK
# Run these commands in the terminal (or Colab notebook):
# pip install biopython langchain langchain-google-genai faiss-cpu

import os
from Bio import Entrez  # BioPython for PubMed
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import GoogleGenerativeAIEmbeddings
from langchain.chat_models import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA

# STEP 2: Set your Gemini API Key and PubMed email
# SECURITY: the Gemini key that was embedded here is public via the notebook
# and should be revoked. Read it from the environment or prompt without echo.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini API key: ")
# Required by the PubMed API; taken from ENTREZ_EMAIL when set.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or input("Contact email for NCBI Entrez: ")

# STEP 3: Fetch abstracts from PubMed using Entrez
def fetch_pubmed_abstracts(query, max_results=5):
    search_handle = Entrez.esearch(db="pub_

SyntaxError: unterminated string literal (detected at line 21) (<ipython-input-6-7ed7f1b9e460>, line 21)

In [None]:
# ✅ Step 1: Install dependencies
!pip install -q langchain langchain-community langchain-google-genai faiss-cpu biopython google-generativeai

# ✅ Step 2: Import necessary libraries
import os
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from Bio import Entrez

# ✅ Step 3: API keys
# SECURITY: a real API key was hard-coded here; revoke it. The key is now read
# from the environment, or requested with a no-echo prompt.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini API key: ")
# Contact e-mail for PubMed; taken from ENTREZ_EMAIL when set.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or input("Contact email for NCBI Entrez: ")

# ✅ Step 4: Fetch articles from PubMed
def fetch_pubmed_articles(query, max_results=5):
    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    record = Entrez.read(handle)
    ids = record["IdList"]
    abstracts = []
    for pmid in ids:
        fetch

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m58.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# ✅ Step 1: Install dependencies
!pip install -q langchain langchain-community langchain-google-genai faiss-cpu biopython google-generativeai

# ✅ Step 2: Import necessary libraries
import os
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from Bio import Entrez

# ✅ Step 3: API keys
# SECURITY: an API key and a personal e-mail address were hard-coded here.
# Revoke that key; both values are now supplied at run time.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini API key: ")
# Contact e-mail for PubMed; taken from ENTREZ_EMAIL when set.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or input("Contact email for NCBI Entrez: ")

# ✅ Step 4: Fetch articles from PubMed
def fetch_pubmed_articles(query, max_results=5):
    """Search PubMed for ``query`` and fetch the abstract text of each hit.

    Args:
        query: Search term passed to ``Entrez.esearch``.
        max_results: ``retmax`` value for the search.

    Returns:
        One text string per PubMed ID, in search order, omitting blank
        responses; ``[]`` when nothing is found.
    """
    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(handle)
    finally:
        # Close the handle on every path so it is not leaked.
        handle.close()

    abstracts = []
    for pmid in record["IdList"]:
        fetch = Entrez.efetch(db="pubmed", id=pmid, rettype="abstract", retmode="text")
        try:
            abstract_text = fetch.read()
        finally:
            fetch.close()
        # Blank responses are dropped so an all-empty result is reported as
        # "no articles" by the caller's emptiness check.
        if abstract_text.strip():
            abstracts.append(abstract_text)
    return abstracts

# ✅ Step 5: Build vector store
def build_vectorstore_from_articles(articles):
    """Chunk the article texts, embed the chunks, and return a FAISS store.

    The splitter uses ``chunk_size=1000`` and ``chunk_overlap=200``; embeddings
    come from ``models/embedding-001``.
    """
    chunk_docs = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    ).create_documents(articles)
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    return FAISS.from_documents(chunk_docs, embedder)

# ✅ Step 6: Create Gemini-based QA system
def create_qa_chain(vectorstore, model_name="gemini-1.5-pro-latest"):
    """Create a Gemini-backed ``RetrievalQA`` chain over ``vectorstore``.

    Args:
        vectorstore: Store providing ``as_retriever()``.
        model_name: Gemini chat model. "gemini-pro", previously hard-coded
            here, makes the chain fail with
            "404 models/gemini-pro is not found" (the error recorded in this
            cell's output); the default is the model name from the cell that
            ran successfully.

    Returns:
        The chain, configured to return source documents with its answer.
    """
    llm = ChatGoogleGenerativeAI(model=model_name, temperature=0.2)
    retriever = vectorstore.as_retriever()
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)

# ✅ Step 7: Ask your question
def ask_health_question(query, qa_chain):
    """Run ``query`` through ``qa_chain`` and print the answer and its sources.

    Args:
        query: Question text, sent to the chain under the ``"query"`` key.
        qa_chain: Chain whose result mapping contains ``"result"`` and
            ``"source_documents"``.

    Prints the answer followed by the first 500 characters of each source
    document; returns ``None``.
    """
    # Calling the chain object directly is deprecated in LangChain; ``invoke``
    # is the supported entry point.
    result = qa_chain.invoke({"query": query})
    print("\n🩺 Answer:\n")
    print(result["result"])
    print("\n📚 Sources:")
    for position, doc in enumerate(result["source_documents"], start=1):
        print(f"\nSource {position}:\n{doc.page_content[:500]}...")

# ✅ Step 8: Run everything interactively
if __name__ == "__main__":
    # Interactive driver: read a question, pull related PubMed text, index it,
    # and answer the question through the QA chain.
    user_query = input("💬 Enter your medical/healthcare question: ")

    print("\n🔍 Searching PubMed for related research...")
    articles = fetch_pubmed_articles(user_query, max_results=5)

    if articles:
        print("📚 Building knowledge base from PubMed articles...")
        vectorstore = build_vectorstore_from_articles(articles)

        print("🤖 Connecting to Gemini for answer generation...")
        qa_chain = create_qa_chain(vectorstore)

        ask_health_question(user_query, qa_chain)
    else:
        print("❌ No articles found on this topic. Try a different question.")

💬 Enter your medical/healthcare question: Types of influenza virus 

🔍 Searching PubMed for related research...
📚 Building knowledge base from PubMed articles...
🤖 Connecting to Gemini for answer generation...


  result = qa_chain(query)


NotFound: 404 models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.

In [None]:
# ✅ Step 1: Install dependencies
!pip install -q langchain langchain-community langchain-google-genai faiss-cpu biopython google-generativeai

# ✅ Step 2: Import necessary libraries
import os
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from Bio import Entrez

# ✅ Step 3: API keys
# SECURITY: a real Gemini key was committed on this line and should be
# revoked. Supply the key through the environment or the no-echo prompt.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini API key: ")
# Contact e-mail for PubMed; taken from ENTREZ_EMAIL when set.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or input("Contact email for NCBI Entrez: ")

# ✅ Step 4: Fetch articles from PubMed
def fetch_pubmed_articles(query, max_results=5):
    """Search PubMed for ``query`` and return the abstract text of each hit.

    As written before, each fetched text was read and then discarded, and the
    function fell off the end returning ``None``. The texts are now collected
    and returned, matching the other definitions of this function in the
    notebook.

    Args:
        query: Search term passed to ``Entrez.esearch``.
        max_results: ``retmax`` value for the search.

    Returns:
        A list with one text string per PubMed ID, in search order.
    """
    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(handle)
    finally:
        # Close the handle on every path so it is not leaked.
        handle.close()

    abstracts = []
    for pmid in record["IdList"]:
        fetch = Entrez.efetch(db="pubmed", id=pmid, rettype="abstract", retmode="text")
        try:
            abstracts.append(fetch.read())
        finally:
            fetch.close()
    return abstracts

In [None]:

# ✅ Step 1: Install dependencies
!pip install -q langchain langchain-community langchain-google-genai faiss-cpu biopython google-generativeai

# ✅ Step 2: Import necessary libraries
import os
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from Bio import Entrez

# ✅ Step 3: API keys
# SECURITY: an API key and a personal e-mail address were hard-coded here.
# Revoke that key; both values are now supplied at run time.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini API key: ")
# Contact e-mail for PubMed; taken from ENTREZ_EMAIL when set.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or input("Contact email for NCBI Entrez: ")

# ✅ Step 4: Fetch articles from PubMed
def fetch_pubmed_articles(query, max_results=5):
    """Return the abstract text for each PubMed hit matching ``query``.

    Args:
        query: Search term passed to ``Entrez.esearch``.
        max_results: ``retmax`` value for the search.

    Returns:
        One text string per PubMed ID, in search order, with blank responses
        left out; ``[]`` when the search finds nothing.
    """
    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(search_handle)
    finally:
        # Handles are closed on every path so none are leaked.
        search_handle.close()

    abstracts = []
    for pmid in record["IdList"]:
        fetch = Entrez.efetch(db="pubmed", id=pmid, rettype="abstract", retmode="text")
        try:
            abstract_text = fetch.read()
        finally:
            fetch.close()
        # Keep only responses with content so the caller's "no articles" check
        # is meaningful.
        if abstract_text.strip():
            abstracts.append(abstract_text)
    return abstracts

# ✅ Step 5: Build vector store
def build_vectorstore_from_articles(articles):
    """Split ``articles`` into chunk documents and index them in FAISS.

    Splitting uses ``chunk_size=1000`` / ``chunk_overlap=200``; the chunks are
    embedded with ``models/embedding-001``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return FAISS.from_documents(
        splitter.create_documents(articles),
        GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
    )

# ✅ Step 6: Create Gemini-based QA system
def create_qa_chain(vectorstore):
    """Return a ``RetrievalQA`` chain over ``vectorstore``.

    The chain uses the ``gemini-1.5-pro-latest`` chat model at temperature 0.2
    and is configured to return source documents.
    """
    return RetrievalQA.from_chain_type(
        llm=ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest", temperature=0.2),
        retriever=vectorstore.as_retriever(),
        return_source_documents=True,
    )

# ✅ Step 7: Ask your question
def ask_health_question(query, qa_chain):
    """Ask ``qa_chain`` the question and print the answer with its sources.

    Args:
        query: Question text, passed to the chain under the ``"query"`` key.
        qa_chain: Chain returning a mapping with ``"result"`` and
            ``"source_documents"``.

    Each source is printed truncated to its first 500 characters. Returns
    ``None``.
    """
    # ``invoke`` replaces the deprecated direct call on the chain object.
    result = qa_chain.invoke({"query": query})
    print("\n🩺 Answer:\n")
    print(result["result"])
    print("\n📚 Sources:")
    for number, doc in enumerate(result["source_documents"], start=1):
        print(f"\nSource {number}:\n{doc.page_content[:500]}...")

# ✅ Step 8: Run everything interactively
if __name__ == "__main__":
    # Interactive entry point: question in, PubMed lookup, FAISS index,
    # Gemini answer out.
    user_query = input("💬 Enter your medical/healthcare question: ")

    print("\n🔍 Searching PubMed for related research...")
    articles = fetch_pubmed_articles(user_query, max_results=5)

    if articles:
        print("📚 Building knowledge base from PubMed articles...")
        vectorstore = build_vectorstore_from_articles(articles)

        print("🤖 Connecting to Gemini for answer generation...")
        qa_chain = create_qa_chain(vectorstore)

        ask_health_question(user_query, qa_chain)
    else:
        print("❌ No articles found on this topic. Try a different question.")

💬 Enter your medical/healthcare question: What is common diabetes?

🔍 Searching PubMed for related research...
📚 Building knowledge base from PubMed articles...
🤖 Connecting to Gemini for answer generation...

🩺 Answer:

This article discusses diabetes mellitus as a common chronic disease and steatotic liver disease (SLD) as one of the most common diseases encountered in general practice.  It does not define "common diabetes."

📚 Sources:

Source 1:
Diabetes mellitus, as a common chronic disease, easily leads to significant 
changes in the structure of the eye, among which diabetic cataract is 
particularly common. Although surgery is the main treatment for this 
complication, it may be accompanied by postoperative complications. Therefore, 
it is particularly important to develop specific drugs for diabetic cataract, 
aiming to fundamentally reduce its incidence and reduce the need for surgery. At 
present, the greatest challenge is to dev...

Source 2:
OBJECTIVE: To provide an update

In [None]:
# ✅ Step 1: Install dependencies
!pip install -q langchain langchain-community langchain-google-genai faiss-cpu biopython google-generativeai

# ✅ Step 2: Import necessary libraries
import os
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from Bio import Entrez

# ✅ Step 3: API keys
# SECURITY: the Gemini key previously written on this line is exposed and
# should be revoked. It is now read from the environment or prompted for
# without echo.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini API key: ")
# Contact e-mail for PubMed; taken from ENTREZ_EMAIL when set.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or input("Contact email for NCBI Entrez: ")

# ✅ Step 4: Fetch articles from PubMed
def fetch_pubmed_articles(query, max_results=5):
    """Search PubMed for ``query`` and collect the abstract text of every hit.

    Args:
        query: Search term passed to ``Entrez.esearch``.
        max_results: ``retmax`` value for the search.

    Returns:
        A list of text strings, one per PubMed ID in search order, omitting
        blank responses; ``[]`` when there are no hits.
    """
    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(handle)
    finally:
        # Close the search handle on every path so it is not leaked.
        handle.close()

    abstracts = []
    for pmid in record["IdList"]:
        fetch = Entrez.efetch(db="pubmed", id=pmid, rettype="abstract", retmode="text")
        try:
            abstract_text = fetch.read()
        finally:
            fetch.close()
        # Blank responses carry nothing to index, so they are skipped.
        if abstract_text.strip():
            abstracts.append(abstract_text)
    return abstracts

# ✅ Step 5: Build vector store
def build_vectorstore_from_articles(articles):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.create_documents(articles)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vectorstore = FAISS.from_documents(texts, embeddings)
    return