In [None]:

# ✅ Step 1: Install dependencies
!pip install -q langchain langchain-community langchain-google-genai faiss-cpu biopython google-generativeai

# ✅ Step 2: Import necessary libraries
import os
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from Bio import Entrez

# ✅ Step 3: API keys
# Credentials are taken from the environment (or prompted for interactively)
# instead of being committed in the notebook. Any key that was previously
# hard-coded here should be treated as leaked and revoked.
from getpass import getpass

if not os.environ.get("GOOGLE_API_KEY"):
    # getpass keeps the key out of the cell output.
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini API key: ")

# Contact email attached to the PubMed (Entrez) requests made below.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or input("Email for PubMed (NCBI Entrez): ")

# ✅ Step 4: Fetch articles from PubMed
def fetch_pubmed_articles(query, max_results=5):
    """Return the plain-text PubMed abstract records matching *query*.

    Args:
        query: Search term passed to the PubMed ``esearch`` endpoint.
        max_results: Upper bound on the number of ids requested (``retmax``).

    Returns:
        A list of strings, one per PMID in the search result, each holding the
        text returned by ``efetch`` with ``rettype="abstract"``. A blank query
        yields an empty list without contacting PubMed.
    """
    if not query or not query.strip():
        # Nothing to search for; skip the network round-trip.
        return []

    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(search_handle)
    finally:
        # Release the connection even if parsing the response fails.
        search_handle.close()

    abstracts = []
    for pmid in record["IdList"]:
        fetch_handle = Entrez.efetch(db="pubmed", id=pmid, rettype="abstract", retmode="text")
        try:
            abstracts.append(fetch_handle.read())
        finally:
            fetch_handle.close()
    return abstracts

# ✅ Step 5: Build vector store
def build_vectorstore_from_articles(articles):
    """Chunk the article texts, embed them, and index them in FAISS.

    Args:
        articles: Texts handed to
            ``RecursiveCharacterTextSplitter.create_documents``.

    Returns:
        The ``FAISS`` vector store built from the resulting chunks.
    """
    # chunk_size=1000 with chunk_overlap=200, as configured on the splitter.
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = chunker.create_documents(articles)
    return FAISS.from_documents(
        chunks,
        GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
    )

# ✅ Step 6: Create Gemini-based QA system
def create_qa_chain(vectorstore):
    """Build a ``RetrievalQA`` chain that answers from *vectorstore* via Gemini.

    Args:
        vectorstore: Object exposing ``as_retriever()``; its retriever supplies
            the documents given to the chain.

    Returns:
        The chain created by ``RetrievalQA.from_chain_type`` with
        ``return_source_documents=True``.
    """
    return RetrievalQA.from_chain_type(
        llm=ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest", temperature=0.2),
        retriever=vectorstore.as_retriever(),
        return_source_documents=True,
    )

# ✅ Step 7: Ask your question
def ask_health_question(query, qa_chain):
    """Run *query* through *qa_chain* and print the answer and its sources.

    Args:
        query: The question text; sent to the chain under the ``"query"`` key.
        qa_chain: Chain object exposing ``invoke``; its result must contain
            ``"result"`` and ``"source_documents"`` entries.

    Returns:
        None. The answer and the first 500 characters of each source document
        are written to standard output.
    """
    # Use invoke() rather than calling the chain object directly: the direct
    # call path (Chain.__call__) is deprecated in LangChain.
    result = qa_chain.invoke({"query": query})
    print("\n🩺 Answer:\n")
    print(result["result"])
    print("\n📚 Sources:")
    for index, doc in enumerate(result["source_documents"], start=1):
        print(f"\nSource {index}:\n{doc.page_content[:500]}...")

# ✅ Step 8: Run everything interactively
# Interactive driver: ask for a question, pull matching PubMed abstracts,
# index them, and let the Gemini-backed chain answer from that index.
if __name__ == "__main__":
    user_query = input("💬 Enter your medical/healthcare question: ")

    print("\n🔍 Searching PubMed for related research...")
    articles = fetch_pubmed_articles(user_query, max_results=5)

    if articles:
        print("📚 Building knowledge base from PubMed articles...")
        vectorstore = build_vectorstore_from_articles(articles)

        print("🤖 Connecting to Gemini for answer generation...")
        qa_chain = create_qa_chain(vectorstore)

        ask_health_question(user_query, qa_chain)
    else:
        # Without any abstracts there is nothing to index or answer from.
        print("❌ No articles found on this topic. Try a different question.")

💬 Enter your medical/healthcare question: Different types of antibiotics

🔍 Searching PubMed for related research...
📚 Building knowledge base from PubMed articles...
🤖 Connecting to Gemini for answer generation...

🩺 Answer:

Based on the provided abstracts, the following antibiotic types are mentioned:

* **β-lactam/tetracycline**
* **Gram-Staining** (Note: Gram-staining is a method to classify bacteria, not an antibiotic itself, but it's used to guide antibiotic choice.)

📚 Sources:

Source 1:
1. J Clin Med. 2025 Apr 4;14(7):2468. doi: 10.3390/jcm14072468.

The Performance of Gram-Staining in Tailoring the Empirical Antibiotic Choice in 
Operated Diabetic Foot Infections.

Altmann D(1), Liebe J(1), Waibel FWA(1), Schöni M(1), Napoli F(1), Sydler C(1), 
Schläfli F(1), Ledermann L(1), Lipsky BA(2), Uçkay I(1)(3).

Author information:
(1)Technical Orthopedics and Neuro-Orthopedics Team, Department of Orthopedic 
Surgery, Balgrist University Hospital, University of Zurich, 8008 Zurich, 

In [None]:

# ✅ Step 1: Install dependencies
!pip install -q langchain langchain-community faiss-cpu sentence-transformers biopython google-generativeai

# ✅ Step 2: Imports
import os
import google.generativeai as genai
from Bio import Entrez
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from sentence_transformers import SentenceTransformer
from langchain.embeddings import HuggingFaceEmbeddings

# ✅ Step 3: Set keys and model
# The Gemini key is taken from the environment (or prompted for) instead of
# being committed in the notebook. Any key that was previously hard-coded here
# should be treated as leaked and revoked.
from getpass import getpass

genai.configure(api_key=os.environ.get("GOOGLE_API_KEY") or getpass("Gemini API key: "))
# Set ENTREZ_EMAIL to your own address; the placeholder is only a fallback.
Entrez.email = os.environ.get("ENTREZ_EMAIL", "your_email@example.com")

# ✅ Step 4: Fetch from PubMed
def fetch_pubmed_articles(query, max_results=5):
    """Return the plain-text PubMed abstract records matching *query*.

    Args:
        query: Search term passed to the PubMed ``esearch`` endpoint.
        max_results: Upper bound on the number of ids requested (``retmax``).

    Returns:
        A list of strings, one per PMID in the search result, each holding the
        text returned by ``efetch`` with ``rettype="abstract"``. A blank query
        yields an empty list without contacting PubMed.
    """
    if not query or not query.strip():
        # Nothing to search for; skip the network round-trip.
        return []

    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(search_handle)
    finally:
        # Release the connection even if parsing the response fails.
        search_handle.close()

    abstracts = []
    for pmid in record["IdList"]:
        fetch_handle = Entrez.efetch(db="pubmed", id=pmid, rettype="abstract", retmode="text")
        try:
            abstracts.append(fetch_handle.read())
        finally:
            fetch_handle.close()
    return abstracts

# ✅ Step 5: Build vector store with HuggingFace embeddings
def build_vectorstore_from_articles(articles):
    """Chunk the article texts and index them in FAISS with local embeddings.

    Args:
        articles: Iterable of article texts; each becomes the ``page_content``
            of one ``Document`` before splitting.

    Returns:
        The ``FAISS`` vector store built from the split documents, embedded
        with the ``all-MiniLM-L6-v2`` HuggingFace model.
    """
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = chunker.split_documents(
        [Document(page_content=text) for text in articles]
    )
    return FAISS.from_documents(
        chunks,
        HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
    )

# ✅ Step 6: Gemini Free API Call (outside LangChain)
def gemini_answer(question, context, model_name="gemini-pro"):
    """Ask a Gemini model to answer *question* using *context*.

    Args:
        question: The user's question, inserted into the prompt.
        context: Supporting text, inserted into the prompt ahead of the
            question.
        model_name: Model id passed to ``genai.GenerativeModel``. The default
            keeps the previously hard-coded value so existing callers are
            unaffected.
            NOTE(review): confirm this model id is still served by the API.

    Returns:
        The ``text`` attribute of the response from ``generate_content``.
    """
    prompt = f"""You are a medical assistant. Use the context below to answer the user's question.

Context:
{context}

Question:
{question}

Answer:"""
    model = genai.GenerativeModel(model_name)
    response = model.generate_content(prompt)
    return response.text

# ✅ Step 7: Full pipeline
def ask_health_question(question):
    print("🔍 Fetching articles from PubMed...")
    articles = fetch