In [None]:
# Install required packages
!pip install langchain langchain-community langchain-google-genai faiss-cpu wikipedia

# === Imports ===
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import WikipediaLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA
import os

# === Set Gemini API key ===
# Secrets must not be stored in the notebook itself: reuse a key that is
# already present in the environment, and otherwise prompt for it without
# echoing the typed value.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Gemini API key: ")

# === Step 1: Load topic from Wikipedia ===
# Strip surrounding whitespace so an accidental blank entry is caught here
# instead of being sent to Wikipedia as a query.
topic = input("Enter a topic to search on Wikipedia: ").strip()
if not topic:
    raise ValueError("A non-empty Wikipedia topic is required.")

# At most three pages are loaded for the topic.
loader = WikipediaLoader(query=topic, load_max_docs=3)
docs = loader.load()
print(f"\nLoaded {len(docs)} Wikipedia documents about '{topic}'.")
if not docs:
    # Nothing can be embedded or indexed from an empty list, so stop with a
    # clear message rather than failing later inside the vector store.
    raise ValueError(f"No Wikipedia pages were found for '{topic}'. Try a different topic.")

# === Step 2: Embed documents ===
# Each loaded page is embedded and stored in an in-memory FAISS index.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectorstore = FAISS.from_documents(docs, embeddings)

# === Step 3: Create retriever ===
# k=4 is an upper bound on retrieved documents; since at most three pages are
# indexed above, fewer than four results can come back.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 4})

# === Step 4: Set up Gemini LLM ===
llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro-exp-03-25", temperature=0.5)

# === Step 5: Create QA Chain ===
# "stuff" places all retrieved documents into a single prompt;
# return_source_documents=True keeps them in the result so the chat loop can
# show which context was used.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

# === Step 6: Interactive chat loop ===
# Reads questions until the user types 'exit' or 'quit', answers each one with
# the retrieval QA chain, and prints the first 300 characters of every source
# document the answer was based on.
print("\nChatbot is ready! Ask anything about the topic.")
print("Type 'exit' to stop.")

while True:
    # Strip so that inputs such as " exit " are still recognised.
    question = input("\nYou: ").strip()
    if question.lower() in ("exit", "quit"):
        print("Chat ended.")
        break
    if not question:
        # Blank line: prompt again instead of spending a model call on it.
        continue

    try:
        # `invoke` is the supported entry point; calling the chain object
        # directly is deprecated and emits a deprecation warning.
        result = qa_chain.invoke({"query": question})
        answer = result.get("result", "[No answer returned by the model.]")
        print("\nBot:", answer)

        # Optional: Show retrieved context
        print("\n--- Context Used ---")
        for i, doc in enumerate(result.get("source_documents", []), start=1):
            print(f"\nSource {i}:\n{doc.page_content[:300]}...")
    except Exception as e:
        # Top-level boundary of the interactive session: report the failure
        # and keep the chat alive so the user can try another question.
        print("Error:", str(e))

Collecting langchain-community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.2-py3-none-any.whl.metadata (4.7 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... done
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting goo


Loaded 3 Wikipedia documents about 'When is Ms dhoni birthday'.

Chatbot is ready! Ask anything about the topic.
Type 'exit' to stop.


  result = qa_chain({"query": question})



Bot: Based on the provided text, Mahendra Singh Dhoni's birthday is 7 July 1981.

--- Context Used ---

Source 1:
Mahendra Singh Dhoni ( ; born 7 July 1981) is an Indian professional cricketer who plays as a right-handed batter and a wicket-keeper. Widely regarded as one of the most prolific wicket-keeper batsmen and captains and one of the greatest ODI batsmen, he represented the Indian cricket team and was th...

Source 2:
M.S. Dhoni: The Untold Story is a 2016 Indian Hindi-language biographical sports drama film directed and co-written by Neeraj Pandey. It is based on the life of former Test, ODI and T20I captain of the Indian national cricket team, Mahendra Singh Dhoni. The film stars the late Sushant Singh Rajput a...

Source 3:
Disha Patani (pronounced [ˈd̪ɪʃa paːʈəˈni]; born 13 June 1992) is an Indian actress primarily working in Hindi films. Patani made her acting debut with the Telugu film Loafer (2015), and had her first Hindi film release with the biopic M.S. Dhoni: The Unt

In [None]:
# ✅ Step 1: Install dependencies
!pip install -q langchain langchain-community langchain-google-genai faiss-cpu biopython google-generativeai

# ✅ Step 2: Import necessary libraries
import os
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from Bio import Entrez

# ✅ Step 3: API keys
# Credentials and contact details are taken from the environment so that no
# secret or personal address is stored in the notebook; anything missing is
# requested interactively (the API key without echo).
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Gemini API key: ")

# Contact email used for PubMed (Entrez) requests; set ENTREZ_EMAIL to avoid
# being prompted.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or input("Enter your email for PubMed: ").strip()

# ✅ Step 4: Fetch articles from PubMed
def fetch_pubmed_articles(query, max_results=5):
    """Return plain-text abstracts of PubMed articles matching *query*.

    Args:
        query: PubMed search term. A blank term yields an empty list without
            contacting the service.
        max_results: Maximum number of article IDs requested from the search.
            Values of zero or less yield an empty list.

    Returns:
        A list with one text entry per fetched article, in the order the
        search returned the IDs. Articles whose fetched text is blank are
        skipped.
    """
    if not query or not query.strip() or max_results <= 0:
        return []

    # Search for matching article IDs; close the handle even if parsing fails.
    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(handle)
    finally:
        handle.close()

    abstracts = []
    for pmid in record["IdList"]:
        # One request per article keeps each abstract as a separate entry.
        fetch = Entrez.efetch(db="pubmed", id=pmid, rettype="abstract", retmode="text")
        try:
            abstract_text = fetch.read()
        finally:
            fetch.close()
        if abstract_text.strip():
            abstracts.append(abstract_text)
    return abstracts

# ✅ Step 5: Build vector store
def build_vectorstore_from_articles(articles):
    """Split article texts into chunks, embed them and index them in FAISS.

    Args:
        articles: Iterable of article texts (strings). Blank entries are
            ignored.

    Returns:
        The FAISS vector store built from the chunked texts.

    Raises:
        ValueError: If *articles* is empty or contains only blank text, since
            there would be nothing to embed or index.
    """
    usable = [text for text in (articles or []) if text and text.strip()]
    if not usable:
        raise ValueError("No article text available to build the vector store.")

    # Overlapping chunks (chunk_size=1000, chunk_overlap=200) so that content
    # cut at a chunk boundary still appears intact in a neighbouring chunk.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.create_documents(usable)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vectorstore = FAISS.from_documents(texts, embeddings)
    return vectorstore

# ✅ Step 6: Create Gemini-based QA system
def create_qa_chain(vectorstore):
    """Build a Gemini-backed RetrievalQA chain over *vectorstore*.

    Args:
        vectorstore: Vector store whose default retriever supplies the
            context documents.

    Returns:
        A RetrievalQA chain configured to return the retrieved source
        documents together with the answer.
    """
    return RetrievalQA.from_chain_type(
        llm=ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest", temperature=0.2),
        retriever=vectorstore.as_retriever(),
        return_source_documents=True,
    )

# ✅ Step 7: Ask your question
def ask_health_question(query, qa_chain):
    """Run *query* through *qa_chain* and print the answer with its sources.

    Args:
        query: The question to answer.
        qa_chain: A chain whose result is a mapping with a "result" entry and,
            optionally, a "source_documents" entry of objects exposing
            ``page_content``.

    Returns:
        None. The answer and the first 500 characters of each source document
        are written to standard output.
    """
    # Prefer `invoke`: calling a chain object directly is deprecated. Fall
    # back to the plain call for chain-like callables without `invoke`.
    run = getattr(qa_chain, "invoke", qa_chain)
    result = run({"query": query})

    print("\n🩺 Answer:\n")
    print(result["result"])
    print("\n📚 Sources:")
    for i, doc in enumerate(result.get("source_documents", []), start=1):
        print(f"\nSource {i}:\n{doc.page_content[:500]}...")

# ✅ Step 8: Run everything interactively
# Entry point: ask for a question, fetch related PubMed abstracts, index them,
# and answer the question with the Gemini-backed QA chain.
# The guard must use the double-underscore names `__name__` / `"__main__"`;
# the single-underscore spelling is an undefined name and raises NameError.
if __name__ == "__main__":
    user_query = input("💬 Enter your medical/healthcare question: ")

    print("\n🔍 Searching PubMed for related research...")
    articles = fetch_pubmed_articles(user_query, max_results=5)

    if not articles:
        # Without any abstracts there is nothing to index or answer from.
        print("❌ No articles found on this topic. Try a different question.")
    else:
        print("📚 Building knowledge base from PubMed articles...")
        vectorstore = build_vectorstore_from_articles(articles)

        print("🤖 Connecting to Gemini for answer generation...")
        qa_chain = create_qa_chain(vectorstore)

        ask_health_question(user_query, qa_chain)