<a href="https://colab.research.google.com/github/mmarushika/sdc-lab/blob/main/RAGMedicalQA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# ✅ STEP 1: Install Dependencies (including langchain-community)


# ✅ STEP 2: Load Sample Medical Documents
medical_docs = [
    "Hypertension is a condition in which the blood pressure in the arteries is persistently elevated.",
    "Diabetes is a metabolic disease that causes high blood sugar. The hormone insulin moves sugar from the blood into your cells.",
    "Asthma is a respiratory condition marked by spasms in the bronchi of the lungs, causing difficulty in breathing.",
    "A migraine is a headache of varying intensity, often accompanied by nausea and sensitivity to light and sound.",
]

# ✅ STEP 3: Create Embeddings & Vector Store
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document

# Convert raw texts to Document objects
documents = [Document(page_content=doc) for doc in medical_docs]

# Split into chunks
splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=50)
docs = splitter.split_documents(documents)

# Use sentence-transformers for embeddings
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create FAISS vector store
db = FAISS.from_documents(docs, embedding)

# ✅ STEP 4: Setup the LLM using Hugging Face pipeline
from langchain.llms import HuggingFacePipeline
from transformers import pipeline

# Load FLAN-T5 model for question answering
qa_pipeline = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    max_length=256,
    model_kwargs={"temperature": 0}
)

# Wrap pipeline with LangChain LLM
llm = HuggingFacePipeline(pipeline=qa_pipeline)

# ✅ STEP 5: Create the Retrieval-Augmented Generation Chain
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(),
    return_source_documents=True
)

# ✅ STEP 6: Ask Medical Questions
query = "What is hypertension?"
result = qa_chain(query)

# ✅ STEP 7: Display the Answer and Source
print("🔍 Question:", query)
print("💬 Answer:", result['result'])
print("\n📚 Source Document(s):")
for doc in result['source_documents']:
    print("-", doc.page_content)


Device set to use cpu


🔍 Question: What is hypertension?
💬 Answer: a condition in which the blood pressure in the arteries is persistently elevated

📚 Source Document(s):
- Hypertension is a condition in which the blood pressure in the arteries is persistently elevated.
- Diabetes is a metabolic disease that causes high blood sugar. The hormone insulin moves sugar from the blood into your cells.
- Asthma is a respiratory condition marked by spasms in the bronchi of the lungs, causing difficulty in breathing.
- A migraine is a headache of varying intensity, often accompanied by nausea and sensitivity to light and sound.
