# Medical RAG System - Document Retrieval and Question Answering

This notebook demonstrates a Retrieval-Augmented Generation (RAG) system that answers questions from a medical knowledge base.

In [None]:
# Install required packages
!pip install langchain openai langchain-google-genai langchain-community

In [None]:
# Import required libraries
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
import os
import re
from collections import Counter
from getpass import getpass

In [None]:
# Load medical knowledge base
# Reads medical_data.txt (path relative to the notebook's working directory)
# into `documents`, which the chunking cell below splits.
# NOTE(review): no encoding is passed to TextLoader, so it presumably falls back
# to the platform default — confirm the file decodes correctly on all machines.
loader = TextLoader("medical_data.txt")
documents = loader.load()
print(f"Loaded {len(documents)} documents")

In [None]:
# Split documents into chunks for better retrieval
# `docs` is the chunk list that the search and QA functions below operate on.
# NOTE(review): CharacterTextSplitter splits on its separator (a blank line by
# default, per the LangChain docs — confirm), so a chunk may exceed chunk_size
# when the source text contains few separators.
splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = splitter.split_documents(documents)
print(f"Created {len(docs)} document chunks")

In [None]:
# Simple text-based retrieval system
def simple_search(query, documents, top_k=3):
    """Rank documents by how often the query's words occur in them.

    Args:
        query: Free-text question or keyword string.
        documents: Sequence of objects exposing a ``page_content`` string.
        top_k: Maximum number of results to return. ``None`` returns every
            document; values <= 0 return an empty list.

    Returns:
        A list of ``(score, index, document)`` tuples, highest score first.
        Ties keep the original document order. Documents that score 0 are
        still included when fewer than ``top_k`` documents match.
    """
    # Tokenize on word characters so punctuation attached to a word
    # ("diabetes?") does not stop the keyword from matching the document text.
    query_words = re.findall(r"\w+", query.lower())

    scored = []
    for index, doc in enumerate(documents):
        doc_text = doc.page_content.lower()
        score = sum(doc_text.count(word) for word in query_words)
        scored.append((score, index, doc))

    # Sort on (score, index) only, so document objects are never compared.
    scored.sort(key=lambda item: (-item[0], item[1]))

    if top_k is None:
        return scored
    return scored[:max(top_k, 0)]

print("Document retrieval system ready!")

In [None]:
# API Key Setup (Optional - for enhanced responses)
# Uncomment ONE of the following options to enable API-based responses.
# The key is prompted for with getpass at run time and placed in os.environ,
# so it never has to be typed into the notebook source.
# enhanced_qa_system() looks for OPENAI_API_KEY first, then GOOGLE_API_KEY.

# Option 1: OpenAI API
# openai_api_key = getpass("Enter your OpenAI API Key: ")
# os.environ["OPENAI_API_KEY"] = openai_api_key

# Option 2: Google Generative AI
# google_api_key = getpass("Enter your Google API Key: ")
# os.environ["GOOGLE_API_KEY"] = google_api_key

print("API setup ready (currently using simple text-based responses)")

In [None]:
# Answer generation function
def generate_answer(query, retrieved_docs):
    """Build a short extractive answer from the retrieved documents.

    Args:
        query: Free-text question or keyword string.
        retrieved_docs: Iterable of ``(score, index, document)`` tuples, as
            returned by ``simple_search``; each document exposes a
            ``page_content`` string.

    Returns:
        The first three distinct sentences that contain any query word,
        joined with ``". "`` and terminated with ``"."``. If no sentence
        matches, a fixed "no information" message is returned.
    """
    # Tokenize on word characters so punctuation attached to a word
    # ("diabetes?") does not stop the keyword from matching.
    query_words = re.findall(r"\w+", query.lower())

    relevant_sentences = []
    seen = set()
    for _score, _idx, doc in retrieved_docs:
        # Split only on periods that end a sentence (followed by whitespace or
        # the end of the text) so decimals such as "5.5" stay intact.
        for raw_sentence in re.split(r"\.(?=\s|$)", doc.page_content):
            sentence = raw_sentence.strip()
            lowered = sentence.lower()
            # Skip empty fragments and sentences already collected; the same
            # sentence can appear in more than one retrieved chunk.
            if not sentence or lowered in seen:
                continue
            if any(word in lowered for word in query_words):
                seen.add(lowered)
                relevant_sentences.append(sentence)

    if relevant_sentences:
        return ". ".join(relevant_sentences[:3]) + "."
    return "No specific information found in the medical knowledge base."

print("Answer generation system ready!")

In [None]:
# Main QA System
def medical_qa_system(question):
    """Answer a medical question from the chunked knowledge base.

    Runs keyword retrieval over the module-level ``docs`` chunks (top three
    hits) and then builds an extractive answer from those hits.

    Args:
        question: The user's question as a string.

    Returns:
        A ``(answer, retrieved_docs)`` tuple, where ``retrieved_docs`` is the
        list produced by ``simple_search``.
    """
    hits = simple_search(question, docs, top_k=3)
    return generate_answer(question, hits), hits

print("Medical QA system ready!")

In [None]:
# Enhanced QA System with API Integration
def _build_medical_prompt(context, question):
    """Render the prompt shared by both LLM providers."""
    return (
        "Based on the medical information below, answer the question comprehensively.\n\n"
        f"Context: {context}\n"
        f"Question: {question}\n\n"
        "Answer:"
    )


def _ask_openai(prompt, model):
    """Send the prompt to the OpenAI chat completions API and return the reply text."""
    # Imported lazily: the package is only needed when this provider is used.
    import openai

    client = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=500,
        temperature=0.3,
    )
    return response.choices[0].message.content


def _ask_gemini(prompt, model):
    """Send the prompt to Google Generative AI and return the reply text."""
    # Imported lazily: the package is only needed when this provider is used.
    import google.generativeai as genai

    genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
    return genai.GenerativeModel(model).generate_content(prompt).text


def enhanced_qa_system(question, use_api=False,
                       openai_model="gpt-3.5-turbo", gemini_model="gemini-pro"):
    """Answer a medical question, optionally using an LLM API.

    Retrieval always uses ``simple_search`` over the module-level ``docs``.
    When ``use_api`` is true, the retrieved chunks are sent as context to
    OpenAI (if ``OPENAI_API_KEY`` is set and non-empty) or otherwise to Google
    Generative AI (if ``GOOGLE_API_KEY`` is set and non-empty). Any API error,
    a missing client package, or an empty reply falls back to
    ``generate_answer``.

    Args:
        question: The user's question as a string.
        use_api: Whether to attempt an API-generated answer.
        openai_model: Model name passed to the OpenAI chat completions call.
        gemini_model: Model name passed to ``genai.GenerativeModel``.

    Returns:
        A ``(answer, retrieved_docs)`` tuple.
    """
    retrieved_docs = simple_search(question, docs, top_k=3)

    if use_api:
        # A blank key counts as "not configured" rather than triggering a
        # request that is guaranteed to fail.
        if os.environ.get("OPENAI_API_KEY"):
            ask, model = _ask_openai, openai_model
        elif os.environ.get("GOOGLE_API_KEY"):
            ask, model = _ask_gemini, gemini_model
        else:
            ask, model = None, None

        if ask is not None:
            context = "\n\n".join(doc.page_content for _score, _idx, doc in retrieved_docs)
            try:
                answer = ask(_build_medical_prompt(context, question), model)
                # The reply can be empty or None; never hand that to callers
                # that expect a string.
                if answer and answer.strip():
                    return answer, retrieved_docs
                print("API returned an empty response. Using simple generation...")
            except Exception as e:
                # Deliberate best-effort: any API or import failure degrades to
                # the local extractive answer instead of stopping the notebook.
                print(f"API error: {e}. Using simple generation...")

    # Fallback to simple generation
    return generate_answer(question, retrieved_docs), retrieved_docs

print("Enhanced QA system ready!")

In [None]:
# Demo: run a handful of sample questions through the Medical RAG system
divider = "=" * 50
print("🏥 MEDICAL RAG SYSTEM DEMO")
print(divider)

test_questions = [
    "What are the symptoms of diabetes?",
    "How is hypertension treated?",
    "What causes asthma?",
    "What are the risk factors for heart disease?",
]

for i, question in enumerate(test_questions, 1):
    print(f"\n{i}. Question: {question}")
    answer, retrieved_docs = medical_qa_system(question)
    # Show at most 200 characters of the answer, marking any truncation.
    preview = answer[:200] + ('...' if len(answer) > 200 else '')
    print(f"   Answer: {preview}")
    print(f"   Sources: {len(retrieved_docs)} relevant document chunks")

print("\n" + divider)
print("✅ System working successfully!")
print("💡 To get enhanced AI responses, add your API key in the setup section above.")