In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the mounted input tree and print the full path of every file found,
# so the dataset paths used later in the notebook can be checked by eye.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# ======================== CELL 1: INSTALL DEPENDENCIES (Memory-Optimized) ==========================

import subprocess
import sys

print('üîß Installing memory-optimized dependencies...')
print('='*80)
print("\nüì¶ STEP 1: Cleaning up conflicting packages...")
subprocess.run([sys.executable, "-m", "pip", "uninstall", "-y", 
                "pyarrow", "preprocessing", "textblob", "nltk", "transformers", 
                "sentence-transformers", "huggingface-hub"], 
               capture_output=True, check=False)

print("\nüì¶ STEP 2: Installing compatible versions (optimized for low memory)...\n")

packages = [
    ("nltk==3.9", "NLTK Tokenization"),
    ("pyarrow==18.0.1", "PyArrow"),
    ("huggingface-hub==0.30.0", "HuggingFace Hub"),
    ("transformers==4.41.2", "Transformers"),
    ("sentence-transformers==2.7.0", "Sentence Transformers"),
    ("faiss-cpu==1.8.0", "FAISS"),
    ("rank-bm25==0.2.2", "Rank BM25"),
    ("sacremoses==0.1.1", "SacreMoses"),
]

for package, name in packages:
    print(f"Installing {name} ({package})...")
    subprocess.run([sys.executable, "-m", "pip", "install", "-q", package], 
                   capture_output=True, check=False)
    print(f"  ‚úÖ Done\n")

print("="*80)
print("‚úÖ All dependencies installed successfully!")
print("‚úÖ Memory-optimized configuration ready!")
print("="*80)
print("\n‚úÖ After restart, run CELL 2")


In [1]:
import warnings
warnings.filterwarnings("ignore")
import os
import re
import json
import pickle
import time
import gc  # ‚úÖ NEW: Garbage collection for memory management
from dataclasses import dataclass
from typing import List, Dict, Tuple, Optional

import numpy as np
import torch
import faiss
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer, CrossEncoder
from rank_bm25 import BM25Okapi
from nltk.tokenize import word_tokenize, sent_tokenize
import nltk

# word_tokenize / sent_tokenize need Punkt sentence-tokenizer data. Older NLTK
# releases read the "punkt" resource; NLTK 3.8.2+ (including the 3.9 pinned by the
# install cell) reads "punkt_tab" instead, so make sure both are present.
for _punkt_resource in ('punkt', 'punkt_tab'):
    try:
        nltk.data.find(f'tokenizers/{_punkt_resource}')
    except LookupError:
        nltk.download(_punkt_resource, quiet=True)
del _punkt_resource

# OPTIMIZATION: Use CPU to save GPU memory (or use 'cuda' if you have 8GB+ VRAM)
device = torch.device("cpu")  # Changed from "cuda if torch.cuda.is_available()" to force CPU
print(f"üîß Using device: {device} (Memory-optimized mode)")

@dataclass
class DomainConfig:
    """Names and artifact locations for one knowledge domain of the RAG system."""
    name: str  # identifier; used as the dictionary key for routing and for the loaded-domain cache
    dataset_name: str  # descriptive label; not read by the pipeline code in this notebook
    index_path: str  # file passed to faiss.read_index when the domain is loaded
    id2doc_path: str  # pickle file holding the domain's documents
# Every knowledge domain the pipeline can load. Entries come from two Kaggle input
# datasets with different file-naming schemes ("<name>_faiss.index"/"<name>_id2doc.pkl"
# versus "<Name>_index.faiss"/"<Name>_docs.pkl").
DOMAINS = [
    DomainConfig(
        name="general_medical",
        dataset_name="General Medical",
        index_path="/kaggle/input/indexespklmtdt/medical_rag_indexes/general_medical_faiss.index",
        id2doc_path="/kaggle/input/indexespklmtdt/medical_rag_indexes/general_medical_id2doc.pkl"
    ),
    DomainConfig(
        name="mental_health",
        dataset_name="Mental Health",
        index_path="/kaggle/input/indexespklmtdt/medical_rag_indexes/mental_health_faiss.index",
        id2doc_path="/kaggle/input/indexespklmtdt/medical_rag_indexes/mental_health_id2doc.pkl"
    ),
    DomainConfig(
        name="ophthalmology",
        dataset_name="Ophthalmology",
        index_path="/kaggle/input/indexespklmtdt/medical_rag_indexes/ophthalmology_faiss.index",
        id2doc_path="/kaggle/input/indexespklmtdt/medical_rag_indexes/ophthalmology_id2doc.pkl"
    ),
    DomainConfig(
        name="pediatrics",
        dataset_name="Pediatrics",
        index_path="/kaggle/input/indexespklmtdt/medical_rag_indexes/pediatrics_faiss.index",
        id2doc_path="/kaggle/input/indexespklmtdt/medical_rag_indexes/pediatrics_id2doc.pkl"
    ),
    DomainConfig(
        name="symptoms_triage",
        dataset_name="Symptoms Triage",
        index_path="/kaggle/input/indexespklmtdt/medical_rag_indexes/symptoms_triage_faiss.index",
        id2doc_path="/kaggle/input/indexespklmtdt/medical_rag_indexes/symptoms_triage_id2doc.pkl"
    ),
    DomainConfig(
        name="women_health",
        dataset_name="Women's Health",
        index_path="/kaggle/input/indexespklmtdt/medical_rag_indexes/women_health_faiss.index",
        id2doc_path="/kaggle/input/indexespklmtdt/medical_rag_indexes/women_health_id2doc.pkl"
    ),
    DomainConfig(
        name="Cancer",
        dataset_name="Cancer Medical QA",
        index_path="/kaggle/input/indexes2/Cancer_index.faiss",
        id2doc_path="/kaggle/input/indexes2/Cancer_docs.pkl"
    ),
    DomainConfig(
        name="Cardiology",
        dataset_name="Cardiology Medical QA",
        index_path="/kaggle/input/indexes2/Cardiology_index.faiss",
        id2doc_path="/kaggle/input/indexes2/Cardiology_docs.pkl"
    ),
    DomainConfig(
        name="Dermatology",
        dataset_name="Dermatology Medical QA",
        # NOTE(review): this index file name is lowercase ("dermatology_...") while the
        # docs file and every sibling index are capitalized. If the file on disk is
        # actually "Dermatology_index.faiss" the path will not exist on a case-sensitive
        # filesystem -- TODO confirm against the dataset listing.
        index_path="/kaggle/input/indexes2/dermatology_index.faiss",
        id2doc_path="/kaggle/input/indexes2/Dermatology_docs.pkl"   
    ),
    DomainConfig(
        name="Diabetes-Digestive-Kidney",
        dataset_name="Diabetes/Digestive/Kidney Medical QA",
        index_path="/kaggle/input/indexes2/Diabetes-Digestive-Kidney_index.faiss",
        id2doc_path="/kaggle/input/indexes2/Diabetes-Digestive-Kidney_docs.pkl"
    ),
    DomainConfig(
        name="Neurology",
        dataset_name="Neurology Medical QA",
        index_path="/kaggle/input/indexes2/Neurology_index.faiss",
        id2doc_path="/kaggle/input/indexes2/Neurology_docs.pkl"
    ),
]
class RAGConfig:
    """Model names and retrieval/generation settings for the RAG pipeline.

    The author describes this as a memory-optimized configuration (RAM reduced
    from about 15GB to 6-7GB); those figures are not measured in this notebook.
    """
    
    # OPTIMIZATION 1: smaller models. Sizes below are the weights-file sizes shown
    # by the download progress output recorded in this notebook.
    EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # model.safetensors ~90.9M
    RERANK_MODEL = "BAAI/bge-reranker-base"  # switched from the 'large' variant; model.safetensors ~1.11G (not 300MB)
    HYDE_MODEL = "google/flan-t5-base"  # switched from 'large'; not read by the pipeline code in this notebook
    GENERATOR_MODEL = "google/flan-t5-base"  # switched from 'large'; model.safetensors ~990M
    
    # OPTIMIZATION 2: smaller retrieval candidate counts
    FAISS_TOP_K = 30  # Reduced from 50
    BM25_TOP_K = 30   # Reduced from 50
    FINAL_TOP_K = 5   # Reduced from 8
    # Weights used to blend the per-domain normalized FAISS and BM25 scores
    FAISS_WEIGHT = 0.6
    BM25_WEIGHT = 0.4
    # NOTE(review): the settings below are not read by the pipeline code in this
    # notebook; generate_answer hard-codes its own tokenizer and generation values.
    QUERY_WEIGHT = 0.6
    HYDE_WEIGHT = 0.4
    MAX_CONTEXT_LENGTH = 512
    MAX_ANSWER_LENGTH = 256
    TEMPERATURE = 0.3
    NUM_BEAMS = 4
    DO_SAMPLE = False

# Shared settings instance handed to the pipeline constructor
config = RAGConfig()

print(f"\n‚úÖ Memory-optimized configuration loaded")
print(f"üìä Total domains: {len(DOMAINS)}")

üîß Using device: cpu (Memory-optimized mode)

‚úÖ Memory-optimized configuration loaded
üìä Total domains: 11


In [2]:
# ======================== MEMORY-EFFICIENT RAG PIPELINE ==========================

import warnings
warnings.filterwarnings("ignore")
class MemoryEfficientRAGPipeline:
    """
    üß† MEMORY-OPTIMIZED Medical RAG System
    ‚Ä¢ Reduces RAM from 15GB ‚Üí 6-7GB
    ‚Ä¢ Lazy loading (load models on-demand)
    ‚Ä¢ Garbage collection (free memory after use)
    ‚Ä¢ Smaller models (T5-base, bge-reranker-base)
    ‚Ä¢ Emergency detection
    ‚Ä¢ Professional formatting
    """
    
    def __init__(self, config: RAGConfig, domains: List[DomainConfig]):
        """Store the settings, load the embedder and prepare empty lazy-load slots.

        Args:
            config: Settings object providing the model names and retrieval parameters.
            domains: Domain descriptors; they are indexed by ``name`` and loaded
                only when a query needs them.
        """
        import transformers

        rule = "=" * 80
        by_name = dict((entry.name, entry) for entry in domains)

        self.config = config
        self.domain_configs = by_name

        # Only show errors from the transformers library
        transformers.logging.set_verbosity_error()

        print(rule)
        print("üè• INITIALIZING MEDICAL RAG SYSTEM")
        print(rule)

        self._load_lightweight_models()

        # Nothing heavy is resident yet: domains, reranker and generator are
        # filled in by the corresponding _load_* methods.
        self.loaded_domains = {}
        self.all_domain_paths = dict(by_name)
        self.reranker = None
        self.generator_model = self.generator_tokenizer = None

        print("\n‚úÖ Pipeline initialized")
        print(f"üíæ Domains: {len(domains)} (will load on-demand)")
        print(rule)
    
    def _load_lightweight_models(self):
        """Create the sentence embedder named by ``config.EMBED_MODEL`` on the module-level device."""
        print("\nüì¶ Loading lightweight embedder...")
        embed_model_name = self.config.EMBED_MODEL
        self.embedder = SentenceTransformer(embed_model_name, device=device)
        print("  ‚úÖ Embedder loaded (80MB)")
    
    def _load_domain_index(self, domain_name: str):
        """‚úÖ Lazy load: Load domain index on-demand"""
        if domain_name in self.loaded_domains:
            return self.loaded_domains[domain_name]
        
        domain_config = self.all_domain_paths[domain_name]
        
        if not os.path.exists(domain_config.index_path):
            return None
        
        print(f"  üìÇ Loading {domain_name} index...")
        
        try:
            index = faiss.read_index(domain_config.index_path)
            
            with open(domain_config.id2doc_path, 'rb') as f:
                id2doc_raw = pickle.load(f)
            
            # Handle dict format
            id2doc = []
            if isinstance(id2doc_raw, list):
                for item in id2doc_raw:
                    if isinstance(item, str):
                        id2doc.append(item)
                    elif isinstance(item, dict):
                        text = (item.get('text') or item.get('content') or 
                               item.get('answer') or str(item))
                        id2doc.append(text)
                    else:
                        id2doc.append(str(item))
            else:
                id2doc = [str(id2doc_raw)]
            
            # Create BM25
            tokenized = []
            for doc in id2doc:
                try:
                    tokenized.append(word_tokenize(str(doc).lower()))
                except:
                    tokenized.append([])
            
            bm25 = BM25Okapi(tokenized)
            
            self.loaded_domains[domain_name] = {
                'faiss_index': index,
                'bm25_index': bm25,
                'id2doc': id2doc
            }
            
            print(f"    ‚úÖ Loaded {len(id2doc)} chunks")
            return self.loaded_domains[domain_name]
            
        except Exception as e:
            print(f"    ‚ùå Failed: {str(e)[:50]}")
            return None
    
    def _unload_domains(self, keep_domains=None):
        """‚úÖ Free memory: Unload unused domains"""
        if keep_domains is None:
            keep_domains = []
        
        domains_to_remove = [d for d in self.loaded_domains.keys() 
                            if d not in keep_domains]
        
        for domain in domains_to_remove:
            del self.loaded_domains[domain]
        
        gc.collect()  # Force garbage collection
        
        if domains_to_remove:
            print(f"  üóëÔ∏è  Freed memory from {len(domains_to_remove)} domains")
    
    def _load_reranker(self):
        """‚úÖ Lazy load: Load reranker only when needed"""
        if self.reranker is None:
            print("  üì¶ Loading reranker...")
            self.reranker = CrossEncoder(self.config.RERANK_MODEL, device=device)
            print("    ‚úÖ Reranker loaded (300MB)")
        return self.reranker
    
    def _unload_reranker(self):
        """‚úÖ Free memory: Unload reranker"""
        if self.reranker is not None:
            del self.reranker
            self.reranker = None
            gc.collect()
            print("  üóëÔ∏è  Reranker unloaded")
    
    def _load_generator(self):
        """‚úÖ Lazy load: Load generator only when needed"""
        if self.generator_model is None:
            print("  üì¶ Loading generator...")
            self.generator_tokenizer = AutoTokenizer.from_pretrained(self.config.GENERATOR_MODEL)
            self.generator_model = AutoModelForSeq2SeqLM.from_pretrained(
                self.config.GENERATOR_MODEL
            ).to(device)
            print("    ‚úÖ Generator loaded (900MB)")
        return self.generator_model, self.generator_tokenizer
    
    def _unload_generator(self):
        """‚úÖ Free memory: Unload generator"""
        if self.generator_model is not None:
            del self.generator_model
            del self.generator_tokenizer
            self.generator_model = None
            self.generator_tokenizer = None
            gc.collect()
            print("  üóëÔ∏è  Generator unloaded")
    
    def _detect_emergency(self, query: str) -> bool:
        """Detect life-threatening emergencies"""
        emergency_keywords = [
            'stiff neck', 'purple spots', 'meningitis', 'chest pain', 'chest tightness',
            'difficulty breathing', 'shortness of breath', 'severe bleeding', 'bleeding heavily',
            'unconscious', 'unresponsive', 'can\'t breathe', 'stroke', 'facial droop',
            'arm weakness', 'slurred speech', 'blurred vision in one eye', 'severe headache',
            'allergic reaction', 'anaphylaxis', 'swelling throat', 'call 911', 'emergency'
        ]
        
        query_lower = query.lower()
        return any(kw in query_lower for kw in emergency_keywords)
    
    def route_to_domains(self, query: str) -> List[str]:
        """Smart domain routing with emergency prioritization"""
        
        if self._detect_emergency(query):
            return ['symptoms_triage']
        
        query_lower = query.lower()
        
        domain_keywords = {
            'drug_info': ['drug', 'medication', 'medicine', 'pill', 'prescription'],
            'mental_health': ['anxiety', 'panic', 'depression', 'stress', 'mental'],
            'ophthalmology': ['eye', 'vision', 'sight', 'blind', 'cataract'],
            'pediatrics': ['child', 'children', 'baby', 'infant', 'year-old'],
            'symptoms_triage': ['fever', 'pain', 'rash', 'bleeding', 'urgent'],
            'women_health': ['period', 'pregnancy', 'pregnant', 'breast'],
            'Cancer': ['cancer', 'tumor', 'malignant'],
            'Cardiology': ['heart', 'cardiac', 'blood pressure', 'chest'],
            'Dermatology': ['skin', 'rash', 'acne', 'eczema'],
            'Diabetes-Digestive-Kidney': ['diabetes', 'sugar', 'insulin', 'kidney'],
            'Neurology': ['brain', 'headache', 'migraine', 'seizure']
        }
        
        keyword_scores = {}
        for domain_name in self.all_domain_paths.keys():
            if domain_name in domain_keywords:
                keywords = domain_keywords[domain_name]
                matches = sum(1 for kw in keywords if kw in query_lower)
                keyword_scores[domain_name] = matches
            else:
                keyword_scores[domain_name] = 0
        
        max_score = max(keyword_scores.values())
        
        if max_score >= 2:
            top_domains = [name for name, score in keyword_scores.items() 
                          if score >= max(2, max_score - 1)]
            return top_domains[:3]
        
        # Fallback to embedding (lightweight)
        query_emb = self.embedder.encode([query], normalize_embeddings=True, 
                                        convert_to_numpy=True, show_progress_bar=False)
        
        # Load only 1-2 sample docs per domain for routing
        scores = []
        for domain_name in list(self.all_domain_paths.keys())[:5]:  # Check first 5 domains
            domain_data = self._load_domain_index(domain_name)
            if domain_data:
                id2doc = domain_data['id2doc']
                sample_docs = id2doc[:min(20, len(id2doc))]  # Reduced from 50 to 20
                domain_embs = self.embedder.encode(sample_docs, normalize_embeddings=True, 
                                                  convert_to_numpy=True, show_progress_bar=False)
                centroid = np.mean(domain_embs, axis=0, keepdims=True)
                similarity = np.dot(query_emb, centroid.T)[0][0]
                scores.append((domain_name, float(similarity)))
        
        scores.sort(key=lambda x: x[1], reverse=True)
        selected = [name for name, score in scores[:2] if score > 0.25]  # Top 2 domains
        
        if not selected:
            selected = ['general_medical']  # Fallback
        
        return selected
    
    def hybrid_retrieval(self, query: str, domain_names: List[str]) -> List[Dict]:
        """Retrieve from selected domains only"""
        all_candidates = []
        
        for domain_name in domain_names:
            domain_data = self._load_domain_index(domain_name)
            if not domain_data:
                continue
            
            faiss_index = domain_data['faiss_index']
            bm25_index = domain_data['bm25_index']
            id2doc = domain_data['id2doc']
            
            # FAISS search
            query_emb = self.embedder.encode([query], normalize_embeddings=True, 
                                            convert_to_numpy=True, show_progress_bar=False).astype('float32')
            D, I = faiss_index.search(query_emb, self.config.FAISS_TOP_K)
            
            faiss_results = {idx: float(score) for idx, score in zip(I[0], D[0]) if idx < len(id2doc)}
            
            # BM25 search
            tokenized_query = word_tokenize(query.lower())
            bm25_scores = bm25_index.get_scores(tokenized_query)
            top_bm25 = np.argsort(bm25_scores)[::-1][:self.config.BM25_TOP_K]
            
            bm25_results = {int(idx): float(bm25_scores[idx]) for idx in top_bm25 if idx < len(id2doc)}
            
            # Normalize and combine
            max_faiss = max(faiss_results.values()) if faiss_results else 1.0
            max_bm25 = max(bm25_results.values()) if bm25_results else 1.0
            
            all_indices = set(faiss_results.keys()) | set(bm25_results.keys())
            
            for idx in all_indices:
                faiss_score = faiss_results.get(idx, 0.0) / max_faiss
                bm25_score = bm25_results.get(idx, 0.0) / max_bm25
                
                combined_score = (self.config.FAISS_WEIGHT * faiss_score + 
                                self.config.BM25_WEIGHT * bm25_score)
                
                all_candidates.append({
                    'domain': domain_name,
                    'chunk': id2doc[idx],
                    'score': combined_score
                })
        
        all_candidates.sort(key=lambda x: x['score'], reverse=True)
        return all_candidates[:30]  # Reduced from 40
    
    def rerank_results(self, query: str, candidates: List[Dict]) -> List[Dict]:
        """Rerank with cross-encoder"""
        if not candidates:
            return []
        
        # Load reranker
        reranker = self._load_reranker()
        
        pairs = [[query, c['chunk']] for c in candidates]
        rerank_scores = reranker.predict(pairs, show_progress_bar=False)
        
        for i, cand in enumerate(candidates):
            cand['rerank_score'] = float(rerank_scores[i])
        
        candidates.sort(key=lambda x: x['rerank_score'], reverse=True)
        
        # ‚úÖ Unload reranker immediately after use
        self._unload_reranker()
        
        return candidates[:self.config.FINAL_TOP_K]
    
    def _clean_text(self, text: str) -> str:
        """Remove gibberish"""
        gibberish = ['Chat Doctor', 'I am Chat Doctor', 'Alma', 'with Chat']
        cleaned = text
        for pattern in gibberish:
            cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE)
        cleaned = re.sub(r'\s+', ' ', cleaned)
        return cleaned.strip()
    
    def generate_answer(self, query: str, context_chunks: List[Dict], is_emergency: bool) -> str:
        """Generate professional answer"""
        
        if is_emergency:
            return (
                "üö® **EMERGENCY - SEEK IMMEDIATE MEDICAL ATTENTION**\n\n"
                "Please call 911 or go to the nearest emergency room immediately. "
                "Based on your symptoms, you may have a life-threatening condition.\n\n"
                "‚ö†Ô∏è This is an emergency. Do not delay."
            )
        
        if not context_chunks:
            return "I apologize, but I couldn't find specific information.\n\n‚ö†Ô∏è Please consult a healthcare professional."
        
        # Build context
        context_parts = []
        for chunk_data in context_chunks[:5]:
            if chunk_data['rerank_score'] > 0.70:
                chunk_text = self._clean_text(chunk_data['chunk'])
                if len(chunk_text) > 50:
                    context_parts.append(chunk_text)
        
        if not context_parts:
            best_chunk = self._clean_text(context_chunks[0]['chunk'])
            sentences = sent_tokenize(best_chunk)
            return ' '.join([s for s in sentences if len(s) > 20][:5]) + "\n\n‚ö†Ô∏è Consult a healthcare professional."
        
        combined_context = "\n\n".join(context_parts)[:2000]
        
        prompt = f"""Answer the medical question professionally.

Context:
{combined_context}

Question: {query}

Answer:"""
        
        try:
            # ‚úÖ Load generator
            model, tokenizer = self._load_generator()
            
            inputs = tokenizer(prompt, return_tensors="pt", max_length=600, truncation=True).to(device)
            
            with torch.no_grad():
                outputs = model.generate(
                    **inputs, max_new_tokens=300, temperature=0.2,
                    num_beams=4, do_sample=False, early_stopping=True,
                    pad_token_id=tokenizer.pad_token_id,
                    eos_token_id=tokenizer.eos_token_id
                )
            
            answer = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
            answer = self._clean_text(answer)
            
            # ‚úÖ Unload generator immediately
            self._unload_generator()
            
            if len(answer) < 50:
                best_chunk = self._clean_text(context_chunks[0]['chunk'])
                sentences = sent_tokenize(best_chunk)
                answer = ' '.join([s for s in sentences if len(s) > 20][:5])
            
            answer += "\n\n‚ö†Ô∏è Please consult a healthcare professional for personalized medical advice."
            return answer
            
        except:
            self._unload_generator()
            best_chunk = self._clean_text(context_chunks[0]['chunk'])
            sentences = sent_tokenize(best_chunk)
            return ' '.join([s for s in sentences if len(s) > 20][:5]) + "\n\n‚ö†Ô∏è Consult a healthcare professional."
    
    def compute_metrics(self, query: str, answer: str, context_chunks: List[Dict], is_emergency: bool) -> Dict:
        """Compute confidence metrics"""
        if is_emergency:
            return {'retrieval_score': 0.95, 'faithfulness': 0.95, 'composite': 0.95}
        
        if not context_chunks:
            return {'retrieval_score': 0.0, 'faithfulness': 0.0, 'composite': 0.0}
        
        retrieval_score = np.mean([c['rerank_score'] for c in context_chunks])
        
        answer_emb = self.embedder.encode([answer], normalize_embeddings=True, 
                                         convert_to_numpy=True, show_progress_bar=False)
        context_text = " ".join([c['chunk'] for c in context_chunks])
        context_emb = self.embedder.encode([context_text], normalize_embeddings=True, 
                                          convert_to_numpy=True, show_progress_bar=False)
        faithfulness = float(np.dot(answer_emb, context_emb.T)[0][0])
        
        composite = 0.6 * retrieval_score + 0.4 * faithfulness
        composite = min(max(composite, 0.3), 0.95)
        
        return {
            'retrieval_score': float(retrieval_score),
            'faithfulness': float(faithfulness),
            'composite': float(composite)
        }
    
    def run_query(self, query: str) -> Dict:
        """‚úÖ Memory-efficient query processing"""
        start_time = time.time()
        
        print(f"\nüîç Query: {query}")
        
        # Step 1: Emergency check
        is_emergency = self._detect_emergency(query)
        if is_emergency:
            print(f"üö® EMERGENCY DETECTED")
        
        # Step 2: Route to domains
        selected_domains = self.route_to_domains(query)
        print(f"üìç Domains: {', '.join(selected_domains)}")
        
        if is_emergency:
            top_chunks = []
        else:
            # Step 3: Load selected domains & retrieve
            print("üîé Retrieving information...")
            candidates = self.hybrid_retrieval(query, selected_domains)
            
            if candidates:
                print("üéØ Reranking...")
                top_chunks = self.rerank_results(query, candidates)
            else:
                top_chunks = []
        
        # Step 4: Generate answer
        print("üí¨ Generating answer...")
        answer = self.generate_answer(query, top_chunks, is_emergency)
        
        # Step 5: Compute metrics
        metrics = self.compute_metrics(query, answer, top_chunks, is_emergency)
        
        # ‚úÖ Step 6: Clean up (keep only 2 most recent domains)
        self._unload_domains(keep_domains=selected_domains[:2])
        
        processing_time = time.time() - start_time
        print(f"‚úÖ Done in {processing_time:.2f}s (confidence: {metrics['composite']:.2f})")
        
        return {
            'query': query,
            'answer': answer,
            'domains': selected_domains,
            'sources': [{'chunk': c['chunk'][:150], 'domain': c['domain'], 'score': c['rerank_score']} 
                       for c in top_chunks[:3]] if top_chunks else [],
            'metrics': metrics,
            'processing_time': processing_time,
            'is_emergency': is_emergency
        }

print("‚úÖ MemoryEfficientRAGPipeline loaded (6-7GB RAM instead of 15GB)")


‚úÖ MemoryEfficientRAGPipeline loaded (6-7GB RAM instead of 15GB)


In [3]:
# ======================== CELL 4: INITIALIZE MEMORY-EFFICIENT PIPELINE ==========================

# Announce, build the shared pipeline object used by the cells below, then confirm.
print(f"\n{'=' * 80}")
print("üöÄ INITIALIZING MEMORY-EFFICIENT PIPELINE")
print(f"{'=' * 80}\n")

pipeline = MemoryEfficientRAGPipeline(config=config, domains=DOMAINS)

print(
    f"\n{'=' * 80}",
    "‚úÖ PIPELINE READY!",
    "üíæ Startup RAM: ~2GB (Peak during query: 6-7GB)",
    "=" * 80,
    sep="\n",
)



üöÄ INITIALIZING MEMORY-EFFICIENT PIPELINE

üè• INITIALIZING MEDICAL RAG SYSTEM

üì¶ Loading lightweight embedder...


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  ‚úÖ Embedder loaded (80MB)

‚úÖ Pipeline initialized
üíæ Domains: 11 (will load on-demand)

‚úÖ PIPELINE READY!
üíæ Startup RAM: ~2GB (Peak during query: 6-7GB)


In [4]:
# ======================== CELL 5: INTERACTIVE MODE ==========================

def ask_question():
    """Run an interactive question/answer loop against the module-level ``pipeline``.

    Reads questions with ``input()`` until the user types quit, exit, stop or q.
    The loop also ends cleanly when no input can be read: end of input, Ctrl-C,
    or a kernel without stdin support. Errors raised while answering a question
    are printed and the loop continues.
    """
    print("\n" + "="*80)
    print("üí¨ INTERACTIVE MEDICAL QA MODE")
    print("="*80)
    print("Type your medical questions below.")
    print("Type 'quit' or 'exit' to stop.\n")
    
    while True:
        try:
            query = input("\nüîç Your Question: ").strip()
        except (EOFError, KeyboardInterrupt, NotImplementedError):
            # EOFError: stdin closed; KeyboardInterrupt: user pressed Ctrl-C;
            # NotImplementedError: base class of the StdinNotImplementedError that
            # IPython kernels raise when the frontend cannot answer input requests
            # (e.g. a non-interactive "Run All"). Leave the loop instead of crashing.
            print("\nüëã Goodbye!")
            break
        
        if not query:
            print("‚ö†Ô∏è  Please enter a question")
            continue
        
        if query.lower() in ['quit', 'exit', 'stop', 'q']:
            print("\nüëã Goodbye!")
            break
        
        print("\n" + "-"*80)
        
        try:
            result = pipeline.run_query(query)
            
            print(f"\nüí° **ANSWER:**")
            print(f"{result['answer']}\n")
            
            print(f"üìä Confidence: {result['metrics']['composite']:.2f}")
            print(f"üéØ Knowledge Domains: {', '.join(result['domains'])}")
            print(f"‚è±Ô∏è  Response Time: {result['processing_time']:.2f}s")
            
            if result['sources']:
                show_sources = input("\nüìö Show sources? (y/n): ").strip().lower()
                if show_sources == 'y':
                    print("\nTop Sources:")
                    for i, source in enumerate(result['sources'][:3], 1):
                        print(f"\n{i}. [{source['domain']}] Relevance: {source['score']:.2f}")
                        print(f"   {source['chunk']}")
        
        except Exception as e:
            # Keep the session alive; one failed question should not end the loop
            print(f"\n‚ùå Error: {e}")
        
        print("\n" + "-"*80)

# Start the interactive loop; it keeps prompting until the user types quit, exit, stop or q.
ask_question()



üí¨ INTERACTIVE MEDICAL QA MODE
Type your medical questions below.
Type 'quit' or 'exit' to stop.




üîç Your Question:  I'm 35, on metformin for diabetes and sertraline for anxiety. Having irregular periods, mood swings, weight gain. Could medications cause this?



--------------------------------------------------------------------------------

üîç Query: I'm 35, on metformin for diabetes and sertraline for anxiety. Having irregular periods, mood swings, weight gain. Could medications cause this?
  üìÇ Loading general_medical index...
    ‚úÖ Loaded 710919 chunks
  üìÇ Loading mental_health index...
    ‚úÖ Loaded 22565 chunks
  üìÇ Loading ophthalmology index...
    ‚úÖ Loaded 57979 chunks
  üìÇ Loading pediatrics index...
    ‚úÖ Loaded 19888 chunks
  üìÇ Loading symptoms_triage index...
    ‚úÖ Loaded 147907 chunks
üìç Domains: general_medical
üîé Retrieving information...
üéØ Reranking...
  üì¶ Loading reranker...


config.json:   0%|          | 0.00/799 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

    ‚úÖ Reranker loaded (300MB)
  üóëÔ∏è  Reranker unloaded
üí¨ Generating answer...
  üì¶ Loading generator...


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

    ‚úÖ Generator loaded (900MB)
  üóëÔ∏è  Generator unloaded
  üóëÔ∏è  Freed memory from 4 domains
‚úÖ Done in 310.97s (confidence: 0.64)

üí° **ANSWER:**
However, it is possible that some persons may experience significant mood or anxiety symptoms due to the medication. If your anxiety symptoms are not severe, the would suggest that you try relaxation exercises like deep breathing, progressive muscle relaxation, yoga, meditation, etc. and try to stay as stress-free as possible.

‚ö†Ô∏è Please consult a healthcare professional for personalized medical advice.

üìä Confidence: 0.64
üéØ Knowledge Domains: general_medical
‚è±Ô∏è  Response Time: 310.97s



üìö Show sources? (y/n):  y



Top Sources:

1. [general_medical] Relevance: 0.79
   However, it is possible that some persons may experience significant mood or anxiety symptoms due to the medication. If your anxiety symptoms are not 

2. [general_medical] Relevance: 0.68
   Anti depressants such as sertraline can provide relief with your complaints of low mood and reduced energy levels, etc. Discuss with the treating onco

3. [general_medical] Relevance: 0.66
   Metformin helps in treating the insulin resistance. The possibility of pregnancy depends on whether ovulation is occurring or not. As you are having i

--------------------------------------------------------------------------------



üîç Your Question:  I have sudden severe chest pain radiating to left arm, shortness of breath, sweating heavily. I'm on metoprolol. What should I do?



--------------------------------------------------------------------------------

üîç Query: I have sudden severe chest pain radiating to left arm, shortness of breath, sweating heavily. I'm on metoprolol. What should I do?
üö® EMERGENCY DETECTED
üìç Domains: symptoms_triage
üí¨ Generating answer...
  üóëÔ∏è  Freed memory from 1 domains
‚úÖ Done in 1.10s (confidence: 0.95)

üí° **ANSWER:**
üö® **EMERGENCY - SEEK IMMEDIATE MEDICAL ATTENTION**

Please call 911 or go to the nearest emergency room immediately. Based on your symptoms, you may have a life-threatening condition.

‚ö†Ô∏è This is an emergency. Do not delay.

üìä Confidence: 0.95
üéØ Knowledge Domains: symptoms_triage
‚è±Ô∏è  Response Time: 1.10s

--------------------------------------------------------------------------------



üîç Your Question:  My 82-year-old grandmother on metformin developed sudden cognitive decline. Doctor says it's normal aging, but she was sharp 3 weeks ago. Is metformin causing this?



--------------------------------------------------------------------------------

üîç Query: My 82-year-old grandmother on metformin developed sudden cognitive decline. Doctor says it's normal aging, but she was sharp 3 weeks ago. Is metformin causing this?
  üìÇ Loading general_medical index...
    ‚úÖ Loaded 710919 chunks
  üìÇ Loading mental_health index...
    ‚úÖ Loaded 22565 chunks
  üìÇ Loading ophthalmology index...
    ‚úÖ Loaded 57979 chunks
  üìÇ Loading pediatrics index...
    ‚úÖ Loaded 19888 chunks
  üìÇ Loading symptoms_triage index...
    ‚úÖ Loaded 147907 chunks
üìç Domains: general_medical
üîé Retrieving information...
üéØ Reranking...
  üì¶ Loading reranker...
    ‚úÖ Reranker loaded (300MB)
  üóëÔ∏è  Reranker unloaded
üí¨ Generating answer...
  üóëÔ∏è  Freed memory from 4 domains
‚úÖ Done in 288.55s (confidence: 0.62)

üí° **ANSWER:**
She is 83 years old, and it is common to see cognitive decline at this age. The problems of memory loss, inability to 


üìö Show sources? (y/n):  y



Top Sources:

1. [general_medical] Relevance: 0.69
   She is 83 years old, and it is common to see cognitive decline at this age. The problems of memory loss, inability to remember numbers or passwords co

2. [general_medical] Relevance: 0.52
   can help. These medicines will reduce the progression of her cognitive decline, and she will feel better. Provide her warm loving support, and she wil

3. [general_medical] Relevance: 0.49
   These medicines will reduce the progression of her cognitive decline, and she will feel better. Provide her warm loving support, and she will show imp

--------------------------------------------------------------------------------



üîç Your Question:  exit



üëã Goodbye!
