# Medical Q&A Assistant using Qwen3-4B

## Setup and Environment Configuration

In [None]:
# Cell 1: Install required packages
!pip install -q transformers torch numpy pandas matplotlib tqdm colorama accelerate bitsandbytes sentencepiece einops ollama requests scikit-learn


In [None]:
# Cell 2: Import necessary libraries
import os
import sys
import json
import re
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
from colorama import Fore, Style
import logging
import requests
import gc
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from sklearn.metrics.pairwise import cosine_similarity

# Configure logging: INFO level, each record formatted as "time - level - message"
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by the functions defined in the cells below
logger = logging.getLogger(__name__)

# Check GPU availability: pick CUDA when torch reports it, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {device}")

# Memory management function
def clear_memory():
    """Run garbage collection, then release cached GPU memory.

    Logs "Memory cleared" when done. Safe to call on CPU-only machines:
    the CUDA cache is only touched when CUDA is available.
    """
    # Collect first: tensors that are only kept alive by unreachable Python
    # objects must be freed before the CUDA caching allocator can hand their
    # memory back in empty_cache().
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    logger.info("Memory cleared")


## Model Loading and Configuration

In [None]:
# Cell 3: Load Qwen3-4B Model
def load_model(model_name="Qwen/Qwen1.5-4B-Chat", use_4bit=True):
    """
    Load a causal language model, its tokenizer and a text-generation pipeline.

    NOTE(review): the notebook refers to "Qwen3-4B", but the default checkpoint
    here is "Qwen/Qwen1.5-4B-Chat" - confirm which model is intended.

    Args:
        model_name: Hugging Face model identifier passed to ``from_pretrained``.
        use_4bit: When True, load the weights with 4-bit NF4 quantization
            (bfloat16 compute dtype). When False, load in bfloat16.

    Returns:
        A ``(tokenizer, model, pipe)`` tuple, where ``pipe`` is a sampling
        text-generation pipeline (max 512 new tokens, temperature 0.7,
        top_p 0.9, repetition penalty 1.1).

    Raises:
        Any exception raised while loading the model or building the
        pipeline is logged and re-raised.
    """
    # Imported here so this function does not depend on the import cell
    # being extended; transformers is already a dependency of the notebook.
    from transformers import BitsAndBytesConfig

    logger.info(f"Loading model: {model_name}")

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
    )
    # Pad with the EOS token and pad on the left, as decoder-only models
    # generate from the right edge of the prompt.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "left"

    # Load model with optimizations for limited resources
    model_kwargs = {
        "device_map": "auto",
        "trust_remote_code": True,
    }

    if use_4bit:
        # Quantization options are passed through a BitsAndBytesConfig object;
        # passing load_in_4bit / bnb_4bit_* directly to from_pretrained is
        # deprecated.
        model_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_quant_type="nf4",
        )
    else:
        model_kwargs["torch_dtype"] = torch.bfloat16

    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            **model_kwargs
        )

        # Create text generation pipeline
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1
        )

        logger.info("Model loaded successfully!")
        return tokenizer, model, pipe

    except Exception as e:
        logger.error(f"Error loading model: {str(e)}")
        raise


In [None]:
# Cell 4: Load model with memory monitoring
import psutil

def log_memory_usage():
    """Log the resident set size (RSS) of the current process in megabytes."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    logger.info(f"Memory usage: {rss_bytes / 1024**2:.2f} MB")

# Log initial memory usage (baseline before the model is loaded)
log_memory_usage()

# Load model with 4-bit quantization; the three returned objects become
# module-level globals used by the cells below
tokenizer, model, pipe = load_model(use_4bit=True)

# Log memory usage after model loading, for comparison with the baseline
log_memory_usage()


## Prompt Templates and Strategies

In [None]:
# Cell 5: Load prompt templates
def load_prompt_template(file_path):
    """Load a prompt template from file, with a built-in fallback.

    Args:
        file_path: Path of the template file. Templates use ``{{QUERY}}`` as
            the placeholder for the user's question.

    Returns:
        The file contents (read as UTF-8). If the file does not exist, an
        error is logged and a built-in template is returned instead, chosen
        from the file name: "zero_shot", "few_shot", "cot", otherwise the
        meta prompt.
    """
    try:
        # Explicit UTF-8 so templates with non-ASCII text (e.g. "°F") load the
        # same way regardless of the platform's default encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        logger.error(f"Prompt template file not found: {file_path}")
        # Pick the fallback from the file name only: matching against the whole
        # path would misfire on directory names (e.g. ".../scott/..." contains "cot").
        template_name = os.path.basename(file_path)
        # Provide fallback templates if files don't exist
        if "zero_shot" in template_name:
            return """As a medical information assistant, please answer the following healthcare question with accurate information. If you're uncertain, clearly state the limitations of your knowledge.

IMPORTANT MEDICAL DISCLAIMER: This information is for educational purposes only and is not a substitute for professional medical advice. Always consult with a qualified healthcare provider for medical concerns.

Question: {{QUERY}}"""
        elif "few_shot" in template_name:
            return """As a medical information assistant, here are some examples of how to answer healthcare questions:

Question: What are the symptoms of strep throat?
Answer: Strep throat typically presents with sudden sore throat, pain when swallowing, fever over 101°F, red and swollen tonsils (sometimes with white patches), tiny red spots on the roof of the mouth, and swollen lymph nodes in the neck. Unlike viral infections, strep throat usually doesn't cause cough, runny nose, or hoarseness. 

IMPORTANT MEDICAL DISCLAIMER: This information is for educational purposes only. A healthcare professional should confirm diagnosis through testing, as many conditions can cause similar symptoms.

Question: How does insulin work in the body?
Answer: Insulin is a hormone produced by beta cells in the pancreas that regulates blood glucose levels. When you consume carbohydrates, they break down into glucose in the bloodstream. The pancreas then releases insulin, which acts like a key that allows glucose to enter cells for energy use. In people with diabetes, either insufficient insulin is produced (Type 1) or cells become resistant to insulin's effects (Type 2), resulting in elevated blood glucose levels.

IMPORTANT MEDICAL DISCLAIMER: This information is for educational purposes only and not a substitute for professional medical advice. If you have concerns about diabetes or insulin function, please consult with a healthcare provider.

Question: What is the difference between Alzheimer's and dementia?
Answer: Dementia is a general term for symptoms affecting memory, thinking, and social abilities severely enough to interfere with daily functioning. Alzheimer's disease is the most common specific cause of dementia, accounting for 60-80% of cases. While Alzheimer's is characterized by specific brain changes (amyloid plaques and tau tangles), dementia can be caused by various conditions including vascular issues, Lewy body disease, and others. All Alzheimer's patients have dementia, but not all dementia patients have Alzheimer's.

IMPORTANT MEDICAL DISCLAIMER: This information is for educational purposes only. If you're concerned about cognitive changes in yourself or a loved one, please consult with a healthcare professional for proper evaluation and support.

Now please answer this medical question:
{{QUERY}}"""
        elif "cot" in template_name:
            return """As a medical information assistant, please answer the following healthcare question. Think through your reasoning step by step before providing your final answer.

Question: {{QUERY}}

Let me reason through this systematically:
1) First, I'll consider what medical concepts are involved in this question
2) Then, I'll analyze relevant mechanisms, causes, or processes
3) Next, I'll evaluate important distinctions or differential considerations
4) Finally, I'll formulate a clear, accurate answer based on established medical knowledge

Reasoning:"""
        else:  # meta prompt
            return """As a medical information assistant, I'll answer the following healthcare question by breaking it down into sub-questions I need to answer first.

Main Question: {{QUERY}}

Let me ask myself some clarifying questions:
1) What specific medical concepts do I need to understand to answer this question?
2) What are the key mechanisms or processes involved?
3) Are there any important distinctions or considerations I should address?
4) What limitations exist in my knowledge about this topic?
5) What safety warnings or medical disclaimers should I include?

Now I'll answer each of these sub-questions to build my complete response:"""

# Load prompt templates, keyed by strategy name.
# load_prompt_template() returns a built-in fallback when a file is missing,
# so every key is populated even without the prompts/ directory.
prompt_templates = {
    "zero_shot": load_prompt_template("prompts/zero_shot.txt"),
    "few_shot": load_prompt_template("prompts/few_shot.txt"),
    "cot": load_prompt_template("prompts/cot_prompt.txt"),
    "meta": load_prompt_template("prompts/meta_prompt.txt")
}

# Define prompt functions
def zero_shot_prompt(query):
    """Return the zero-shot template with every {{QUERY}} replaced by *query*."""
    template = prompt_templates["zero_shot"]
    return template.replace("{{QUERY}}", query)

def few_shot_prompt(query):
    """Return the few-shot template with every {{QUERY}} replaced by *query*."""
    template = prompt_templates["few_shot"]
    return template.replace("{{QUERY}}", query)

def cot_prompt(query):
    """Return the chain-of-thought template with every {{QUERY}} replaced by *query*."""
    template = prompt_templates["cot"]
    return template.replace("{{QUERY}}", query)

def meta_prompt(query):
    """Return the meta-prompt template with every {{QUERY}} replaced by *query*."""
    template = prompt_templates["meta"]
    return template.replace("{{QUERY}}", query)

# Define prompt types dictionary: maps a strategy name to the function that
# builds the full prompt for a query. The keys match those of prompt_templates.
prompt_types = {
    "zero_shot": zero_shot_prompt,
    "few_shot": few_shot_prompt,
    "cot": cot_prompt,
    "meta": meta_prompt
}

logger.info("Prompt templates loaded successfully!")


## Medical Safety and Disclaimer Functions

In [None]:
# Cell 6: Medical safety functions
def add_medical_disclaimer(response):
    """Append the standard medical disclaimer unless one is already mentioned.

    The response is returned unchanged when it contains the word
    "disclaimer" in any letter case.
    """
    # A case-insensitive search also covers the upper-case "DISCLAIMER" heading.
    if "disclaimer" in response.lower():
        return response

    return response + (
        "\n\nIMPORTANT MEDICAL DISCLAIMER: This information is for educational purposes only and is not a substitute for professional medical advice. Always consult with a qualified healthcare provider for medical concerns."
    )

def check_emergency_symptoms(query, response):
    """Append an emergency warning when the query mentions an emergency symptom.

    Args:
        query: The user's question; matched case-insensitively against a
            fixed list of emergency symptom phrases (substring match).
        response: The response text to post-process.

    Returns:
        ``response`` with the emergency warning appended when the query
        mentions an emergency symptom and the warning banner is not already
        present; otherwise ``response`` unchanged.
    """
    emergency_symptoms = [
        "chest pain", "severe bleeding", "difficulty breathing", "shortness of breath",
        "sudden numbness", "sudden weakness", "sudden confusion", "sudden severe headache",
        "sudden vision loss", "suicidal", "suicide", "heart attack", "stroke"
    ]

    emergency_warning = "\n\n⚠️ EMERGENCY WARNING: The symptoms described may indicate a serious medical condition requiring immediate attention. Please seek emergency medical care immediately by calling emergency services or going to the nearest emergency room."

    lowered_query = query.lower()
    if not any(symptom in lowered_query for symptom in emergency_symptoms):
        return response

    # Only skip when the warning banner itself is already there. A mere mention
    # of the word "emergency" (e.g. "this is usually not an emergency") must
    # not suppress the warning.
    if "EMERGENCY WARNING" in response:
        return response

    return response + emergency_warning

def ensure_response_safety(query, response):
    """Run a response through all safety post-processing steps.

    Steps are applied in this order: medical disclaimer, emergency-symptom
    warning (based on the query), then softening of diagnostic language.
    """
    with_disclaimer = add_medical_disclaimer(response)
    with_warning = check_emergency_symptoms(query, with_disclaimer)
    return avoid_diagnostic_language(with_warning)

def avoid_diagnostic_language(response):
    """Replace diagnostic or prescriptive phrasing with hedged wording.

    Phrases such as "you have X", "you are suffering from X",
    "you definitely have X", "you should take X" and "I diagnose you with X"
    are rewritten (case-insensitively) into non-diagnostic language. X is the
    run of words that follows the phrase on the same line.

    Args:
        response: The response text to rewrite.

    Returns:
        The rewritten text; unchanged when no pattern matches.
    """
    # A run of words separated by spaces/tabs. It never ends in whitespace and
    # never crosses a line break, so the text appended after the captured
    # phrase (", but consult your doctor") lands directly after the last word
    # instead of after trailing blanks or in the next paragraph.
    words = r"([a-zA-Z]+(?:[ \t]+[a-zA-Z]+)*)"
    words_or_digits = r"([a-zA-Z0-9]+(?:[ \t]+[a-zA-Z0-9]+)*)"

    # \b keeps the phrases from matching in the middle of a longer word.
    diagnostic_patterns = [
        (r"\byou have[ \t]+" + words, r"you may be experiencing symptoms consistent with \1"),
        (r"\byou are suffering from[ \t]+" + words, r"you may be experiencing \1"),
        (r"\byou definitely have[ \t]+" + words, r"your symptoms may be consistent with \1"),
        (r"\byou should take[ \t]+" + words_or_digits, r"some healthcare providers may recommend \1, but consult your doctor"),
        (r"\bI diagnose you with[ \t]+" + words, r"these symptoms are sometimes associated with \1")
    ]

    for pattern, replacement in diagnostic_patterns:
        response = re.sub(pattern, replacement, response, flags=re.IGNORECASE)

    return response


## Query Processing and Response Generation

In [None]:
# Cell 7: Generate medical responses
def generate_medical_response(query, prompt_type="cot"):
    """Generate a medical response using the specified prompt type.

    Args:
        query: The user's medical question.
        prompt_type: Key into ``prompt_types``. An unknown key falls back to
            the chain-of-thought prompt (a warning is logged).

    Returns:
        On success, a dict with keys "query", "prompt_type", "prompt",
        "raw_response", "safe_response" and "timestamp". If anything raises,
        the exception is logged and a dict with keys "query", "prompt_type",
        "error" and "timestamp" is returned instead - callers must check for
        "error" before reading the response keys.
    """
    try:
        # Get the appropriate prompt function
        prompt_func = prompt_types.get(prompt_type)
        if prompt_func is None:
            # Keep the lenient fallback, but make it visible in the logs.
            logger.warning(f"Unknown prompt type '{prompt_type}'; falling back to the 'cot' prompt")
            prompt_func = cot_prompt

        # Format the prompt
        prompt = prompt_func(query)

        # Generate response; return_full_text=False asks the pipeline for the
        # generated continuation only, without the prompt
        logger.info(f"Generating response using {prompt_type} prompt")
        result = pipe(prompt, return_full_text=False)[0]["generated_text"]

        # Ensure response safety
        safe_response = ensure_response_safety(query, result)

        return {
            "query": query,
            "prompt_type": prompt_type,
            "prompt": prompt,
            "raw_response": result,
            "safe_response": safe_response,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

    except Exception as e:
        # logger.exception keeps the traceback, which logger.error discarded.
        logger.exception(f"Error generating response: {str(e)}")
        return {
            "query": query,
            "prompt_type": prompt_type,
            "error": str(e),
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }


## Ambiguity Handling and Clarification

In [None]:
# Cell 8: Ambiguity handling
def handle_ambiguous_input(query, model_response):
    """
    Detect ambiguous responses and generate clarification prompts.

    A response is treated as ambiguous when it contains an uncertainty
    phrase, has fewer than 20 whitespace-separated words, or contains both
    "on one hand" and "on the other hand".

    Args:
        query: The user's question; its wording selects which clarification
            text is produced (symptoms, medication, or generic).
        model_response: The raw model output to inspect.

    Returns:
        A clarification request when the response looks ambiguous, otherwise
        ``model_response`` unchanged.
    """
    # All indicators are lower-case because they are matched against the
    # lower-cased response ("I'm not sure" with a capital I could never match).
    uncertainty_indicators = [
        "unclear", "ambiguous", "could mean", "need more information",
        "i'm not sure", "it depends", "could refer to", "insufficient details"
    ]

    # Normalize typographic apostrophes so "I’m not sure" is recognized too.
    lowered_response = model_response.lower().replace("\u2019", "'")
    lowered_query = query.lower()

    mentions_uncertainty = any(
        indicator in lowered_response for indicator in uncertainty_indicators
    )

    # A very short response might indicate confusion
    is_short_response = len(model_response.split()) < 20

    # Response weighs two conflicting possibilities against each other
    has_multiple_possibilities = (
        "on one hand" in lowered_response and "on the other hand" in lowered_response
    )

    if not (mentions_uncertainty or is_short_response or has_multiple_possibilities):
        return model_response

    # Generate clarification prompt based on the query type. The second line of
    # each message is a whitespace-only line, kept exactly as it has always
    # been emitted.
    if "symptoms" in lowered_query or "signs" in lowered_query:
        return (
            f'I notice your question about "{query}" could benefit from more details:\n'
            "            \n"
            "1) Could you specify how long you've been experiencing these symptoms?\n"
            "2) Are there any other symptoms you're experiencing alongside these?\n"
            "3) Are you asking about general information or concerned about specific symptoms you're experiencing?\n"
            "\n"
            "IMPORTANT MEDICAL DISCLAIMER: I can provide general medical information but cannot diagnose conditions or replace professional medical advice. If you're experiencing concerning symptoms, please consult a healthcare provider."
        )

    if "medication" in lowered_query or "drug" in lowered_query or "medicine" in lowered_query:
        return (
            f'I notice your question about "{query}" could be made more specific:\n'
            "            \n"
            "1) Are you asking about specific dosages, side effects, or interactions?\n"
            "2) Do you have any other medical conditions or take other medications that might be relevant?\n"
            "3) Are you looking for general information or have concerns about a specific situation?\n"
            "\n"
            "IMPORTANT MEDICAL DISCLAIMER: I can provide general medication information but cannot give personalized medical advice. Always consult a healthcare provider or pharmacist for guidance on medications."
        )

    return (
        f'I notice your question about "{query}" could be interpreted in multiple ways:\n'
        "            \n"
        "1) Could you provide more context or specific details about your question?\n"
        "2) Are you looking for general information or information about a specific situation?\n"
        "3) Would it help if I explained some of the common terms or concepts related to this topic first?\n"
        "\n"
        "IMPORTANT MEDICAL DISCLAIMER: I can provide general medical information but cannot diagnose conditions or replace professional medical advice. For medical concerns, please consult a qualified healthcare provider."
    )


## Hallucination Detection

In [None]:
# Cell 9: Hallucination detection
def detect_hallucinations(response, query, expected_concepts=None):
    """
    Score a response for wording that suggests hallucination or overreach.

    Each regex below is searched once (case-insensitively); every pattern
    that matches adds one issue and its category weight to the raw score.
    If ``expected_concepts`` is given and fewer than half of them appear in
    the response, one more issue and one point are added.

    ``query`` is accepted but not used by the scoring.

    Returns a dict with "score" (raw score halved, capped at 5), "issues"
    (list of messages) and "severity" ("High" above 3.5, "Medium" above 2,
    otherwise "Low").
    """
    # (issue label, weight per matching pattern, patterns) - checked in this order
    checks = [
        ("Definitive claim detected", 2, [
            r"100% effective", r"always works", r"cures all", r"completely safe",
            r"guaranteed to", r"never causes", r"all patients", r"everyone with",
        ]),
        ("Specific unverifiable statistic", 1.5, [
            r"\d{2,3}% of (patients|people|cases)", r"studies show \d{2,3}%",
        ]),
        ("Unverifiable claim", 1, [
            r"recent studies show", r"doctors agree that", r"research has proven",
            r"it is well established", r"it is widely accepted",
        ]),
        ("Medical advice detected", 2, [
            r"you should", r"you must", r"you need to", r"I recommend",
            r"take \d+ (mg|milligrams)", r"increase your dosage", r"reduce your dosage",
        ]),
    ]

    flagged = []
    raw_score = 0

    for label, weight, patterns in checks:
        for pattern in patterns:
            hit = re.search(pattern, response, re.IGNORECASE)
            if hit is None:
                continue
            flagged.append(f"{label}: '{hit.group(0)}'")
            raw_score += weight

    # Coverage of expected concepts (only when a non-empty list is provided)
    if expected_concepts:
        lowered = response.lower()
        found = sum(1 for concept in expected_concepts if concept.lower() in lowered)
        total = len(expected_concepts)
        if found / total < 0.5:
            flagged.append(f"Low coverage of expected medical concepts: {found}/{total}")
            raw_score += 1

    # Normalize score between 0-5 (for evaluation criteria)
    capped = min(5, raw_score / 2)

    if capped > 3.5:
        level = "High"
    elif capped > 2:
        level = "Medium"
    else:
        level = "Low"

    return {
        "score": capped,
        "issues": flagged,
        "severity": level
    }


## Interactive Demo and Testing

In [None]:
# Cell 10: Test with sample queries
def format_response(response_data):
    """Format a response dict from generate_medical_response() for display.

    Args:
        response_data: Dict with "query" and "prompt_type", plus either
            "safe_response" (success) or "error" (generation failed).

    Returns:
        A printable multi-line string. For a successful response it includes
        the response text and the hallucination check; for a failed one it
        shows the error message instead.
    """
    parts = [
        f"Query: {response_data['query']}\n\n",
        f"Prompt Type: {response_data['prompt_type']}\n\n",
    ]

    # A failed generation carries "error" and no "safe_response"; report it
    # instead of raising KeyError.
    if "safe_response" not in response_data:
        parts.append(f"Error: {response_data.get('error', 'unknown error')}\n")
        return "".join(parts)

    safe_response = response_data['safe_response']
    parts.append(f"Response:\n{safe_response}\n\n")

    # Add hallucination check
    hallucination_check = detect_hallucinations(safe_response, response_data['query'])
    if hallucination_check['score'] > 0:
        parts.append(f"⚠️ Hallucination Score: {hallucination_check['score']}/5 ({hallucination_check['severity']})\n")
        if hallucination_check['issues']:
            parts.append("Issues detected:\n")
            parts.extend(f"- {issue}\n" for issue in hallucination_check['issues'])
    else:
        parts.append("✅ No hallucinations detected\n")

    return "".join(parts)

# Sample queries from the project specification
sample_queries = [
    "What are the early symptoms of diabetes?",
    "Explain what hypertension means in simple terms",
    "Should I be worried about chest pain?",
    "What causes frequent headaches?",
    "When should someone see a cardiologist?"
]

# Test each query with each prompt type (every query is run once per
# registered prompt strategy, and the formatted result is printed)
for query in sample_queries:
    print(f"\n{'='*80}\nTesting query: {query}\n{'='*80}")
    
    for prompt_type in prompt_types.keys():
        response_data = generate_medical_response(query, prompt_type)
        print(f"\n{'-'*40}\nPrompt Type: {prompt_type}\n{'-'*40}")
        print(format_response(response_data))
        
        # Clear memory between queries
        clear_memory()


## Interactive Demo

In [None]:
# Cell 11: Interactive demo
def run_interactive_demo():
    """Run an interactive question/answer loop on the console.

    Reads questions with input() until the user types 'exit' (or input
    ends), generates a chain-of-thought response for each, and prints
    either a clarification request (when the raw response looks ambiguous)
    or the formatted response. Memory is cleared after every question.
    """
    print(f"{Fore.GREEN}===== Medical Q&A Assistant Demo =====\n{Style.RESET_ALL}")
    print("Using Qwen3-4B with optimal prompt strategies")
    print("Type 'exit' to quit the demo\n")

    while True:
        try:
            query = input(f"{Fore.BLUE}Enter your medical question: {Style.RESET_ALL}").strip()
        except (EOFError, KeyboardInterrupt):
            # No more input available / user interrupted: leave the demo cleanly.
            break

        if query.lower() == 'exit':
            break
        if not query:
            # Nothing to answer; don't send an empty prompt to the model.
            continue

        # Determine best prompt type for this query
        prompt_type = "cot"  # Default to Chain-of-Thought

        print(f"\n{Fore.YELLOW}Processing with {prompt_type} prompt...{Style.RESET_ALL}")

        # Generate response
        response_data = generate_medical_response(query, prompt_type)

        if "error" in response_data:
            # Failed generations carry no 'raw_response'/'safe_response' keys.
            print(f"\n{Fore.RED}Could not generate a response: {response_data['error']}{Style.RESET_ALL}\n")
            clear_memory()
            continue

        # Check for ambiguity
        clarification = handle_ambiguous_input(query, response_data['raw_response'])
        if clarification != response_data['raw_response']:
            print(f"\n{Fore.RED}Ambiguity detected. Requesting clarification:{Style.RESET_ALL}")
            # The clarification replaces the safety-processed response, so the
            # emergency warning has to be re-applied to it.
            print(f"{check_emergency_symptoms(query, clarification)}\n")
        else:
            # Format and display response
            print(f"\n{format_response(response_data)}\n")

        # Clear memory after every question, including the clarification path
        clear_memory()

# Run the interactive demo only when this code executes as the main module
# (the guard is false when the file is imported from elsewhere)
if __name__ == "__main__":
    run_interactive_demo()


In [None]:
# Cell 12: Save results and clean up
def save_results(results, filename="medical_qa_results.json"):
    """Save results to a JSON file.

    Args:
        results: JSON-serializable data (e.g. the list of response dicts).
        filename: Output path. Missing parent directories are created.
    """
    # Create the target directory first; open() fails with FileNotFoundError
    # for a path such as "evaluation/sample_results.json" when the directory
    # does not exist yet.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    logger.info(f"Results saved to {filename}")

# Clean up and free memory
def cleanup():
    """Drop the module-level model objects and free memory.

    Removes the globals ``pipe``, ``model`` and ``tokenizer`` (when they
    exist), then calls clear_memory(). Safe to call more than once or
    before the model has been loaded.
    """
    # pop() instead of `del`: deleting a global that is not defined raises
    # NameError, which made a second cleanup() call crash.
    module_globals = globals()
    for name in ("pipe", "model", "tokenizer"):
        module_globals.pop(name, None)

    # Clear memory
    clear_memory()

    logger.info("Cleanup completed")

# Example of saving results: collect one response dict per (query, prompt type)
sample_results = []
for query in sample_queries[:2]:  # Just test with 2 queries to save resources
    for prompt_type in prompt_types.keys():
        response_data = generate_medical_response(query, prompt_type)
        sample_results.append(response_data)

# Save sample results as JSON under evaluation/
save_results(sample_results, "evaluation/sample_results.json")

# Clean up: removes the model globals, so generation is unavailable after
# this point until the model is loaded again
cleanup()