In [1]:
# Cell 1: Setup Complete CardioQA System
"""
CardioQA - Step 3: Complete AI System with Gemini Integration
Build production-ready cardiac diagnostic assistant
"""

import json
import os
import re
from datetime import datetime
from pathlib import Path

import chromadb
import google.generativeai as genai
import pandas as pd
from sentence_transformers import SentenceTransformer

# Startup banner: title line followed by a 60-character rule.
print("🫀 CardioQA - Complete AI Diagnostic Assistant", "=" * 60, sep="\n")


🫀 CardioQA - Complete AI Diagnostic Assistant


In [5]:
# Cell 2: Setup Google Gemini API (CORRECT MODEL NAMES)
print("🤖 Setting up Google Gemini API (Free Tier)...")

# SECURITY: API keys must never be committed inside a notebook. The key is read
# from the environment instead (export GEMINI_API_KEY or GOOGLE_API_KEY before
# starting the kernel). Any key that was ever hard-coded in this cell must be
# treated as leaked and revoked.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")

# Candidate model names, tried in order; the first one that answers the test
# prompt becomes `model`.
model_names_to_try = [
    'gemini-2.0-flash',           # Latest 2025 model
    'gemini-1.5-flash-8b',        # Smaller, faster model
    'gemini-1.5-flash-002',       # Specific version
    'gemini-1.5-flash-latest',    # Latest alias
    'gemini-pro',                 # Simple name
]

# Both stay None when no model could be reached; later cells read them.
model = None
working_model_name = None

if not GEMINI_API_KEY:
    print("❌ No Gemini API key found. Set the GEMINI_API_KEY environment variable and re-run this cell.")
else:
    # Configure Gemini
    genai.configure(api_key=GEMINI_API_KEY)

    for model_name in model_names_to_try:
        try:
            print(f"🔍 Trying model: {model_name}")
            test_model = genai.GenerativeModel(model_name)
            test_response = test_model.generate_content("Say 'CardioQA ready!'")

            # Reaching this line means the request succeeded.
            model = test_model
            working_model_name = model_name
            print(f"✅ SUCCESS! Using model: {model_name}")
            print(f"🧪 API Test: {test_response.text}")
            break

        except Exception as e:
            # Deliberately broad: any failure simply means "try the next candidate".
            print(f"❌ {model_name} failed: {str(e)[:100]}...")

    if model is None:
        print("\n🚨 All models failed. Let's list available models:")

        try:
            # list_models() hands back a one-shot iterator. Materialise it so it
            # can be printed AND searched afterwards (indexing the raw iterator
            # fails, and it is always truthy even when empty).
            available_models = list(genai.list_models())
            print("\n📋 Available Models:")
            for m in available_models:
                print(f"- {m.name}")

            # Only models advertising generateContent can serve generate_content().
            text_models = [
                m for m in available_models
                if 'generateContent' in getattr(m, 'supported_generation_methods', [])
            ]

            if text_models:
                # Names come back as "models/<id>"; GenerativeModel is given the bare id.
                first_model = text_models[0].name.split('/')[-1]
                print(f"\n🔄 Trying first available model: {first_model}")
                candidate_model = genai.GenerativeModel(first_model)
                test_response = candidate_model.generate_content("Say 'CardioQA ready!'")
                model = candidate_model
                working_model_name = first_model
                print(f"✅ SUCCESS! Using: {first_model}")
                print(f"🧪 Test: {test_response.text}")

        except Exception as e:
            print(f"❌ Error listing models: {e}")
            model = None
            working_model_name = None

if model is not None:
    print(f"\n🎉 Gemini API working with model: {working_model_name}")
else:
    print("\n🤔 Let's use our Smart RAG system instead (no API needed)!")


🤖 Setting up Google Gemini API (Free Tier)...
🔍 Trying model: gemini-2.0-flash
✅ SUCCESS! Using model: gemini-2.0-flash
🧪 API Test: CardioQA ready!


🎉 Gemini API working with model: gemini-2.0-flash


In [6]:
# Cell 3: Load Your RAG System
print("🗂️ Loading your trained RAG System...")

# Load RAG configuration. JSON text is UTF-8, so decode it explicitly instead of
# relying on the platform's default encoding (which differs on Windows).
with open("../data/processed/rag_config.json", 'r', encoding="utf-8") as f:
    rag_config = json.load(f)

# Reconnect to the persistent vector database named in the config.
client = chromadb.PersistentClient(path=rag_config['vector_db_path'])
collection = client.get_collection(name=rag_config['collection_name'])

# Load the sentence-embedding model named in the config.
embedding_model = SentenceTransformer(rag_config['embedding_model'])

# Summary of what was loaded; the document count is the value recorded in the
# config file, not a live count of the collection.
print("✅ RAG System loaded:")
print(f"- Database: {rag_config['total_documents']} cardiac documents")
print(f"- Model: {rag_config['embedding_model']}")
print(f"- Specialty: {rag_config['specialty']}")
print(f"- Gemini Model: {working_model_name}")


🗂️ Loading your trained RAG System...
✅ RAG System loaded:
- Database: 364 cardiac documents
- Model: all-MiniLM-L6-v2
- Specialty: cardiology
- Gemini Model: gemini-2.0-flash


In [7]:
# Cell 4: Medical Safety Validator
print("🛡️ Creating Medical Safety Validator...")

class MedicalSafetyValidator:
    """Keyword-based safety scoring for AI-generated medical answers.

    Every response starts at a score of 100 and fixed penalties are subtracted
    for each problem found (see ``validate_response``). The validator never
    edits the answer itself; ``add_safety_disclaimers`` is what appends the
    warning text.
    """

    # Scoring knobs, kept as class attributes so they can be tuned in one place.
    EMERGENCY_PENALTY = 30               # emergency query answered without urging immediate care
    DANGEROUS_PHRASE_PENALTY = 25        # applied once per matching dangerous phrase
    NO_CONSULT_PENALTY = 10              # answer never mentions seeing a professional
    SAFE_THRESHOLD = 70                  # 'is_safe' is True at or above this score
    EMERGENCY_DISCLAIMER_THRESHOLD = 80  # below this score the emergency notice is appended

    def __init__(self):
        # Substrings of the *user query* that mark a potential emergency.
        self.emergency_keywords = [
            'heart attack', 'chest pain', 'shortness of breath', 'stroke',
            'severe pain', 'bleeding', 'unconscious', 'emergency', 'crushing pain'
        ]

        # Substrings of the *AI response* that are considered unsafe advice.
        self.dangerous_phrases = [
            'you definitely have', 'this is certainly', 'you should not see a doctor',
            'ignore medical advice', 'don\'t go to hospital'
        ]

    @staticmethod
    def _normalize(text):
        """Lower-case ``text`` for substring matching; ``None`` becomes ``''``.

        Typographic apostrophes are mapped to ASCII ones so that phrases such as
        "don't go to hospital" still match generated text that uses curly quotes.
        """
        return (text or "").lower().replace("\u2019", "'")

    def validate_response(self, response_text, user_query):
        """Score an AI response for medical safety.

        Args:
            response_text: The generated answer (``None`` is treated as empty).
            user_query: The question the user asked (``None`` is treated as empty).

        Returns:
            dict with keys ``'safety_score'`` (int, 0-100), ``'warnings'``
            (list of str, one entry per penalty applied) and ``'is_safe'``
            (bool, score >= ``SAFE_THRESHOLD``).
        """
        # Normalise once instead of re-lowercasing for every keyword.
        response_lower = self._normalize(response_text)
        query_lower = self._normalize(user_query)

        safety_score = 100
        warnings = []

        # Emergency-sounding question whose answer does not urge immediate care.
        if any(keyword in query_lower for keyword in self.emergency_keywords):
            if 'seek immediate medical attention' not in response_lower:
                warnings.append("CRITICAL: Emergency situation - added medical attention warning")
                safety_score -= self.EMERGENCY_PENALTY

        # Dangerous definitive statements: one penalty (and warning) per phrase found.
        for phrase in self.dangerous_phrases:
            if phrase.lower() in response_lower:
                warnings.append("WARNING: Contains potentially dangerous medical advice")
                safety_score -= self.DANGEROUS_PHRASE_PENALTY

        # The answer should point the user to a professional at least once.
        consult_phrases = ['consult', 'doctor', 'physician', 'healthcare provider']
        if not any(phrase in response_lower for phrase in consult_phrases):
            warnings.append("Added professional consultation recommendation")
            safety_score -= self.NO_CONSULT_PENALTY

        # The score is shown as "x/100", so never let stacked penalties push it below 0.
        safety_score = max(safety_score, 0)

        return {
            'safety_score': safety_score,
            'warnings': warnings,
            'is_safe': safety_score >= self.SAFE_THRESHOLD
        }

    def add_safety_disclaimers(self, response_text, safety_check):
        """Return ``response_text`` with the medical disclaimers appended.

        Args:
            response_text: The generated answer (``None`` is treated as empty).
            safety_check: The dict returned by ``validate_response``; only its
                ``'safety_score'`` entry is read.

        Returns:
            The answer followed by the general disclaimer, the emergency notice
            (only when the score is below ``EMERGENCY_DISCLAIMER_THRESHOLD``)
            and the consultation recommendation.
        """
        # Always add basic disclaimer
        disclaimers = [
            "\n\n⚠️ **MEDICAL DISCLAIMER**: This information is for educational purposes only and not a substitute for professional medical advice."
        ]

        # Add emergency warning if needed
        if safety_check['safety_score'] < self.EMERGENCY_DISCLAIMER_THRESHOLD:
            disclaimers.append("\n🚨 **IMPORTANT**: If experiencing severe symptoms, seek immediate medical attention or call emergency services.")

        # Add consultation recommendation
        disclaimers.append("\n👨‍⚕️ **RECOMMENDATION**: Always consult with a qualified healthcare provider for personalized medical guidance.")

        return (response_text or "") + ''.join(disclaimers)

safety_validator = MedicalSafetyValidator()
print("✅ Medical Safety Validator initialized")


🛡️ Creating Medical Safety Validator...
✅ Medical Safety Validator initialized


In [8]:
# Cell 5: Complete CardioQA System with Gemini 2.0
print("🫀 Building Complete CardioQA System...")

class CardioQASystem:
    """Cardiac question-answering pipeline: retrieve, prompt, generate, validate.

    ``generate_response`` is the entry point. It looks up related Q&A entries in
    the vector collection, builds a safety-focused prompt around them, asks the
    Gemini model for an answer and finally runs the answer through the safety
    validator.
    """

    # Similarity of the best hit above which the answer is labelled High / Medium.
    HIGH_CONFIDENCE_SIMILARITY = 0.5
    MEDIUM_CONFIDENCE_SIMILARITY = 0.3

    def __init__(self, collection, embedding_model, gemini_model, safety_validator):
        """Store the collaborators; no I/O happens here.

        Args:
            collection: Vector collection exposing ``query(query_texts=..., n_results=...)``.
            embedding_model: Stored on the instance but not used by any method of this class.
            gemini_model: Object exposing ``generate_content(prompt, generation_config=...)``.
            safety_validator: Object exposing ``validate_response`` and ``add_safety_disclaimers``.
        """
        self.collection = collection
        self.embedding_model = embedding_model
        self.gemini_model = gemini_model
        self.safety_validator = safety_validator

    @classmethod
    def _confidence_label(cls, similarity):
        """Map the best hit's similarity to 'High', 'Medium' or 'Low'."""
        if similarity > cls.HIGH_CONFIDENCE_SIMILARITY:
            return 'High'
        if similarity > cls.MEDIUM_CONFIDENCE_SIMILARITY:
            return 'Medium'
        return 'Low'

    def search_knowledge_base(self, query, n_results=3):
        """Search cardiac knowledge base for relevant information.

        Args:
            query: Free-text question.
            n_results: Maximum number of hits to request.

        Returns:
            List of dicts with keys ``'question'``, ``'answer'``, ``'qtype'``
            (copied from each hit's metadata) and ``'similarity'``
            (``1 - distance``), in the order returned by the collection.
        """
        # NOTE(review): the raw text is passed via query_texts, so the query is
        # embedded by the collection itself and self.embedding_model is not
        # involved - confirm both use the same embedding model.
        results = self.collection.query(
            query_texts=[query],
            n_results=n_results
        )

        # Take the hits for the first (and only) query text; tolerate missing lists.
        metadatas = (results['metadatas'] or [[]])[0]
        distances = (results['distances'] or [[]])[0]

        # NOTE(review): 1 - distance is only a true similarity if the collection's
        # distance is bounded by 1 (e.g. cosine) - confirm the index metric.
        return [
            {
                'question': metadata['question'],
                'answer': metadata['answer'],
                'qtype': metadata['qtype'],
                'similarity': 1 - distance
            }
            for metadata, distance in zip(metadatas, distances)
        ]

    def create_medical_prompt(self, user_query, knowledge_context):
        """Create specialized medical prompt for Gemini.

        Args:
            user_query: The user's question, inserted verbatim into the prompt.
            knowledge_context: Hits as returned by ``search_knowledge_base``;
                at most the first three are included.

        Returns:
            The complete prompt string.
        """
        # Build context from retrieved knowledge
        context_text = "\n".join([
            f"Medical Source {i+1}:\nQ: {item['question']}\nA: {item['answer']}\nConfidence: {item['similarity']:.1%}\n"
            for i, item in enumerate(knowledge_context[:3])
        ])

        prompt = f"""You are CardioQA, a specialized cardiac health educational assistant powered by evidence-based medical literature.

CRITICAL SAFETY RULES:
1. NEVER provide definitive medical diagnoses
2. ALWAYS recommend consulting a healthcare professional  
3. Base responses ONLY on the provided medical evidence
4. Use clear, educational language accessible to patients
5. Include appropriate medical caution and disclaimers

USER QUESTION: {user_query}

RETRIEVED MEDICAL EVIDENCE:
{context_text}

RESPONSE REQUIREMENTS:
- Provide helpful, evidence-based cardiac health education
- Explain medical concepts in simple terms
- Cite the evidence sources in your response  
- Maintain appropriate medical caution
- Suggest when to seek professional medical care
- Include confidence assessment based on evidence quality

Generate a comprehensive, educational response that helps the user understand their cardiac health question while maintaining medical safety:"""

        return prompt

    def generate_response(self, user_query):
        """Generate complete CardioQA response.

        Args:
            user_query: The user's question.

        Returns:
            On success, a dict with keys ``'response'``, ``'safety_score'``,
            ``'warnings'``, ``'knowledge_sources'``, ``'confidence'`` and
            ``'top_similarity'``. When the knowledge base returns no hits the
            same keys are present with a canned low-confidence answer.
            If the model call raises, a plain ``str`` error message is returned
            instead of a dict (kept for backward compatibility - callers should
            check ``isinstance(result, dict)``).
        """
        # Step 1: Search knowledge base
        knowledge_context = self.search_knowledge_base(user_query, n_results=3)

        if not knowledge_context:
            return {
                'response': 'I could not find relevant cardiac information for your question. Please consult a healthcare professional.',
                'safety_score': 85,
                'warnings': [],
                'knowledge_sources': 0,
                'confidence': 'Low',
                # Present so both dict-shaped results share the same keys.
                'top_similarity': 0.0
            }

        # Step 2: Create medical prompt
        prompt = self.create_medical_prompt(user_query, knowledge_context)

        # Step 3: Generate response with Gemini
        try:
            response = self.gemini_model.generate_content(
                prompt,
                generation_config={
                    'temperature': 0.1,  # Low temperature for medical accuracy
                    'max_output_tokens': 1000,
                }
            )
            ai_response = response.text
        except Exception as e:
            # Broad on purpose: any failure of the model call is reported to the caller as text.
            return f"❌ Error generating response: {str(e)}"

        # Step 4: Apply safety validation
        safety_check = self.safety_validator.validate_response(ai_response, user_query)
        safe_response = self.safety_validator.add_safety_disclaimers(ai_response, safety_check)

        # Step 5: Return complete response with metadata.
        # Confidence is derived from the first hit returned by the search.
        top_similarity = knowledge_context[0]['similarity']
        return {
            'response': safe_response,
            'safety_score': safety_check['safety_score'],
            'warnings': safety_check['warnings'],
            'knowledge_sources': len(knowledge_context),
            'confidence': self._confidence_label(top_similarity),
            'top_similarity': top_similarity
        }

# Initialize the complete CardioQA system with whichever Gemini model Cell 2 selected.
cardioqa = CardioQASystem(collection, embedding_model, model, safety_validator)

# Report the model that is actually in use instead of a hard-coded name; Cell 2
# may have fallen back to a different model, or found none at all.
if model is not None:
    print(f"✅ Complete CardioQA system ready with {working_model_name}!")
else:
    print("⚠️ CardioQA system created without a Gemini model - generate_response will return an error message until one is configured.")


🫀 Building Complete CardioQA System...
✅ Complete CardioQA system ready with Gemini 2.0 Flash!


In [9]:
# Cell 6: Test Complete CardioQA System
print("🧪 Testing Complete CardioQA System...")

def ask_cardioqa(question):
    """Send one question to the global ``cardioqa`` system and print the outcome.

    A dict result is printed as the answer followed by its metadata; any other
    result (the error string) is printed as-is. Nothing is returned.
    """
    rule = "=" * 70
    print(f"\n🫀 CardioQA Query: '{question}'")
    print(rule)

    answer = cardioqa.generate_response(question)

    # Non-dict result: just show it and close the section.
    if not isinstance(answer, dict):
        print(f"Response: {answer}")
        print("\n" + rule)
        return

    print("🤖 CardioQA Response:")
    print(answer['response'])

    print("\n📊 Response Metadata:")
    print(f"- Safety Score: {answer['safety_score']}/100")
    print(f"- Confidence Level: {answer['confidence']}")
    print(f"- Knowledge Sources: {answer['knowledge_sources']}")
    print(f"- Top Similarity: {answer.get('top_similarity', 0):.1%}")

    safety_warnings = answer['warnings']
    if safety_warnings:
        print(f"- Safety Warnings: {', '.join(safety_warnings)}")

    print("\n" + rule)

# Sample cardiac questions used as a smoke test of the whole pipeline.
test_questions = [
    "What are the warning signs of a heart attack?",
    "How can I prevent heart disease?",
    "What should I do if I have chest pain?",
]

print("🎯 Testing CardioQA with sample questions:")
for question in test_questions:
    ask_cardioqa(question)


🧪 Testing Complete CardioQA System...
🎯 Testing CardioQA with sample questions:

🤖 CardioQA Response:


*   **Chest pain or discomfort:** This is the most common symptom. It usually involves discomfort in the center or left side of the chest that lasts more than a few minutes or goes away and comes back. It can feel like pressure, squeezing, fullness, or pain. It might also feel like heartburn or indigestion (Medical Sources 1, 2, 3).
*   **Upper body discomfort:** Pain or discomfort in one or both arms, the back, shoulders, neck, jaw, or upper part of the stomach (above the belly button) (Medical Sources 1, 2, 3).
*   **Shortness of breath:** This can occur before or along with chest pain or discomfort. It can happen even when you are resting or doing a small amount of physical activity (Medical Sources 1, 2, 3).
*   **Other symptoms:** These may include breaking out in a cold sweat, nausea and vomiting, feeling light-headed or dizzy, fainting, sleep problems, fatigue, or lack of ener

In [10]:
# Cell 7: Interactive CardioQA Demo
print("🎮 Interactive CardioQA Chat")
print("Ask cardiac health questions! (Type 'quit' to exit)")
print("=" * 50)

def interactive_cardioqa():
    """Interactive chat with CardioQA.

    Reads questions with ``input()`` until the user types 'quit', 'exit' or
    'stop', interrupts the kernel, or input becomes unavailable. Questions
    shorter than five characters are rejected with a hint. Errors raised while
    answering a question are printed and the loop continues.
    """
    exit_commands = ('quit', 'exit', 'stop')

    while True:
        try:
            try:
                raw_question = input("\n❓ Your cardiac health question: ")
            except (EOFError, NotImplementedError):
                # stdin is closed or the frontend cannot prompt (e.g. headless
                # execution). Without this the generic handler below would
                # swallow the error and the loop would spin forever.
                print("\n👋 CardioQA session ended (no interactive input available).")
                break

            user_question = raw_question.strip()

            if user_question.lower() in exit_commands:
                print("👋 Thank you for using CardioQA!")
                break

            if len(user_question) < 5:
                print("Please ask a more specific question.")
                continue

            # Generate and display response
            ask_cardioqa(user_question)

        except KeyboardInterrupt:
            print("\n👋 CardioQA session ended!")
            break
        except Exception as e:
            # Keep the chat alive if a single question fails.
            print(f"❌ Error: {e}")

# Run interactive demo.
# NOTE: this call blocks on input() until the user types 'quit', 'exit' or
# 'stop' (or interrupts the kernel), so it will stall an unattended "Run All".
interactive_cardioqa()


🎮 Interactive CardioQA Chat
Ask cardiac health questions! (Type 'quit' to exit)

🫀 CardioQA Query: 'left side of my heart hurts a bit, is it heatburn or something serious?'
🤖 CardioQA Response:
It's understandable to be concerned about pain on the left side of your chest. It can be tricky to figure out if it's something like heartburn or a more serious heart issue. Let's explore some possibilities based on the information available.

**Heartburn:** Heartburn feels like a painful, burning sensation in your chest or throat. It happens when stomach acid flows back up into your esophagus (the tube that carries food from your mouth to your stomach). Sometimes, heart attack pain can feel like indigestion or heartburn (Medical Source 3).

**Chest Pain:** Chest pain can have many causes, not all of them heart-related (Medical Source 2). Some other potential causes include:
*   Other heart problems, such as angina
*   Panic attacks
*   Digestive problems, such as esophagus disorders
*   Sore mu

In [11]:
# Cell 8: Save Complete CardioQA System
print("💾 Saving Complete CardioQA System...")

import pickle
from datetime import datetime

# Create system configuration.
# Values that describe the running system are taken from the objects the
# notebook actually loaded (rag_config, working_model_name) rather than being
# re-typed here, so the saved file cannot drift from what was really used.
system_config = {
    'created_date': datetime.now().isoformat(),
    'total_documents': rag_config['total_documents'],
    'embedding_model': rag_config['embedding_model'],
    'llm_model': working_model_name,  # None when no Gemini model could be reached
    'vector_db_path': rag_config['vector_db_path'],
    'safety_features': ['emergency_detection', 'professional_consultation', 'medical_disclaimers'],
    # NOTE(review): these are descriptive labels, not values measured in this notebook.
    'performance_metrics': {
        'avg_response_time': '2-3 seconds',
        'safety_validation': 'enabled',
        'confidence_scoring': 'enabled'
    }
}

# Save system configuration (explicit UTF-8 so the file is portable across platforms).
with open('../data/processed/cardioqa_system_config.json', 'w', encoding='utf-8') as f:
    json.dump(system_config, f, indent=2)

print("✅ CardioQA system configuration saved!")
print("\n📊 Final System Statistics:")
print(f"- Total Cardiac Documents: {system_config['total_documents']}")
print(f"- LLM Model: {system_config['llm_model']}")
print(f"- Embedding Model: {system_config['embedding_model']}")
print(f"- Safety Features: {len(system_config['safety_features'])} enabled")
print("- Vector Database: ChromaDB with persistent storage")

print("\n🎉 CardioQA System Complete and Production Ready!")


💾 Saving Complete CardioQA System...
✅ CardioQA system configuration saved!

📊 Final System Statistics:
- Total Cardiac Documents: 364
- LLM Model: gemini-2.0-flash
- Embedding Model: all-MiniLM-L6-v2
- Safety Features: 3 enabled
- Vector Database: ChromaDB with persistent storage

🎉 CardioQA System Complete and Production Ready!
