# 🔬 Anton Visual Microscopy Analysis

Interactive notebook for real-time microscopy analysis with live vision-language model (VLM) insights and Cellular Microscopy Phenotype Ontology (CMPO) annotations.

**Features:**
- 🖼️ Side-by-side image display and analysis
- 🧠 Live VLM analysis streaming
- 🧬 Color-coded CMPO phenotype annotations
- ⚡ Real-time pipeline progress
- 🎛️ Interactive parameter controls

In [1]:
# Import required libraries
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.colors import ListedColormap
from PIL import Image
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output, Markdown
import asyncio
from pathlib import Path
import random
from datetime import datetime

# Add Anton to path: append the notebook's current working directory to
# sys.path so the `anton` imports below can be resolved from it.
sys.path.append(str(Path().absolute()))
from anton.core.pipeline import AnalysisPipeline
from anton.utils.image_io import ImageLoader
from anton.cmpo.mapping import map_to_cmpo
from anton.cmpo.ontology import CMPOOntology

# Reaching this line means every import above succeeded.
print("✅ Anton Visual Analysis - Libraries loaded successfully!")

✅ Anton Visual Analysis - Libraries loaded successfully!


## 🎛️ Configuration & Setup

In [2]:
# Check API keys and setup
def check_setup():
    """Report which VLM credentials and dataset files are available.

    Returns:
        A ``(messages, vlm_provider)`` tuple. ``messages`` is a list of
        human-readable status lines; ``vlm_provider`` is ``"gemini"``,
        ``"claude"`` or ``"mock"`` depending on which API key (if any) is set
        in the environment. The Google key takes precedence over the
        Anthropic key.
    """
    # (environment variable, status line, provider id), in priority order.
    key_table = (
        ('GOOGLE_API_KEY', "✅ Google API Key found", "gemini"),
        ('ANTHROPIC_API_KEY', "✅ Anthropic API Key found", "claude"),
    )
    for env_name, found_line, vlm_provider in key_table:
        if os.getenv(env_name):
            messages = [found_line]
            break
    else:
        # Neither key is set: fall back to the mock provider.
        messages = ["⚠️ No API key found - using mock responses"]
        vlm_provider = "mock"

    # Report whether the BBBC013 image directory is present.
    dataset_dir = Path("data/bbbc013/BBBC013_v1_images_bmp")
    if dataset_dir.exists():
        image_count = sum(1 for _ in dataset_dir.glob("*.BMP"))
        messages.append(f"✅ BBBC013 dataset found ({image_count} images)")
    else:
        messages.append("❌ BBBC013 dataset not found")

    return messages, vlm_provider

# Run the check once when the cell executes. `provider` is a notebook-level
# global that run_live_analysis reads later to choose the VLM backend.
status_list, provider = check_setup()
for status in status_list:
    print(status)

# Announce the selected provider in upper case.
print(f"\n🚀 Using VLM Provider: {provider.upper()}")

✅ Google API Key found
✅ BBBC013 dataset found (192 images)

🚀 Using VLM Provider: GEMINI


## 🖼️ Interactive Image Selector

In [3]:
# Load available images
def load_available_images():
    """Return the sorted file names of the Channel 1 BBBC013 images.

    Only files matching ``Channel1-*.BMP`` (the FKHR-GFP channel) inside the
    dataset directory are listed. An empty list is returned when the
    directory does not exist.
    """
    dataset_path = Path("data/bbbc013/BBBC013_v1_images_bmp")
    if dataset_path.exists():
        return sorted(entry.name for entry in dataset_path.glob("Channel1-*.BMP"))
    return []

# Discover the images once; later cells reuse `available_images`.
available_images = load_available_images()
print(f"📊 Found {len(available_images)} FKHR-GFP channel images")

if not available_images:
    print("❌ No BBBC013 images found. Please run the BBBC013 demo first to download the dataset.")
else:
    # Dropdown pre-set to a randomly chosen image.
    image_selector = widgets.Dropdown(
        description='Select Image:',
        options=available_images,
        value=random.choice(available_images),
        style={'description_width': 'initial'},
    )

    # Button that starts the analysis (its handler is attached in a later cell).
    analyze_btn = widgets.Button(
        description="🔬 Analyze Image",
        button_style="success",
        layout=widgets.Layout(width='200px'),
    )

    # One progress step per pipeline stage (four in total).
    progress_bar = widgets.IntProgress(
        description='Pipeline:',
        bar_style='info',
        min=0,
        max=4,
        value=0,
    )

    display(widgets.HBox([image_selector, analyze_btn]))
    display(progress_bar)

📊 Found 96 FKHR-GFP channel images


HBox(children=(Dropdown(description='Select Image:', index=51, options=('Channel1-01-A-01.BMP', 'Channel1-02-A…

IntProgress(value=0, bar_style='info', description='Pipeline:', max=4)

## 🎨 Visualization Setup

In [4]:
# CMPO color scheme
CMPO_COLORS = {
    # Phenotype categories
    'nuclear': '#FF6B6B',       # red: nuclear phenotypes
    'cytoplasm': '#4ECDC4',     # teal: cytoplasmic phenotypes
    'localization': '#45B7D1',  # blue: localization phenotypes
    'morphology': '#96CEB4',    # green: morphology phenotypes
    # Confidence bands
    'high_conf': '#2ECC71',     # bright green: high confidence
    'med_conf': '#F39C12',      # orange: medium confidence
    'low_conf': '#E74C3C',      # red: low confidence
}


def get_cmpo_color(term_name, confidence):
    """Return the hex color for a CMPO term's confidence band.

    Args:
        term_name: Name of the CMPO term. Accepted as part of the signature;
            the color is currently chosen from ``confidence`` alone.
        confidence: Numeric confidence score.

    Returns:
        ``CMPO_COLORS['high_conf']`` for scores of at least 4.5,
        ``CMPO_COLORS['med_conf']`` for scores of at least 3.5, and
        ``CMPO_COLORS['low_conf']`` otherwise.
    """
    # Bands are checked from the highest threshold down; the first hit wins.
    for threshold, band in ((4.5, 'high_conf'), (3.5, 'med_conf')):
        if confidence >= threshold:
            return CMPO_COLORS[band]
    return CMPO_COLORS['low_conf']

def create_visualization_layout():
    """Build an empty two-panel figure for the image and the analysis text.

    Returns:
        A ``(fig, ax1, ax2)`` tuple: the 16x8-inch figure, the left axes
        (titled for the microscopy image) and the right axes (titled for the
        VLM analysis). Both axes are switched off so only content added later
        is visible.
    """
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    ax1, ax2 = axes

    # Left panel shows the image, right panel is reserved for analysis text.
    panel_titles = ("🔬 Microscopy Image", "🧠 VLM Analysis")
    for axis, title in zip((ax1, ax2), panel_titles):
        axis.set_title(title, fontsize=14, fontweight='bold')
        axis.axis('off')

    plt.tight_layout()
    return fig, ax1, ax2

print("🎨 Visualization setup complete!")

🎨 Visualization setup complete!


## 🧠 Live Analysis Engine

In [5]:
class LiveAnalysisDisplay:
    """Real-time analysis display with image and text updates.

    The display owns three ``ipywidgets.Output`` areas: one for the image,
    one for the VLM analysis text and one for the CMPO phenotype summary.
    ``current_results`` starts as ``None`` and is available for callers to
    store the most recent pipeline results.
    """

    # Result keys of the four pipeline stages, in display order.
    _STAGE_KEYS = (
        'stage_1_global',
        'stage_2_objects',
        'stage_3_features',
        'stage_4_population',
    )
    # Stage text longer than this is truncated in the analysis tab.
    _MAX_DISPLAY_CHARS = 1000
    # Stage text must be longer than this to be sent to the CMPO mapper.
    _MIN_MAPPING_CHARS = 50

    def __init__(self):
        self.analysis_output = widgets.Output()
        self.image_output = widgets.Output()
        self.cmpo_output = widgets.Output()
        self.current_results = None

    def display_layout(self):
        """Display the image area next to a two-tab analysis/CMPO panel."""
        # Create tabs for different views
        tab = widgets.Tab(children=[self.analysis_output, self.cmpo_output])
        tab.set_title(0, "🧠 VLM Analysis")
        tab.set_title(1, "🧬 CMPO Phenotypes")

        # Main layout: Image on left, analysis on right
        main_layout = widgets.HBox([
            self.image_output,
            tab
        ])

        display(main_layout)

    def update_image(self, image_path):
        """Replace the image area with the image stored at ``image_path``.

        Args:
            image_path: Path of the image file; it is handed to
                ``ImageLoader.load`` and its file name is used as the title.
        """
        with self.image_output:
            clear_output(wait=True)

            # Load and display image
            loader = ImageLoader()
            image = loader.load(image_path)

            plt.figure(figsize=(8, 8))
            plt.imshow(image, cmap='gray')
            plt.title(f"🔬 {Path(image_path).name}", fontsize=14, fontweight='bold')
            plt.axis('off')

            # Caption with the loaded array's shape, dtype and value range.
            info_text = f"Shape: {image.shape} | Type: {image.dtype} | Range: [{image.min():.0f}, {image.max():.0f}]"
            plt.figtext(0.5, 0.02, info_text, ha='center', fontsize=10)

            plt.tight_layout()
            plt.show()

    def update_analysis(self, stage_name, content, is_final=False):
        """Append one stage's text to the analysis tab.

        Args:
            stage_name: Heading for the section. The special value
                ``"start"`` clears the tab and prints the pipeline banner
                instead; ``content`` is ignored in that case.
            content: Text to show. ``None`` is shown as an empty section and
                any other non-string value is converted with ``str()``.
                Text longer than 1000 characters is truncated.
            is_final: When true, a completion line is printed after the
                content.
        """
        with self.analysis_output:
            if stage_name == "start":
                clear_output(wait=True)
                print("🚀 Starting Anton Analysis Pipeline...")
                print("=" * 50)
                return

            print(f"\n📋 {stage_name}:")
            print("-" * 30)

            # Callers pass whatever the pipeline returned; coerce it so that
            # a None or structured value cannot break length check/slicing.
            text = "" if content is None else str(content)

            # Truncate very long content for display
            if len(text) > self._MAX_DISPLAY_CHARS:
                text = text[:self._MAX_DISPLAY_CHARS] + "\n\n... [Content truncated for display] ..."

            print(text)

            if is_final:
                print("\n✅ Analysis complete!")

    @staticmethod
    def _extract_stage_text(stage_data):
        """Return the free-text field of one stage result, or ``""`` if none is usable."""
        if not isinstance(stage_data, dict):
            return ""

        if 'description' in stage_data:
            text = stage_data['description']
        elif 'segmentation_guidance' in stage_data:
            text = stage_data['segmentation_guidance']
        elif 'feature_descriptions' in stage_data and stage_data['feature_descriptions']:
            # Only the first feature description is used for mapping.
            text = stage_data['feature_descriptions'][0]
        elif 'population_summary' in stage_data:
            text = stage_data['population_summary']
        else:
            return ""

        # Non-string payloads cannot be length-checked or fence-stripped.
        return text if isinstance(text, str) else ""

    @staticmethod
    def _strip_code_fences(text):
        """Drop lines that start with a Markdown code fence (three backticks)."""
        if '```' not in text:
            return text
        return '\n'.join(
            line for line in text.split('\n')
            if not line.strip().startswith('```')
        )

    @staticmethod
    def _as_score(value):
        """Coerce a confidence value to ``float``; unusable values count as 0.0."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    def update_cmpo(self, all_stage_results):
        """Rebuild the CMPO tab from the text of every pipeline stage.

        For each stage whose text is longer than 50 characters, the text is
        mapped with ``map_to_cmpo`` and the first three mappings are listed
        with a confidence marker. A summary with the total count, the number
        of unique CMPO IDs and the five highest-confidence terms follows.

        Args:
            all_stage_results: Mapping from stage key (``'stage_1_global'``
                ... ``'stage_4_population'``) to that stage's result dict.

        Any exception raised while mapping is reported inside the tab rather
        than propagated.
        """
        with self.cmpo_output:
            clear_output(wait=True)
            print("🧬 CMPO PHENOTYPE ANALYSIS")
            print("=" * 40)

            try:
                # Imported inside the try so that an import failure is
                # reported in the tab like any other mapping error.
                from anton.cmpo.mapping import map_to_cmpo
                from anton.cmpo.ontology import CMPOOntology

                cmpo_mapper = CMPOOntology()
                all_cmpo = []

                for stage_key in self._STAGE_KEYS:
                    if stage_key not in all_stage_results:
                        continue

                    stage_text = self._extract_stage_text(all_stage_results[stage_key])
                    if len(stage_text) <= self._MIN_MAPPING_CHARS:
                        continue

                    # Clean JSON formatting
                    stage_text = self._strip_code_fences(stage_text)

                    cmpo_results = map_to_cmpo(stage_text, cmpo_mapper, context='protein_localization')
                    if not cmpo_results:
                        continue

                    stage_num = stage_key.split('_')[1]
                    print(f"\n📋 STAGE {stage_num} PHENOTYPES:")
                    for mapping in cmpo_results[:3]:
                        term = mapping.get('term_name', 'Unknown')
                        confidence = self._as_score(mapping.get('confidence', 0))
                        cmpo_id = mapping.get('CMPO_ID', 'Unknown')

                        # Color-code by confidence
                        if confidence >= 4.5:
                            marker = "🟢"
                        elif confidence >= 3.5:
                            marker = "🟡"
                        else:
                            marker = "🟠"

                        print(f"   {marker} {term}")
                        print(f"      Confidence: {confidence:.2f} | ID: {cmpo_id}")

                    all_cmpo.extend(cmpo_results)

                # Summary
                if all_cmpo:
                    unique_ids = {m.get('CMPO_ID') for m in all_cmpo}
                    print("\n📊 SUMMARY:")
                    print(f"   Total CMPO terms identified: {len(all_cmpo)}")
                    print(f"   Unique terms: {len(unique_ids)}")

                    # Top terms across all stages
                    top_terms = sorted(
                        all_cmpo,
                        key=lambda m: self._as_score(m.get('confidence', 0)),
                        reverse=True,
                    )[:5]
                    print("\n🏆 TOP 5 PHENOTYPES:")
                    for rank, term in enumerate(top_terms, 1):
                        name = term.get('term_name', 'Unknown')
                        conf = self._as_score(term.get('confidence', 0))
                        print(f"   {rank}. {name} ({conf:.2f})")

            except Exception as e:
                # Best-effort display: report the failure in the tab.
                print(f"❌ CMPO mapping failed: {e}")

# Create the live display instance. It is a notebook-level global: the
# analysis function writes to it and the export function reads its
# `current_results`.
live_display = LiveAnalysisDisplay()
print("🎭 Live analysis display ready!")

🎭 Live analysis display ready!


## 🚀 Main Analysis Interface

In [6]:
def run_live_analysis(image_name):
    """Run the Anton pipeline on one BBBC013 image and show the results.

    Args:
        image_name: File name of an image inside
            ``data/bbbc013/BBBC013_v1_images_bmp``.

    Returns:
        None. Output goes to the notebook-level ``live_display`` and
        ``progress_bar`` widgets; on success the pipeline results are stored
        in ``live_display.current_results``.

    If the image file does not exist, a message is printed and nothing else
    happens. Any exception raised while loading the image or running the
    pipeline is reported in the analysis tab, the progress bar is switched
    to the ``'danger'`` style and the traceback is printed; the exception is
    not re-raised.
    """
    image_path = Path(f"data/bbbc013/BBBC013_v1_images_bmp/{image_name}")
    if not image_path.exists():
        print(f"❌ Image not found: {image_path}")
        return

    # Configure pipeline with BBBC013 context
    bbbc013_context = {
        "experiment_type": "protein_translocation",
        "cell_line": "U2OS_osteosarcoma",
        "protein": "FKHR-GFP",
        "drugs": ["Wortmannin", "LY294002"],
        "readout": "nuclear_vs_cytoplasmic_localization",
        "channels": ["FKHR-GFP", "DNA_DRAQ"]
    }

    # Anything other than the two real backends runs on mock responses.
    vlm_provider = provider if provider in ("gemini", "claude") else "mock"
    config = {
        "vlm_provider": vlm_provider,
        "channels": [0],
        "biological_context": bbbc013_context,
    }

    # (result key, heading) for each pipeline stage, in display order.
    stages = (
        ('stage_1_global', "STAGE 1 (Global Scene)"),
        ('stage_2_objects', "STAGE 2 (Object Detection)"),
        ('stage_3_features', "STAGE 3 (Feature Analysis)"),
        ('stage_4_population', "STAGE 4 (Population Insights)"),
    )
    last_index = len(stages) - 1

    # Initialize pipeline
    live_display.update_analysis("start", "")
    # Reset the colour as well as the value: a previous run leaves the bar
    # in its 'success' or 'danger' style.
    progress_bar.value = 0
    progress_bar.bar_style = 'info'

    try:
        # Inside the try so a failed image load is reported like any other
        # analysis error instead of escaping the button callback.
        live_display.update_image(image_path)

        pipeline = AnalysisPipeline(config)

        # Run pipeline with live updates
        live_display.update_analysis("INITIALIZATION", f"🔧 Pipeline configured with {provider.upper()} VLM\n🧬 BBBC013 biological context loaded\n📁 Analyzing: {image_name}")

        # Run synchronous pipeline
        results = pipeline.run_pipeline_sync(image_path)

        # Update progress and display results stage by stage
        for i, (stage_key, stage_name) in enumerate(stages):
            progress_bar.value = i + 1

            stage_data = results[stage_key] if stage_key in results else None
            if not stage_data:
                live_display.update_analysis(stage_name, f"⚠️ No results for {stage_name}")
                continue

            # Extract meaningful content for display; the field order
            # mirrors the one LiveAnalysisDisplay.update_cmpo uses.
            if 'description' in stage_data:
                content = stage_data['description']
            elif 'segmentation_guidance' in stage_data:
                content = stage_data['segmentation_guidance']
            elif 'feature_descriptions' in stage_data and stage_data['feature_descriptions']:
                # Show the first feature description rather than only the
                # key list; str() keeps non-string entries printable.
                content = str(stage_data['feature_descriptions'][0])
            elif 'population_summary' in stage_data:
                content = stage_data['population_summary']
            else:
                content = f"✅ {stage_name} completed\nKeys: {list(stage_data.keys())}"

            live_display.update_analysis(stage_name, content, is_final=(i == last_index))

        # Update CMPO display
        live_display.update_cmpo(results)
        live_display.current_results = results

        progress_bar.bar_style = 'success'
        print(f"\n🎉 Analysis complete for {image_name}!")

    except Exception as e:
        # Top-level boundary of the button callback: report, don't re-raise.
        live_display.update_analysis("ERROR", f"❌ Analysis failed: {str(e)}")
        progress_bar.bar_style = 'danger'
        import traceback
        traceback.print_exc()

# Connect button to analysis function
def on_analyze_click(b):
    """Button callback: analyze the image currently chosen in the dropdown.

    Args:
        b: The clicked button, as passed by ipywidgets; not used.
    """
    chosen = image_selector.value
    print(f"🔬 Analyzing {chosen}...")
    run_live_analysis(chosen)

# The button only exists when images were found, so wire it up only then.
if not available_images:
    print("❌ No images available for analysis")
else:
    analyze_btn.on_click(on_analyze_click)
    print("🎮 Interactive analysis ready! Select an image and click 'Analyze Image'")

🎮 Interactive analysis ready! Select an image and click 'Analyze Image'


## 🖼️ Live Analysis Display

**Instructions:**
1. Select an image from the dropdown above
2. Click "🔬 Analyze Image" to start the analysis
3. Watch the live updates in the panels below
4. Switch between "VLM Analysis" and "CMPO Phenotypes" tabs

**The analysis will show:**
- 🖼️ **Left Panel**: Original microscopy image with metadata
- 🧠 **VLM Analysis Tab**: VLM analysis output for each of the 4 pipeline stages
- 🧬 **CMPO Phenotypes Tab**: Phenotype classifications with confidence scores

In [8]:
# Display the live analysis interface
# Without images there is nothing to analyze, so explain how to get them.
if not available_images:
    print("❌ Cannot display analysis interface - no BBBC013 images found")
    print("\nTo get started:")
    print("1. Run the BBBC013 demo script to download the dataset")
    print("2. Restart this notebook")
else:
    live_display.display_layout()



## 🎛️ Advanced Controls

Additional tools: export the latest analysis results to a JSON file, or pick a random image to analyze next.

In [9]:
# Export results functionality
def _to_jsonable(value):
    """Recursively convert *value* into something the ``json`` module can encode.

    Strings, numbers, booleans and ``None`` are kept; dicts, lists and tuples
    are converted element by element; anything else becomes ``str(value)``.
    """
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, dict):
        return {
            # JSON object keys must be basic scalars; stringify the rest.
            (k if k is None or isinstance(k, (str, int, float, bool)) else str(k)):
                _to_jsonable(v)
            for k, v in value.items()
        }
    if isinstance(value, (list, tuple)):
        return [_to_jsonable(item) for item in value]
    return str(value)


def export_analysis_results():
    """Export the current analysis results to a timestamped JSON file.

    The file is named ``anton_analysis_<YYYYmmdd_HHMMSS>.json`` and is
    written to the current working directory. Nested dicts, lists and
    numbers keep their JSON structure; only values JSON cannot represent are
    converted to strings. If ``live_display.current_results`` is empty, a
    notice is printed and no file is written.
    """
    import json

    results = live_display.current_results
    if not results:
        print("❌ No analysis results to export. Run an analysis first.")
        return

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"anton_analysis_{timestamp}.json"

    # Serialize before opening the file so that a serialization error cannot
    # leave an empty or truncated file behind.
    payload = json.dumps(_to_jsonable(results), indent=2)

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(payload)

    print(f"✅ Results exported to {filename}")

# Create export button
# Button that triggers the JSON export; it is created even when no images
# were found, but only displayed by the conditional at the end of this cell.
export_btn = widgets.Button(
    description="💾 Export Results",
    button_style="info",
    layout=widgets.Layout(width='150px')
)

def on_export_click(b):
    """Button callback: export the current results (``b`` is unused)."""
    export_analysis_results()

# Register the callback with the button.
export_btn.on_click(on_export_click)

# Random image button
random_btn = widgets.Button(
    description="🎲 Random Image",
    button_style="warning",
    layout=widgets.Layout(width='150px'),
)


def on_random_click(b):
    """Button callback: point the dropdown at a randomly chosen image.

    Does nothing when no images are available. ``b`` is the clicked button
    and is not used.
    """
    if not available_images:
        return
    image_selector.value = random.choice(available_images)
    print(f"🎲 Selected random image: {image_selector.value}")


random_btn.on_click(on_random_click)

# Display advanced controls
# Show the extra buttons only when there are images to work with.
if not available_images:
    print("Advanced controls will be available once BBBC013 dataset is loaded.")
else:
    display(widgets.HBox([export_btn, random_btn]))

    print("\n🎛️ Advanced controls:")
    print("• 💾 Export Results: Save analysis to JSON file")
    print("• 🎲 Random Image: Pick a random image for analysis")

HBox(children=(Button(button_style='info', description='💾 Export Results', layout=Layout(width='150px'), style…


🎛️ Advanced controls:
• 💾 Export Results: Save analysis to JSON file
• 🎲 Random Image: Pick a random image for analysis
