# Medical-Optimized Breast Tissue Segmentation and BYOL Augmentation Demo

This notebook demonstrates the medical-optimized breast tissue segmentation and BYOL augmentations implemented in `train_byol_mammo.py`. It shows tile extraction, frequency-energy detection of micro-calcifications, and medically appropriate transforms.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from pathlib import Path
import cv2
from scipy import ndimage
from skimage import morphology, measure, filters
import random
from tqdm import tqdm
import torch
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader

# Import BYOL transforms
from lightly.transforms.byol_transform import BYOLTransform

# Set random seed for reproducibility.
# The same fixed seed is applied to Python's `random`, NumPy and PyTorch.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

In [None]:
# Configuration - Same as train_byol_mammo.py
# (that is the notebook's stated intent; the training script is not visible here).
DATA_DIR = Path("./split_images/training")  # Folder searched for *.png sample images
TILE_SIZE = 256  # Side length, in pixels, of each square tile
TILE_STRIDE = 128  # Step between tile origins (half a tile, i.e. 50% overlap)
NUM_SAMPLES = 10  # Upper bound on the number of images randomly sampled for the demo
MIN_BREAST_RATIO = 0.1  # Min. mask fraction to keep a tile on tissue alone (lowered from 0.3 for micro-calcifications)
MIN_FREQ_ENERGY = 0.02  # Min. frequency energy for the frequency-based route (raised to avoid background noise)
MIN_BREAST_FOR_FREQ = 0.08  # Min. mask fraction a tile still needs on the frequency-based route (raised)

In [None]:
def compute_frequency_energy(image_patch: np.ndarray) -> float:
    """
    Compute high-frequency energy using Laplacian of Gaussian (LoG)
    to detect micro-calcifications and other high-frequency structures.

    The patch is converted to grayscale, smoothed with a 3x3 Gaussian and
    passed through a 3x3 Laplacian. Only positive responses are kept, and the
    score is their variance over the brighter pixels divided by the patch's
    mean intensity.

    Args:
        image_patch: 2-D grayscale patch or 3-D RGB patch. The fixed
            intensity cut-off (20) presumably assumes 8-bit data - TODO confirm.

    Returns:
        The energy score as a Python float. 0.0 is returned for an empty
        patch, for a patch whose mean intensity is below 20, and for a patch
        in which fewer than 10% of the pixels exceed 30% of the mean.
    """
    # An empty patch has no signal; return before the mean becomes NaN and
    # OpenCV rejects the zero-sized array.
    if image_patch.size == 0:
        return 0.0

    if image_patch.ndim == 3:
        gray = cv2.cvtColor(image_patch, cv2.COLOR_RGB2GRAY)
    else:
        # `gray` is only read below, so no defensive copy is required.
        gray = image_patch

    # Reject very dark patches (pure background)
    mean_intensity = np.mean(gray)
    if mean_intensity < 20:
        return 0.0

    # Apply Laplacian of Gaussian for high-frequency detection
    blurred = cv2.GaussianBlur(gray.astype(np.float32), (3, 3), 1.0)
    laplacian = cv2.Laplacian(blurred, cv2.CV_32F, ksize=3)

    # Focus on positive responses (bright spots, not dark edges)
    positive_laplacian = np.maximum(laplacian, 0)

    # Only consider areas with sufficient intensity (avoid background noise)
    mask = gray > (mean_intensity * 0.3)
    if np.sum(mask) < (gray.size * 0.1):  # Too little tissue
        return 0.0

    # Variance of the retained responses, normalised by overall brightness;
    # the epsilon only guards the division.
    energy = np.var(positive_laplacian[mask]) / (mean_intensity + 1e-8)

    return float(energy)


def segment_breast_tissue(image_array: np.ndarray) -> np.ndarray:
    """
    Segment breast tissue from mammogram using morphological operations.

    According to the original note this mirrors the function of the same name
    in train_byol_mammo.py, with stronger background rejection (the training
    script is not visible from this notebook).

    Pipeline: grayscale -> zero out pixels <= 10 -> Otsu threshold -> clear
    pixels <= 15 -> 3x3 opening -> hole filling -> keep the largest connected
    component -> 7x7 closing.

    Args:
        image_array: 2-D grayscale image or 3-D RGB image. The fixed cut-offs
            (10 and 15) presumably assume 8-bit intensities - TODO confirm.

    Returns:
        Boolean array with the image's height and width; True marks pixels
        classified as breast tissue.
    """
    if image_array.ndim == 3:
        gray = cv2.cvtColor(image_array, cv2.COLOR_RGB2GRAY)
    else:
        # `gray` is only read below, so no defensive copy is required.
        gray = image_array

    # NOTE(review): the previous version computed a 3x3 Gaussian blur here but
    # never used the result; thresholding has always run on the unblurred
    # image. The dead computation was removed without changing the output -
    # confirm whether thresholding the blurred image was the real intent.

    # Pre-filter very dark pixels (pure background)
    filtered_gray = np.where(gray > 10, gray, 0)

    # Otsu thresholding for breast tissue segmentation
    _, binary = cv2.threshold(filtered_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # More aggressive background removal
    binary = np.where(gray > 15, binary, 0).astype(np.uint8)

    # Minimal morphological operations to preserve detail
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    opened = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

    # Fill holes
    filled = ndimage.binary_fill_holes(opened).astype(np.uint8) * 255

    # Keep largest connected component (main breast tissue).
    # One bincount pass yields every label's pixel count; label 0 is the
    # background and is excluded from the comparison.
    num_labels, labels = cv2.connectedComponents(filled)
    if num_labels > 1:
        label_sizes = np.bincount(labels.ravel(), minlength=num_labels)
        largest_label = 1 + int(np.argmax(label_sizes[1:]))
        mask = (labels == largest_label).astype(np.uint8) * 255
    else:
        mask = filled

    # Gentle closing to smooth boundaries
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

    return mask > 0

In [None]:
def extract_breast_tiles_with_freq_energy(image_array, breast_mask, tile_size, stride, min_breast_ratio=0.1, min_freq_energy=0.02, min_breast_for_freq=0.08):
    """
    Slide a window over the image and keep the tiles worth training on.

    A tile is kept when its breast-mask coverage reaches `min_breast_ratio`,
    or when its frequency energy reaches `min_freq_energy` while coverage is
    at least `min_breast_for_freq` and its mean intensity is at least 30.
    Tiles whose mean intensity is below 25 are dropped before any scoring.

    Returns a list of `(tile_image, (x, y), breast_ratio, freq_energy)` tuples,
    where `(x, y)` is the tile's top-left corner.
    """
    def _origins(extent):
        # Regular grid of start offsets, plus one extra offset flush with the
        # far edge when the grid would otherwise leave a strip uncovered.
        starts = list(range(0, max(1, extent - tile_size + 1), stride))
        if starts[-1] + tile_size < extent:
            starts.append(extent - tile_size)
        return starts

    img_h, img_w = image_array.shape[:2]
    row_starts = _origins(img_h)
    col_starts = _origins(img_w)
    tile_area = tile_size * tile_size

    selected = []
    for top in row_starts:
        rows = slice(top, top + tile_size)
        for left in col_starts:
            cols = slice(left, left + tile_size)
            patch = image_array[rows, cols]

            # Fraction of the nominal tile area covered by the breast mask
            coverage = np.sum(breast_mask[rows, cols]) / tile_area

            # Very dark tiles are treated as background and skipped outright
            brightness = np.mean(patch)
            if brightness < 25:
                continue

            energy = compute_frequency_energy(patch)

            enough_tissue = coverage >= min_breast_ratio
            strong_detail = (
                energy >= min_freq_energy
                and coverage >= min_breast_for_freq
                and brightness >= 30
            )
            if enough_tissue or strong_detail:
                selected.append((patch, (left, top), coverage, energy))

    return selected


def analyze_tile_quality(tiles):
    """
    Summarise a list of selected tiles.

    Each entry is indexed as `(tile_image, coords, breast_ratio, freq_energy)`.
    Returns an empty dict for an empty input; otherwise a dict with tile
    counts per selection route, average breast ratio / frequency energy /
    mean intensity, the intensity range, and the number of tiles whose mean
    intensity is below 50.
    """
    if not tiles:
        return {}

    ratios = []
    energies = []
    brightness = []
    for entry in tiles:
        brightness.append(np.mean(entry[0]))
        ratios.append(entry[2])
        energies.append(entry[3])

    # Tiles that qualify on breast coverage alone
    tissue_selected = sum(1 for ratio in ratios if ratio >= MIN_BREAST_RATIO)

    # Tiles below the coverage threshold that carry enough frequency energy
    freq_selected = sum(
        1
        for ratio, energy in zip(ratios, energies)
        if ratio < MIN_BREAST_RATIO and energy >= MIN_FREQ_ENERGY
    )

    # Dim tiles are flagged as potentially problematic
    dim_tiles = sum(1 for value in brightness if value < 50)

    return {
        'total_tiles': len(tiles),
        'breast_method_tiles': tissue_selected,
        'freq_method_tiles': freq_selected,
        'avg_breast_ratio': np.mean(ratios),
        'avg_freq_energy': np.mean(energies),
        'avg_intensity': np.mean(brightness),
        'intensity_range': (np.min(brightness), np.max(brightness)),
        'low_intensity_tiles': dim_tiles,
        'high_freq_low_breast': freq_selected,
    }


def create_medical_transforms(input_size: int):
    """
    Build the two-view BYOL transform used for mammogram tiles.

    Both views share the same skeleton (tensor conversion, horizontal flip,
    rotation of up to 7 degrees, brightness/contrast jitter, resize,
    normalisation). View 2 uses slightly stronger jitter and additionally
    applies a small affine shift/scale and a mild Gaussian blur.

    Per the original note this matches the function of the same name in
    train_byol_mammo.py (not verifiable from this notebook).
    """
    def _head(jitter):
        # Geometry and photometric steps common to both views; saturation and
        # hue stay untouched so no colour shift is introduced.
        return [
            T.ToTensor(),
            T.RandomHorizontalFlip(p=0.5),
            T.RandomRotation(degrees=7, fill=0),
            T.ColorJitter(brightness=jitter, contrast=jitter, saturation=0, hue=0),
        ]

    def _tail():
        # Final resize followed by three-channel normalisation with
        # mean 0.5 / std 0.5.
        return [
            T.Resize(input_size, antialias=True),
            T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ]

    # View 1: lighter augmentations
    lighter_view = T.Compose(_head(0.1) + _tail())

    # View 2: slightly stronger jitter plus small translation/scale and blur
    extra_steps = [
        T.RandomAffine(degrees=0, translate=(0.05, 0.05), scale=(0.95, 1.05), fill=0),
        T.GaussianBlur(kernel_size=3, sigma=(0.1, 0.5)),
    ]
    stronger_view = T.Compose(_head(0.15) + extra_steps + _tail())

    return BYOLTransform(
        view_1_transform=lighter_view,
        view_2_transform=stronger_view,
    )

In [None]:
# Get sample images from the dataset.
# The glob result is sorted because directory listing order depends on the
# filesystem; without a stable order the seeded random.sample below would
# pick different images on different machines.
image_paths = sorted(DATA_DIR.glob("*.png"))
print(f"Found {len(image_paths)} images in {DATA_DIR}")

# Always bind sample_paths so later cells can rely on the name existing.
sample_paths = []
if not image_paths:
    print("❌ No images found! Make sure the path is correct and contains .png files")
else:
    # Select random sample (at most NUM_SAMPLES images)
    sample_paths = random.sample(image_paths, min(NUM_SAMPLES, len(image_paths)))
    print(f"✅ Processing {len(sample_paths)} sample images to demonstrate medical BYOL pipeline")

In [None]:
# Process each sample image with the ENHANCED medical pipeline.
# For every sampled image: load it, segment the breast region, extract tiles,
# summarise them, and append one record to `results` for the cells below.
results = []

# `sample_paths` only exists if the sampling cell found images, hence the
# existence check before the length check.
if 'sample_paths' in locals() and len(sample_paths) > 0:
    for i, img_path in enumerate(tqdm(sample_paths, desc="Processing images with enhanced medical pipeline")):
        print(f"\nProcessing image {i+1}: {img_path.name}")
        
        # Load image; the array is materialised inside the `with` so the file
        # handle can be closed immediately afterwards
        with Image.open(img_path) as img:
            img_array = np.array(img)
        
        print(f"  Image shape: {img_array.shape}")
        print(f"  Image intensity range: [{np.min(img_array)}, {np.max(img_array)}]")
        
        # Segment breast tissue using enhanced function
        breast_mask = segment_breast_tissue(img_array)
        # The mask is boolean, so its sum is the number of tissue pixels
        breast_area = np.sum(breast_mask)
        total_area = breast_mask.shape[0] * breast_mask.shape[1]
        breast_percentage = (breast_area / total_area) * 100
        
        print(f"  Breast tissue: {breast_percentage:.1f}% of image")
        
        # Extract tiles using ENHANCED logic with better background rejection
        tiles = extract_breast_tiles_with_freq_energy(
            img_array, breast_mask, TILE_SIZE, TILE_STRIDE, 
            MIN_BREAST_RATIO, MIN_FREQ_ENERGY, MIN_BREAST_FOR_FREQ
        )
        
        # Analyze tile quality (returns an empty dict when no tile was kept)
        tile_analysis = analyze_tile_quality(tiles)
        
        # Separate tiles by selection criteria.
        # Each tile is (tile_image, (x, y), breast_ratio, freq_energy):
        # t[2] is the breast ratio and t[3] the frequency energy.
        breast_tiles = [t for t in tiles if t[2] >= MIN_BREAST_RATIO]
        freq_tiles = [t for t in tiles if t[2] < MIN_BREAST_RATIO and t[3] >= MIN_FREQ_ENERGY and t[2] >= MIN_BREAST_FOR_FREQ]
        
        print(f"  Generated {len(tiles)} total tiles:")
        print(f"    - {len(breast_tiles)} tiles by breast tissue ratio (≥{MIN_BREAST_RATIO:.1%})")
        print(f"    - {len(freq_tiles)} tiles by frequency energy (≥{MIN_FREQ_ENERGY:.3f} + ≥{MIN_BREAST_FOR_FREQ:.1%} breast)")
        # .get() with a default because tile_analysis is {} when no tile was kept
        print(f"    - Average intensity: {tile_analysis.get('avg_intensity', 0):.1f}")
        print(f"    - Low intensity tiles (potential background): {tile_analysis.get('low_intensity_tiles', 0)}")
        
        # One record per image; the validation, display and summary cells
        # read these keys
        results.append({
            'path': img_path,
            'image': img_array,
            'mask': breast_mask,
            'tiles': tiles,
            'breast_tiles': breast_tiles,
            'freq_tiles': freq_tiles,
            'breast_percentage': breast_percentage,
            'tile_analysis': tile_analysis
        })

    print(f"\n✅ Completed processing {len(results)} images with ENHANCED medical pipeline")
    print(f"   🛡️  Background rejection: intensity ≥25 for all tiles, ≥30 for frequency tiles")
    print(f"   🔍  Enhanced frequency detection: more stringent thresholds")
    print(f"   📊  Quality analysis: tracking intensity and background contamination")
else:
    print("❌ No sample images to process")

In [None]:
def validate_frequency_energy_detection(results):
    """
    Validate that frequency energy detection is working correctly and not selecting background.

    For every processed image, five random tile positions are sampled and the
    ones that look like background (breast ratio < 2% and mean intensity < 50)
    are scored with `compute_frequency_energy`. Statistics for the
    frequency-selected tiles and for these background tiles are printed, and
    when at least three frequency-selected tiles exist a 2x6 comparison figure
    is shown.

    Args:
        results: list of per-image dicts providing the keys 'image', 'mask'
            and 'freq_tiles'. Frequency tiles are
            `(tile_image, coords, breast_ratio, freq_energy)` tuples.

    Returns:
        None. All output goes to stdout and matplotlib.
    """
    if not results:
        print("❌ No results to validate")
        return

    print("🔍 FREQUENCY ENERGY VALIDATION")
    print("=" * 50)

    all_freq_tiles = []
    all_background_tiles = []

    for result in results:
        img_array = result['image']
        breast_mask = result['mask']
        height, width = img_array.shape[:2]

        # Sample 5 random regions and keep those that look like background
        for _ in range(5):
            # randint's upper bound is exclusive, so +1 is needed to make the
            # last valid origin (extent - TILE_SIZE) reachable.
            y = np.random.randint(0, max(1, height - TILE_SIZE + 1))
            x = np.random.randint(0, max(1, width - TILE_SIZE + 1))

            tile_image = img_array[y:y+TILE_SIZE, x:x+TILE_SIZE]
            tile_mask = breast_mask[y:y+TILE_SIZE, x:x+TILE_SIZE]
            breast_ratio = np.sum(tile_mask) / (TILE_SIZE * TILE_SIZE)
            mean_intensity = np.mean(tile_image)

            if breast_ratio < 0.02 and mean_intensity < 50:  # Likely background
                freq_energy = compute_frequency_energy(tile_image)
                all_background_tiles.append((tile_image, freq_energy, mean_intensity, breast_ratio))

        all_freq_tiles.extend(result['freq_tiles'])

    # Mean intensity of each frequency-selected tile, computed once and reused
    # by the statistics and by the summary below.
    freq_intensities = [np.mean(t[0]) for t in all_freq_tiles]

    if all_freq_tiles:
        freq_energies = [t[3] for t in all_freq_tiles]
        freq_breast_ratios = [t[2] for t in all_freq_tiles]

        print(f"📊 FREQUENCY-SELECTED TILES ({len(all_freq_tiles)} tiles):")
        print(f"   • Frequency energy: {np.mean(freq_energies):.4f} ± {np.std(freq_energies):.4f}")
        print(f"   • Mean intensity: {np.mean(freq_intensities):.1f} ± {np.std(freq_intensities):.1f}")
        print(f"   • Breast ratio: {np.mean(freq_breast_ratios):.2%} ± {np.std(freq_breast_ratios):.2%}")
        print(f"   • Intensity range: [{np.min(freq_intensities):.1f}, {np.max(freq_intensities):.1f}]")
    else:
        print("📊 No frequency-selected tiles found")

    if all_background_tiles:
        bg_energies = [t[1] for t in all_background_tiles]
        bg_intensities = [t[2] for t in all_background_tiles]
        bg_breast_ratios = [t[3] for t in all_background_tiles]

        print(f"\n🌑 BACKGROUND TILES ({len(all_background_tiles)} tiles):")
        print(f"   • Frequency energy: {np.mean(bg_energies):.4f} ± {np.std(bg_energies):.4f}")
        print(f"   • Mean intensity: {np.mean(bg_intensities):.1f} ± {np.std(bg_intensities):.1f}")
        print(f"   • Breast ratio: {np.mean(bg_breast_ratios):.2%} ± {np.std(bg_breast_ratios):.2%}")
        print(f"   • Intensity range: [{np.min(bg_intensities):.1f}, {np.max(bg_intensities):.1f}]")

        # Check if any background tiles would have been selected
        problematic_bg = sum(1 for e in bg_energies if e >= MIN_FREQ_ENERGY)
        print(f"   • Background tiles that would pass freq threshold: {problematic_bg}/{len(all_background_tiles)}")

    # Visual comparison: top row frequency tiles, bottom row background tiles
    if len(all_freq_tiles) >= 3:
        fig, axes = plt.subplots(2, 6, figsize=(18, 6))
        fig.suptitle("Frequency Energy Detection Validation", fontsize=16)

        for i, (tile_img, coords, breast_ratio, freq_energy) in enumerate(all_freq_tiles[:6]):
            axes[0, i].imshow(tile_img, cmap='gray')
            # "\n" must be a real newline so the title wraps onto several lines
            axes[0, i].set_title(f"Freq Tile {i+1}\nEnergy: {freq_energy:.3f}\nBreast: {breast_ratio:.1%}\nIntensity: {np.mean(tile_img):.0f}")

        for i, (tile_img, freq_energy, mean_intensity, breast_ratio) in enumerate(all_background_tiles[:6]):
            axes[1, i].imshow(tile_img, cmap='gray')
            axes[1, i].set_title(f"Background {i+1}\nEnergy: {freq_energy:.3f}\nBreast: {breast_ratio:.1%}\nIntensity: {mean_intensity:.0f}")

        # Hide the frame on every panel, including the ones left empty
        for ax in axes.ravel():
            ax.axis('off')

        plt.tight_layout()
        plt.show()

    print(f"\n✅ VALIDATION SUMMARY:")
    if all_freq_tiles:
        avg_freq_intensity = np.mean(freq_intensities)
        min_freq_intensity = np.min(freq_intensities)
        print(f"   • Frequency tiles have good intensity (avg: {avg_freq_intensity:.1f}, min: {min_freq_intensity:.1f})")

        if min_freq_intensity > 40:
            print("   ✅ All frequency tiles above background intensity threshold")
        else:
            print("   ⚠️  Some frequency tiles may be background contaminated")
    else:
        print("   ℹ️  No frequency-based tiles selected (may indicate good background rejection)")

# Run validation only when the processing cell produced at least one result
if results:
    validate_frequency_energy_detection(results)

In [None]:
def display_enhanced_medical_pipeline(results, max_images=3):
    """
    Display the enhanced medical-optimized segmentation pipeline with background rejection validation.

    One figure row is drawn per image with five panels: the original image,
    the breast mask, a per-tile frequency-energy heatmap, a per-tile mean
    intensity map, and the image with the selected tiles outlined
    (green = breast-ratio tiles, yellow = frequency tiles with mean intensity
    above 50, red = frequency tiles at or below 50). Summary statistics are
    printed afterwards.

    Args:
        results: list of per-image dicts providing the keys 'path', 'image',
            'mask', 'tiles', 'breast_tiles', 'freq_tiles', 'breast_percentage'
            and 'tile_analysis'.
        max_images: number of figure rows; at most this many results are drawn.

    Returns:
        None. All output goes to stdout and matplotlib.
    """
    if not results:
        print("❌ No results to display")
        return

    fig, axes = plt.subplots(max_images, 5, figsize=(25, 5*max_images))
    if max_images == 1:
        # A single row comes back 1-D; restore the (row, column) indexing
        axes = axes.reshape(1, -1)

    for i in range(min(max_images, len(results))):
        result = results[i]
        image = result['image']
        tiles = result['tiles']

        # Original image ("\n" is a real newline so the title wraps)
        axes[i, 0].imshow(image, cmap='gray')
        axes[i, 0].set_title(f"Original\n{result['path'].name}")

        # Enhanced breast mask
        axes[i, 1].imshow(result['mask'], cmap='gray')
        axes[i, 1].set_title(f"Enhanced Breast Mask\n{result['breast_percentage']:.1f}% breast tissue")

        if len(tiles) > 0:
            freq_map = np.zeros(image.shape[:2])
            intensity_map = np.zeros(image.shape[:2])

            for tile_img, (x, y), breast_ratio, freq_energy in tiles:
                region = (slice(y, y + TILE_SIZE), slice(x, x + TILE_SIZE))
                # Element-wise maximum: where tiles overlap, each pixel keeps
                # the highest energy of the tiles covering it. Taking the
                # scalar max of the whole region would spread a neighbour's
                # energy over this entire tile.
                freq_map[region] = np.maximum(freq_map[region], freq_energy)
                # Intensity map: the most recently drawn tile wins on overlaps
                intensity_map[region] = np.mean(tile_img)

            # Frequency energy heatmap with the image faintly overlaid
            axes[i, 2].imshow(freq_map, cmap='hot', alpha=0.8, vmin=0, vmax=0.1)
            axes[i, 2].imshow(image, cmap='gray', alpha=0.2)
            axes[i, 2].set_title("Frequency Energy\n(Enhanced Detection)")

            # Intensity validation map
            axes[i, 3].imshow(intensity_map, cmap='viridis', alpha=0.8)
            axes[i, 3].imshow(image, cmap='gray', alpha=0.2)
            axes[i, 3].set_title("Tile Intensity\n(Background Check)")

        # Selected tiles overlay with quality indicators; grayscale images are
        # replicated to three channels so coloured rectangles can be drawn
        overlay = image.copy()
        if len(overlay.shape) == 2:
            overlay = np.stack([overlay, overlay, overlay], axis=-1)

        # Green: tiles selected on breast ratio
        for tile_img, (x, y), breast_ratio, freq_energy in result['breast_tiles']:
            cv2.rectangle(overlay, (x, y), (x+TILE_SIZE, y+TILE_SIZE), (0, 255, 0), 2)

        # Yellow: frequency tile with good intensity; red: potentially problematic
        for tile_img, (x, y), breast_ratio, freq_energy in result['freq_tiles']:
            color = (255, 255, 0) if np.mean(tile_img) > 50 else (255, 0, 0)
            cv2.rectangle(overlay, (x, y), (x+TILE_SIZE, y+TILE_SIZE), color, 2)

        axes[i, 4].imshow(overlay)
        axes[i, 4].set_title(f"Quality-Coded Tiles\n🟢 Breast ({len(result['breast_tiles'])}) 🟡 Freq-Good 🔴 Freq-Low")

    # Hide the frame on every panel, including rows/panels that stayed empty
    for ax in axes.ravel():
        ax.axis('off')

    plt.tight_layout()
    plt.show()

    # Summary statistics
    print("\n📊 ENHANCED PIPELINE STATISTICS:")
    total_tiles = sum(len(r['tiles']) for r in results)
    total_freq_tiles = sum(len(r['freq_tiles']) for r in results)

    if total_tiles > 0:
        # An image with no selected tiles has an empty analysis dict; skip
        # those so indexing 'avg_intensity' cannot raise KeyError.
        analyses = [r['tile_analysis'] for r in results if r.get('tile_analysis')]
        avg_intensity = np.mean([a['avg_intensity'] for a in analyses]) if analyses else float('nan')
        low_intensity_count = sum(a.get('low_intensity_tiles', 0) for a in analyses)

        print(f"   • Total tiles selected: {total_tiles}")
        print(f"   • Frequency-based tiles: {total_freq_tiles} ({total_freq_tiles/total_tiles*100:.1f}%)")
        print(f"   • Average tile intensity: {avg_intensity:.1f}")
        print(f"   • Low intensity tiles: {low_intensity_count} ({low_intensity_count/total_tiles*100:.1f}%)")

        if low_intensity_count == 0:
            print("   ✅ No background contamination detected")
        elif low_intensity_count < total_tiles * 0.05:
            print("   ⚠️  Minimal background contamination (<5%)")
        else:
            print("   🔴 Significant background contamination - consider stricter thresholds")

# Display enhanced pipeline results for at most the first three processed
# images; skipped entirely when nothing was processed
if results:
    display_enhanced_medical_pipeline(results, max_images=min(3, len(results)))

In [None]:
# Comprehensive summary of the medical-optimized pipeline:
# prints aggregate tile statistics and plots the distributions of breast
# ratio and frequency energy over all selected tiles.
if results:
    total_tiles = sum(len(result['tiles']) for result in results)
    total_breast_tiles = sum(len(result['breast_tiles']) for result in results)
    total_freq_tiles = sum(len(result['freq_tiles']) for result in results)
    avg_tiles_per_image = total_tiles / len(results)
    avg_breast_percentage = np.mean([result['breast_percentage'] for result in results])

    # Shares of each selection route; guarded because images may have been
    # processed without a single tile being selected (total_tiles == 0).
    breast_tile_pct = total_breast_tiles / total_tiles * 100 if total_tiles else 0.0
    freq_tile_pct = total_freq_tiles / total_tiles * 100 if total_tiles else 0.0

    print("🏥 === MEDICAL-OPTIMIZED BYOL PIPELINE SUMMARY ===")
    print(f"📊 Dataset Statistics:")
    print(f"   • Total images processed: {len(results)}")
    print(f"   • Total tiles generated: {total_tiles:,}")
    print(f"   • Average tiles per image: {avg_tiles_per_image:.1f}")
    print(f"   • Average breast tissue percentage: {avg_breast_percentage:.1f}%")

    print(f"\n🎯 Tile Selection Strategy:")
    print(f"   • Breast tissue tiles (≥{MIN_BREAST_RATIO:.1%} tissue): {total_breast_tiles:,} ({breast_tile_pct:.1f}%)")
    print(f"   • Frequency energy tiles (≥{MIN_FREQ_ENERGY:.3f} energy): {total_freq_tiles:,} ({freq_tile_pct:.1f}%)")
    print(f"   • Tile size: {TILE_SIZE}×{TILE_SIZE} pixels")
    print(f"   • Tile stride: {TILE_STRIDE} pixels ({TILE_STRIDE/TILE_SIZE*100:.0f}% overlap)")

    print(f"\n🔬 Medical Improvements vs Original:")
    print(f"   ✅ Lowered breast ratio threshold: 0.3 → {MIN_BREAST_RATIO} (captures peripheral regions)")
    print(f"   ✅ Added frequency energy detection: micro-calcification sensitivity")
    print(f"   ✅ Gentle segmentation: preserves medical details")
    print(f"   ✅ Grayscale-appropriate preprocessing: L→RGB replication")

    print(f"\n🎛️ BYOL Augmentation Optimizations:")
    print(f"   ✅ Medical-safe rotations: ±7° (preserves anatomy)")
    print(f"   ✅ Mild brightness/contrast: no color distortion")
    print(f"   ✅ Light blur: preserves calcification details")
    print(f"   ✅ No solarization/strong color jitter: medical data integrity")

    print(f"\n⚡ A100 Performance Optimizations:")
    print(f"   ✅ Mixed precision training: autocast + GradScaler")
    print(f"   ✅ Per-step momentum updates: better convergence")
    print(f"   ✅ Optimized hyperparameters: LR=3e-4, WD=1e-4 (batch=8)")
    print(f"   ✅ Multi-label classification ready: [mass, calcification]")

    # Distribution visualization: gather per-tile values across all images
    breast_ratios = []
    freq_energies = []
    for result in results:
        for tile_img, coords, breast_ratio, freq_energy in result['tiles']:
            breast_ratios.append(breast_ratio)
            freq_energies.append(freq_energy)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # Breast ratio distribution, with the selection threshold marked
    ax1.hist(breast_ratios, bins=20, alpha=0.7, edgecolor='black', color='green')
    ax1.axvline(MIN_BREAST_RATIO, color='red', linestyle='--', label=f'Threshold: {MIN_BREAST_RATIO:.1%}')
    ax1.set_xlabel('Breast Tissue Ratio')
    ax1.set_ylabel('Number of Tiles')
    ax1.set_title('Distribution of Breast Tissue Ratios')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Frequency energy distribution, with the selection threshold marked
    ax2.hist(freq_energies, bins=20, alpha=0.7, edgecolor='black', color='orange')
    ax2.axvline(MIN_FREQ_ENERGY, color='red', linestyle='--', label=f'Threshold: {MIN_FREQ_ENERGY:.3f}')
    ax2.set_xlabel('Frequency Energy (LoG variance)')
    ax2.set_ylabel('Number of Tiles')
    ax2.set_title('Distribution of Frequency Energy (Calcification Detection)')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print(f"\n🚀 Ready for A100 training with: sbatch submit_byol.sbatch")

else:
    print("❌ No results to summarize - please check the data directory path")