In [None]:
# 1. SETUP AND IMPORTS
import os
import sys
import warnings
from pathlib import Path
import tempfile
import logging

# Data manipulation and analysis
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tifffile import imread
import json

# Suppress warnings for cleaner output.
# NOTE(review): this silences every warning category for the whole kernel
# session, including numeric warnings raised by numpy in later cells.
warnings.filterwarnings('ignore')

# Configure matplotlib for better plots
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 12
plt.rcParams['axes.grid'] = True
plt.rcParams['grid.alpha'] = 0.3

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)

print("‚úÖ Basic imports completed successfully")

# Import our pipeline components.
# A failure here is only reported, not re-raised, so later cells that use
# CoreDetectionConfig / DualModalityCoreDetector / MatchedCoreRegistrationPipeline
# will fail with NameError if this import did not succeed.
try:
    from dual_modality_core_detector import DualModalityCoreDetector, CoreDetectionConfig
    from matched_core_registration_pipeline import MatchedCoreRegistrationPipeline
    print("‚úÖ Pipeline components imported successfully")
except ImportError as e:
    print(f"‚ùå Failed to import pipeline components: {e}")
    print("Make sure you're running this notebook from the tma_core_pairing_pipeline directory")

# Check for SpaceC availability
try:
    import spacec as sp
    print("‚úÖ SpaceC imported successfully")
except ImportError:
    print("‚ùå SpaceC not available. Install with: pip install spacec")

# Check for registration pipeline availability.
# REGISTRATION_AVAILABLE is read by the parameter-configuration cell to decide
# whether a RegistrationConfig is built.
try:
    # Only extend sys.path once, so re-running this cell does not keep
    # appending duplicate '..' entries.
    if '..' not in sys.path:
        sys.path.append('..')
    from registration_pipeline import RegistrationConfig
    print("‚úÖ Registration pipeline available")
    REGISTRATION_AVAILABLE = True
except ImportError:
    print("‚ö†Ô∏è  Registration pipeline not available - core detection only")
    REGISTRATION_AVAILABLE = False


In [None]:
# 2. FILE PATHS CONFIGURATION
# ================================
# ⚠️ UPDATE THESE PATHS TO MATCH YOUR DATA! ⚠️
# ================================

# Input file paths - CHANGE THESE!
# Both values are absolute, site-specific paths to the whole-slide images
# (H&E and Orion); they are read by the existence check below and by the
# later optimization, initialization and detection cells.
HE_WSI_PATH = "/scratch/users/ranycs/highplex-ml-synthesis/data/raw/TA118-HEraw.ome.tiff"
ORION_WSI_PATH = "/scratch/users/ranycs/highplex-ml-synthesis/data/raw/TA118-Orionraw.ome.tiff"

# Output directory (relative to the notebook's working directory); also used
# as temp_dir and as the root of the registration input/output folders in the
# parameter-configuration cell.
OUTPUT_DIR = "./tma_pipeline_output"

# Verify files exist
def check_file_exists(filepath, description):
    """
    Report whether ``filepath`` exists on disk.

    Prints a one-line status (including the size in GB when the path exists)
    labelled with ``description``.

    Returns:
        True if the path exists, False otherwise.
    """
    if not os.path.exists(filepath):
        print(f"‚ùå {description} not found: {filepath}")
        return False

    bytes_per_gb = 1024**3
    file_size = os.path.getsize(filepath) / bytes_per_gb  # Size in GB
    print(f"‚úÖ {description}: {filepath} ({file_size:.2f} GB)")
    return True

print("üîç Checking input files...")
he_exists = check_file_exists(HE_WSI_PATH, "H&E WSI")
orion_exists = check_file_exists(ORION_WSI_PATH, "Orion WSI")

if not (he_exists and orion_exists):
    print("\n‚ùå Please update the file paths above and re-run this cell")
else:
    print(f"\n‚úÖ All input files found!")
    print(f"üìÅ Output directory will be: {OUTPUT_DIR}")

    # Create output directory. parents=True lets OUTPUT_DIR be a nested path
    # whose parent folders do not exist yet; exist_ok=True keeps the cell
    # re-runnable.
    Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)


In [None]:
# 3.5. INTELLIGENT PARAMETER OPTIMIZATION
# ========================================

def analyze_image_intensities(he_path, orion_path, dapi_channel=0, sample_factor=0.1):
    """
    Sample both modalities and return normalized intensity arrays.

    A square region is read from the H&E slide (level 0) starting at one
    quarter of the slide's width/height, converted to grayscale by averaging
    RGB and scaled to [0, 1]. A rectangular region is cut from the selected
    Orion channel starting at one quarter of its height/width and divided by
    its own maximum.

    Args:
        he_path: Path to H&E image
        orion_path: Path to Orion image
        dapi_channel: DAPI channel index (first axis of the Orion array)
        sample_factor: Fraction of image to sample for speed (0.1 = 10%)

    Returns:
        Tuple ``(he_gray, dapi_norm)`` of 2-D arrays. On any error the
        exception is printed and ``(None, None)`` is returned instead.
    """
    print("üîç Analyzing image intensity distributions...")

    try:
        # Load and sample H&E image
        import openslide
        he_slide = openslide.OpenSlide(he_path)
        try:
            he_dims = he_slide.dimensions

            # Sample a region for analysis (for speed)
            sample_size = int(min(he_dims) * sample_factor)
            he_sample = he_slide.read_region(
                (he_dims[0]//4, he_dims[1]//4), 0, (sample_size, sample_size)
            ).convert("RGB")
        finally:
            # Release the slide handle even if reading the region fails.
            he_slide.close()

        # Convert to grayscale and normalize
        he_gray = np.array(he_sample)
        he_gray = np.mean(he_gray, axis=2) / 255.0  # RGB to grayscale, normalize to [0,1]

        # Load and sample Orion DAPI channel.
        # NOTE(review): imread materializes the whole Orion array before one
        # channel is selected; this may be very memory hungry for a full WSI.
        orion_img = imread(orion_path)
        if orion_img.ndim == 3 and orion_img.shape[0] <= 50:
            dapi = orion_img[dapi_channel]
        else:
            raise ValueError(f"Unexpected Orion format: {orion_img.shape}")

        # Sample DAPI for analysis
        h, w = dapi.shape
        dapi_sample = dapi[h//4:h//4+int(h*sample_factor), w//4:w//4+int(w*sample_factor)]

        # Normalize to [0,1]. An all-zero sample is left as zeros rather than
        # being divided by zero (which would yield NaNs downstream).
        dapi_peak = dapi_sample.max()
        dapi_norm = dapi_sample.astype(np.float32)
        if dapi_peak > 0:
            dapi_norm = dapi_norm / dapi_peak

        return he_gray, dapi_norm

    except Exception as e:
        print(f"‚ùå Error analyzing images: {e}")
        return None, None

def suggest_optimal_parameters(he_gray, dapi_norm, current_config, plot=True):
    """
    Derive intensity cutoffs from the sampled H&E and DAPI images.

    Each image is binned into a 100-bin histogram over [0, 1]; cutoffs are the
    left edges of the first bins at which the cumulative probability exceeds
    0.15 / 0.75 (H&E) and 0.05 / 0.85 (DAPI). A new CoreDetectionConfig is
    built with those four cutoffs and every other field copied from
    ``current_config``.

    Args:
        he_gray: Sampled H&E intensities, or None if analysis failed
        dapi_norm: Sampled DAPI intensities, or None if analysis failed
        current_config: Configuration whose non-cutoff fields are reused
        plot: When True, show histograms with current/suggested cutoffs and
            the two sample images

    Returns:
        The new configuration, or ``current_config`` unchanged when either
        image is None.
    """
    if he_gray is None or dapi_norm is None:
        print("‚ùå Cannot suggest parameters - image analysis failed")
        return current_config

    print("üìä Computing optimal detection thresholds...")

    def _probability_histogram(image):
        # 100 bins over [0, 1], counts rescaled so they sum to 1.
        counts, edges = np.histogram(image.flatten(), bins=100, range=(0, 1))
        return counts / counts.sum(), edges

    def _edge_where_mass_exceeds(probabilities, edges, fraction):
        # Left edge of the first bin whose cumulative probability passes `fraction`.
        return edges[np.argmax(np.cumsum(probabilities) > fraction)]

    he_hist, he_bins = _probability_histogram(he_gray)
    dapi_hist, dapi_bins = _probability_histogram(dapi_norm)

    # H&E: background is bright (high values), tissue is dark (low values)
    he_lower_optimal = _edge_where_mass_exceeds(he_hist, he_bins, 0.15)
    he_upper_optimal = _edge_where_mass_exceeds(he_hist, he_bins, 0.75)

    # DAPI: tissue signal is bright, background is dark
    dapi_lower_optimal = _edge_where_mass_exceeds(dapi_hist, dapi_bins, 0.05)
    dapi_upper_optimal = _edge_where_mass_exceeds(dapi_hist, dapi_bins, 0.85)

    # Same configuration as before, with only the four cutoffs swapped out
    optimized_config = CoreDetectionConfig(
        downscale_factor=current_config.downscale_factor,
        padding=current_config.padding,
        he_lower_cutoff=float(he_lower_optimal),
        he_upper_cutoff=float(he_upper_optimal),
        orion_lower_cutoff=float(dapi_lower_optimal),
        orion_upper_cutoff=float(dapi_upper_optimal),
        dapi_channel=current_config.dapi_channel,
        max_match_distance=current_config.max_match_distance,
        min_size_ratio=current_config.min_size_ratio,
        max_size_ratio=current_config.max_size_ratio,
        min_circularity=current_config.min_circularity,
        min_core_area=current_config.min_core_area,
        max_core_area=current_config.max_core_area,
        temp_dir=current_config.temp_dir,
        save_debug_images=current_config.save_debug_images
    )

    if plot:
        def _draw_histogram(ax, edges, probabilities, bar_color, current_color,
                            current_lower, current_upper,
                            optimal_lower, optimal_upper, title):
            # Bars first, then dashed "current" and solid "optimal" markers.
            ax.bar(edges[:-1], probabilities, width=edges[1]-edges[0], alpha=0.7, color=bar_color)
            ax.axvline(current_lower, color=current_color, linestyle='--', linewidth=2,
                       label=f'Current lower: {current_lower:.3f}')
            ax.axvline(current_upper, color=current_color, linestyle='--', linewidth=2,
                       label=f'Current upper: {current_upper:.3f}')
            ax.axvline(optimal_lower, color='green', linestyle='-', linewidth=2,
                       label=f'Optimal lower: {optimal_lower:.3f}')
            ax.axvline(optimal_upper, color='green', linestyle='-', linewidth=2,
                       label=f'Optimal upper: {optimal_upper:.3f}')
            ax.set_title(title)
            ax.set_xlabel('Normalized Intensity')
            ax.set_ylabel('Probability')
            ax.legend()
            ax.grid(True, alpha=0.3)

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))

        _draw_histogram(
            axes[0,0], he_bins, he_hist, 'red', 'blue',
            current_config.he_lower_cutoff, current_config.he_upper_cutoff,
            he_lower_optimal, he_upper_optimal,
            'H&E Intensity Distribution',
        )
        _draw_histogram(
            axes[0,1], dapi_bins, dapi_hist, 'blue', 'red',
            current_config.orion_lower_cutoff, current_config.orion_upper_cutoff,
            dapi_lower_optimal, dapi_upper_optimal,
            'DAPI Intensity Distribution',
        )

        # Bottom row: the sampled regions themselves
        he_panel, dapi_panel = axes[1,0], axes[1,1]
        he_panel.imshow(he_gray, cmap='gray')
        he_panel.set_title('H&E Sample Region')
        he_panel.axis('off')

        dapi_panel.imshow(dapi_norm, cmap='viridis')
        dapi_panel.set_title('DAPI Sample Region')
        dapi_panel.axis('off')

        plt.tight_layout()
        plt.show()

    # Side-by-side table of current vs. suggested cutoffs
    print(f"\nüìä PARAMETER COMPARISON:")
    print(f"{'Parameter':<20} {'Current':<12} {'Optimized':<12} {'Change':<15}")
    print("-" * 60)
    comparison_rows = (
        ('H&E Lower', 'he_lower_cutoff', he_lower_optimal),
        ('H&E Upper', 'he_upper_cutoff', he_upper_optimal),
        ('Orion Lower', 'orion_lower_cutoff', dapi_lower_optimal),
        ('Orion Upper', 'orion_upper_cutoff', dapi_upper_optimal),
    )
    for row_label, attribute, suggested in comparison_rows:
        present = getattr(current_config, attribute)
        print(f"{row_label:<20} {present:<12.3f} {suggested:<12.3f} {suggested-present:+.3f}")

    return optimized_config

def _config_with_cutoffs(base_config, he_lower, he_upper, orion_lower, orion_upper,
                         save_debug_images):
    """Copy ``base_config`` with the four intensity cutoffs and the debug flag replaced."""
    return CoreDetectionConfig(
        downscale_factor=base_config.downscale_factor,
        padding=base_config.padding,
        he_lower_cutoff=he_lower,
        he_upper_cutoff=he_upper,
        orion_lower_cutoff=orion_lower,
        orion_upper_cutoff=orion_upper,
        dapi_channel=base_config.dapi_channel,
        max_match_distance=base_config.max_match_distance,
        min_size_ratio=base_config.min_size_ratio,
        max_size_ratio=base_config.max_size_ratio,
        min_circularity=base_config.min_circularity,
        min_core_area=base_config.min_core_area,
        max_core_area=base_config.max_core_area,
        temp_dir=base_config.temp_dir,
        save_debug_images=save_debug_images
    )


def grid_search_parameters(he_path, orion_path, base_config, n_points=5):
    """
    Perform grid search to find optimal parameters using SpaceC detection results.

    Four cutoffs (H&E lower/upper, Orion lower/upper) are each sampled at
    ``n_points`` values around the base configuration, giving up to
    ``n_points ** 4`` detector runs; combinations where an upper cutoff is not
    above its lower cutoff are skipped. Each successful run is scored as
    0.5 * matching rate + 0.3 * detection balance + 0.2 * spatial score.

    Args:
        he_path: Path to H&E image
        orion_path: Path to Orion image
        base_config: Base configuration to search around
        n_points: Number of points to test in each dimension

    Returns:
        Tuple ``(config, results)``. ``config`` is a new configuration with
        the best-scoring cutoffs, or ``base_config`` when no run scored above
        zero. ``results`` is a list with one dict per successful run.
    """
    total_combinations = n_points ** 4
    print(f"üîç Starting grid search optimization ({n_points}^4 = {total_combinations} combinations)...")
    print("‚è±Ô∏è  This may take 5-10 minutes...")

    # Define search ranges around base parameters.
    # NOTE(review): if a base cutoff lies outside the hard-coded bounds below
    # (e.g. he_upper_cutoff > 0.45), the start of the range exceeds its end
    # and linspace runs downwards past the upper bound - confirm the intended
    # clamping before relying on this with automatically suggested cutoffs.
    he_lower_range = np.linspace(max(0.05, base_config.he_lower_cutoff - 0.05),
                                min(0.30, base_config.he_lower_cutoff + 0.05), n_points)
    he_upper_range = np.linspace(max(0.15, base_config.he_upper_cutoff - 0.05),
                                min(0.40, base_config.he_upper_cutoff + 0.05), n_points)
    orion_lower_range = np.linspace(max(0.02, base_config.orion_lower_cutoff - 0.03),
                                   min(0.25, base_config.orion_lower_cutoff + 0.03), n_points)
    orion_upper_range = np.linspace(max(0.08, base_config.orion_upper_cutoff - 0.03),
                                   min(0.35, base_config.orion_upper_cutoff + 0.03), n_points)

    best_score = 0
    best_params = None
    results = []

    current_combination = 0
    failed_configs = 0
    last_error = None

    for he_lo in he_lower_range:
        for he_hi in he_upper_range:
            if he_hi <= he_lo:
                continue

            for or_lo in orion_lower_range:
                for or_hi in orion_upper_range:
                    if or_hi <= or_lo:
                        continue

                    current_combination += 1
                    if current_combination % 10 == 0:
                        print(f"   Progress: {current_combination}/{total_combinations} ({current_combination/total_combinations*100:.1f}%)")

                    # Debug images are disabled during the search for speed
                    test_config = _config_with_cutoffs(
                        base_config, he_lo, he_hi, or_lo, or_hi, save_debug_images=False
                    )

                    try:
                        test_detector = DualModalityCoreDetector(test_config)
                        try:
                            test_results = test_detector.detect_and_match_cores(he_path, orion_path)
                        finally:
                            # Always release detector resources, even when
                            # detection raised.
                            test_detector.cleanup()

                        if test_results['success']:
                            # Calculate score (prioritize matching rate and reasonable detection counts)
                            stats = test_results['detection_stats']
                            quality = test_results['quality_metrics']

                            # Weighted score: matching rate (50%) + detection balance (30%) + spatial accuracy (20%)
                            matching_score = quality['matching_rate']
                            detection_balance = min(stats['he_cores_detected'], stats['orion_cores_detected']) / max(stats['he_cores_detected'], stats['orion_cores_detected'], 1)
                            spatial_score = max(0, 1 - quality['mean_distance'] / 1000)  # Penalize large distances

                            total_score = 0.5 * matching_score + 0.3 * detection_balance + 0.2 * spatial_score

                            results.append({
                                'he_lower': he_lo,
                                'he_upper': he_hi,
                                'orion_lower': or_lo,
                                'orion_upper': or_hi,
                                'he_detected': stats['he_cores_detected'],
                                'orion_detected': stats['orion_cores_detected'],
                                'matched': stats['matched_cores'],
                                'matching_rate': quality['matching_rate'],
                                'mean_distance': quality['mean_distance'],
                                'total_score': total_score
                            })

                            if total_score > best_score:
                                best_score = total_score
                                best_params = {
                                    'he_lower_cutoff': he_lo,
                                    'he_upper_cutoff': he_hi,
                                    'orion_lower_cutoff': or_lo,
                                    'orion_upper_cutoff': or_hi
                                }

                    except Exception as e:
                        # Skip failed configurations, but remember that they
                        # happened so the failure is not invisible.
                        failed_configs += 1
                        last_error = e
                        continue

    print(f"‚úÖ Grid search completed! Tested {len(results)} valid configurations.")
    if failed_configs:
        print(f"‚ö†Ô∏è  {failed_configs} configurations raised errors and were skipped (last error: {last_error})")

    if best_params:
        print(f"\nüèÜ BEST PARAMETERS (Score: {best_score:.3f}):")
        print(f"   H&E thresholds:   {best_params['he_lower_cutoff']:.3f} - {best_params['he_upper_cutoff']:.3f}")
        print(f"   Orion thresholds: {best_params['orion_lower_cutoff']:.3f} - {best_params['orion_upper_cutoff']:.3f}")

        # Create optimized configuration (debug flag restored from the base)
        optimized_config = _config_with_cutoffs(
            base_config,
            best_params['he_lower_cutoff'],
            best_params['he_upper_cutoff'],
            best_params['orion_lower_cutoff'],
            best_params['orion_upper_cutoff'],
            save_debug_images=base_config.save_debug_images
        )

        return optimized_config, results
    else:
        print("‚ùå No valid configurations found in grid search")
        return base_config, results

# Run the optimization only when its prerequisites exist in the notebook
# namespace. This cell reads and rebinds `detection_config`, which is created
# in Section 3 (Parameter Configuration); without the explicit check a fresh
# top-to-bottom run would stop here with a NameError.
if 'HE_WSI_PATH' not in globals() or 'ORION_WSI_PATH' not in globals():
    print("‚ö†Ô∏è  Please run Section 2 (File Paths Configuration) first")
elif 'detection_config' not in globals():
    print("‚ö†Ô∏è  Please run Section 3 (Parameter Configuration) first")
elif not (os.path.exists(HE_WSI_PATH) and os.path.exists(ORION_WSI_PATH)):
    print("‚ö†Ô∏è  Image files not found - skipping parameter optimization")
    print("   Update the file paths in Section 2 and re-run this cell")
else:
    print("üöÄ Running intelligent parameter optimization...")

    # Step 1: Analyze intensity distributions
    he_intensities, dapi_intensities = analyze_image_intensities(
        HE_WSI_PATH, ORION_WSI_PATH,
        dapi_channel=detection_config.dapi_channel,
        sample_factor=0.05  # Sample 5% for speed
    )

    # Step 2: Get statistically-based parameter suggestions
    if he_intensities is not None and dapi_intensities is not None:
        suggested_config = suggest_optimal_parameters(
            he_intensities, dapi_intensities, detection_config, plot=True
        )

        print(f"\nüí° RECOMMENDATION:")
        print("   1. The optimized parameters above are based on your image statistics")
        print("   2. Try these parameters first before running grid search")
        print("   3. For even better results, run the optional grid search below")

        # Offer to update the configuration.
        # NOTE(review): the resulting detection_config depends on interactive
        # answers, so a non-interactive "Run All" cannot reproduce it.
        use_optimized = input(f"\nüîÑ Use optimized parameters? (y/N): ").lower().strip()
        if use_optimized in ['y', 'yes']:
            detection_config = suggested_config
            print("‚úÖ Configuration updated with optimized parameters!")

            # Display updated parameters
            print(f"\nüìä UPDATED CONFIGURATION:")
            print(f"   H&E detection:   {detection_config.he_lower_cutoff:.3f} - {detection_config.he_upper_cutoff:.3f}")
            print(f"   Orion detection: {detection_config.orion_lower_cutoff:.3f} - {detection_config.orion_upper_cutoff:.3f}")

        # Optional grid search
        run_grid_search = input(f"\nüîç Run grid search for further optimization? (y/N) [Takes 5-10 min]: ").lower().strip()
        if run_grid_search in ['y', 'yes']:
            grid_optimized_config, grid_results = grid_search_parameters(
                HE_WSI_PATH, ORION_WSI_PATH, detection_config, n_points=3
            )

            # Only adopt the grid result when at least one run succeeded
            if len(grid_results) > 0:
                detection_config = grid_optimized_config
                print("‚úÖ Configuration updated with grid search results!")

    print(f"\nüéØ READY FOR CORE DETECTION with optimized parameters!")


In [None]:
# 3. PARAMETER CONFIGURATION
# ============================

# Detector settings shared by the optimization, initialization and detection cells
detection_config = CoreDetectionConfig(
    # --- SpaceC ---
    downscale_factor=64,        # detection runs on a downscaled image (typical: 32-128)
    padding=50,                 # margin kept around each detected region

    # --- H&E thresholds (tune when H&E cores are missed) ---
    he_lower_cutoff=0.15,       # lower intensity bound (typical: 0.10-0.25)
    he_upper_cutoff=0.25,       # upper intensity bound (typical: 0.20-0.35)

    # --- Orion thresholds (tune when Orion cores are missed) ---
    orion_lower_cutoff=0.10,    # lower intensity bound (typical: 0.05-0.20)
    orion_upper_cutoff=0.20,    # upper intensity bound (typical: 0.15-0.30)
    dapi_channel=0,             # index of the DAPI channel (usually 0)

    # --- Matching between modalities ---
    max_match_distance=500.0,   # largest allowed centroid distance, in pixels
    min_size_ratio=0.4,         # smallest allowed size ratio (smaller/larger core)
    max_size_ratio=2.5,         # largest allowed size ratio (larger/smaller core)
    min_circularity=0.2,        # 0.0-1.0, where 1.0 is a perfect circle

    # --- Quality control ---
    min_core_area=10000,        # pixels
    max_core_area=1000000,      # pixels

    # --- Processing ---
    temp_dir=OUTPUT_DIR,        # scratch directory
    save_debug_images=True      # write diagnostic visualizations
)

# Registration settings exist only when the registration pipeline imported
if not REGISTRATION_AVAILABLE:
    registration_config = None
    print("‚ö†Ô∏è  Registration configuration not available")
else:
    registration_config = RegistrationConfig(
        input_dir="/".join((OUTPUT_DIR, "extracted_cores")),
        output_dir="/".join((OUTPUT_DIR, "registration_output")),
        he_suffix="_HE.tif",
        orion_suffix="_Orion.tif",

        # VALIS image-size limits
        max_processed_image_dim_px=1024,        # rigid registration
        max_non_rigid_registration_dim_px=1500, # non-rigid step

        # Patch extraction / parallelism
        patch_size=256,             # training patch size
        stride=256,                 # patch stride
        num_workers=4,              # parallel workers

        # Minimum acceptable registration quality
        min_ssim_threshold=0.3,     # SSIM
        min_ncc_threshold=0.2,      # normalized cross-correlation
        min_mi_threshold=0.5        # mutual information
    )
    print("‚úÖ Registration configuration loaded")

print("‚úÖ Parameter configuration completed")
print(f"üìä Expected ~{detection_config.downscale_factor}x speed-up from downscaling")
print("üéØ Target cores expected: 270+ (you mentioned this in your description)")

# Boxed recap of the values most often tuned
print("\n" + "="*50, "KEY PARAMETERS SUMMARY", "="*50, sep="\n")
print(f"H&E detection range:     {detection_config.he_lower_cutoff:.3f} - {detection_config.he_upper_cutoff:.3f}")
print(f"Orion detection range:   {detection_config.orion_lower_cutoff:.3f} - {detection_config.orion_upper_cutoff:.3f}")
print(f"Max matching distance:   {detection_config.max_match_distance:.0f} pixels")
print(f"Size ratio range:        {detection_config.min_size_ratio:.1f} - {detection_config.max_size_ratio:.1f}")
print(f"Min circularity:         {detection_config.min_circularity:.1f}")
print(f"Downscale factor:        {detection_config.downscale_factor}x")
print("="*50)


In [None]:
# 4. PIPELINE INITIALIZATION
# ===========================

print("üöÄ Initializing TMA Core Pairing Pipeline...")

try:
    # Initialize the complete pipeline
    pipeline = MatchedCoreRegistrationPipeline(detection_config, registration_config)
    print("‚úÖ Pipeline initialized successfully")

    # Quick image format check
    print("\nüîç Checking image formats...")

    # Check H&E image (failure here is reported but does not abort the cell)
    try:
        import openslide
        he_slide = openslide.OpenSlide(HE_WSI_PATH)
        try:
            he_dims = he_slide.dimensions
            he_levels = he_slide.level_count
            he_mpp = he_slide.properties.get('openslide.mpp-x', 'Unknown')
        finally:
            # Close the slide even if reading its metadata raised.
            he_slide.close()
        print(f"‚úÖ H&E Image: {he_dims[0]}x{he_dims[1]} pixels, {he_levels} levels, {he_mpp} Œºm/pixel")
    except Exception as e:
        print(f"‚ö†Ô∏è  H&E format check failed: {e}")

    # Check Orion image.
    # Only shape and dtype are needed, so read them from the TIFF metadata of
    # the first series instead of decoding the whole (multi-GB) image into
    # memory with imread.
    try:
        from tifffile import TiffFile
        with TiffFile(ORION_WSI_PATH) as orion_tiff:
            orion_shape = tuple(orion_tiff.series[0].shape)
            orion_dtype = orion_tiff.series[0].dtype
        print(f"‚úÖ Orion Image: {orion_shape} shape, {orion_dtype} dtype")

        if len(orion_shape) == 3 and orion_shape[0] <= 50:
            print(f"   üìä Detected {orion_shape[0]} channels (multi-channel format)")
            print(f"   üéØ Will use channel {detection_config.dapi_channel} for DAPI")
        else:
            print(f"   ‚ö†Ô∏è  Unexpected format - please verify channel structure")

    except Exception as e:
        print(f"‚ùå Orion format check failed: {e}")

    print(f"\nüìÅ Pipeline output directory: {pipeline.output_path}")

except Exception as e:
    # Pipeline construction (or reading pipeline.output_path) failed: report
    # and re-raise so the notebook run stops here.
    print(f"‚ùå Pipeline initialization failed: {e}")
    raise


In [None]:
# 5. CORE DETECTION AND MATCHING
# ================================

print("üîç Starting dual-modality core detection and matching...")
print("‚è±Ô∏è  This may take 5-15 minutes depending on image size...")

try:
    # Detect cores in both modalities and pair them up
    detection_results = pipeline.core_detector.detect_and_match_cores(
        HE_WSI_PATH,
        ORION_WSI_PATH
    )

    if not detection_results['success']:
        print(f"‚ùå Core detection failed: {detection_results.get('error', 'Unknown error')}")
    else:
        print("‚úÖ Core detection and matching completed successfully!")

        # Pull out the pieces used for reporting and by later cells
        stats = detection_results['detection_stats']
        quality = detection_results['quality_metrics']
        matched_cores = detection_results['matched_cores']

        print(f"\nüìä DETECTION RESULTS:")
        print(f"   H&E cores detected:     {stats['he_cores_detected']}")
        print(f"   Orion cores detected:   {stats['orion_cores_detected']}")
        print(f"   Successfully matched:   {stats['matched_cores']}")
        print(f"   Matching rate:          {quality['matching_rate']:.1%}")
        print(f"   Mean match distance:    {quality['mean_distance']:.1f} pixels")
        print(f"   Mean size ratio:        {quality['mean_size_ratio']:.2f}")

        # Verdict on the matched-core count: the first tier whose floor is
        # reached wins, with the "low" message as the fallback.
        print(next(
            (
                verdict
                for floor, verdict in (
                    (200, "üéâ Excellent! Detected expected number of cores (200+)"),
                    (100, "üëç Good! Reasonable number of cores detected"),
                    (50, "‚ö†Ô∏è  Moderate number of cores - may need parameter tuning"),
                )
                if stats['matched_cores'] >= floor
            ),
            "‚ùå Low number of cores detected - parameter adjustment needed",
        ))

except Exception as e:
    # Report the failure with a full traceback but let the notebook continue
    print(f"‚ùå Detection process failed: {e}")
    import traceback
    traceback.print_exc()


In [None]:
# 6. VISUALIZATION AND QUALITY ASSESSMENT
# =========================================

# Check if detection was successful before proceeding
if 'detection_results' in locals() and detection_results['success']:
    
    # Extract data for visualization
    he_cores = detection_results['he_cores']
    orion_cores = detection_results['orion_cores'] 
    matched_cores = detection_results['matched_cores']
    quality_metrics = detection_results['quality_metrics']
    
    print("üìä Creating comprehensive visualizations...")
    
    # ============================================
    # PLOT 1: Core Spatial Distribution
    # ============================================
    fig, axes = plt.subplots(1, 3, figsize=(20, 6))
    
    # H&E cores
    if len(he_cores) > 0:
        axes[0].scatter(he_cores['centroid_x'], he_cores['centroid_y'], 
                       c='red', alpha=0.7, s=60, edgecolors='darkred', linewidth=1)
        axes[0].set_title(f'H&E Cores (n={len(he_cores)})', fontsize=14, fontweight='bold')
        axes[0].set_xlabel('X Position (pixels)')
        axes[0].set_ylabel('Y Position (pixels)')
        axes[0].grid(True, alpha=0.3)
        
        # Add core size information as color
        if 'area' in he_cores.columns:
            scatter = axes[0].scatter(he_cores['centroid_x'], he_cores['centroid_y'], 
                                    c=he_cores['area'], cmap='Reds', alpha=0.8, s=60)
            plt.colorbar(scatter, ax=axes[0], label='Core Area (pixels¬≤)')
    
    # Orion cores
    if len(orion_cores) > 0:
        axes[1].scatter(orion_cores['centroid_x'], orion_cores['centroid_y'], 
                       c='blue', alpha=0.7, s=60, edgecolors='darkblue', linewidth=1)
        axes[1].set_title(f'Orion Cores (n={len(orion_cores)})', fontsize=14, fontweight='bold')
        axes[1].set_xlabel('X Position (pixels)')
        axes[1].set_ylabel('Y Position (pixels)')
        axes[1].grid(True, alpha=0.3)
        
        # Add core size information as color
        if 'area' in orion_cores.columns:
            scatter = axes[1].scatter(orion_cores['centroid_x'], orion_cores['centroid_y'], 
                                    c=orion_cores['area'], cmap='Blues', alpha=0.8, s=60)
            plt.colorbar(scatter, ax=axes[1], label='Core Area (pixels¬≤)')
    
    # Matched cores with connections
    if len(matched_cores) > 0:
        he_x = [match['he_centroid'][0] for match in matched_cores]
        he_y = [match['he_centroid'][1] for match in matched_cores]
        orion_x = [match['orion_centroid'][0] for match in matched_cores]
        orion_y = [match['orion_centroid'][1] for match in matched_cores]
        
        # Plot matched cores
        axes[2].scatter(he_x, he_y, c='red', alpha=0.8, s=60, label='H&E matched', edgecolors='darkred')
        axes[2].scatter(orion_x, orion_y, c='blue', alpha=0.8, s=60, label='Orion matched', edgecolors='darkblue')
        
        # Draw connection lines
        for i in range(len(matched_cores)):
            axes[2].plot([he_x[i], orion_x[i]], [he_y[i], orion_y[i]], 
                        'gray', alpha=0.6, linewidth=1.5)
        
        axes[2].set_title(f'Matched Cores (n={len(matched_cores)})', fontsize=14, fontweight='bold')
        axes[2].set_xlabel('X Position (pixels)')
        axes[2].set_ylabel('Y Position (pixels)')
        axes[2].legend()
        axes[2].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()
    
    # ============================================
    # PLOT 2: Quality Metrics Analysis
    # ============================================
    if len(matched_cores) > 0:
        fig, axes = plt.subplots(2, 3, figsize=(18, 12))
        
        # Extract metrics for plotting
        distances = [match['match_distance'] for match in matched_cores]
        size_ratios = [match['size_ratio'] for match in matched_cores]
        he_areas = [match['he_area'] for match in matched_cores]
        orion_areas = [match['orion_area'] for match in matched_cores]
        he_circularities = [match['he_circularity'] for match in matched_cores]
        orion_circularities = [match['orion_circularity'] for match in matched_cores]
        
        # Distance distribution
        axes[0,0].hist(distances, bins=25, alpha=0.7, color='green', edgecolor='darkgreen')
        axes[0,0].axvline(quality_metrics['mean_distance'], color='red', linestyle='--', linewidth=2,
                         label=f'Mean: {quality_metrics["mean_distance"]:.1f}')
        axes[0,0].axvline(detection_config.max_match_distance, color='orange', linestyle=':', linewidth=2,
                         label=f'Max allowed: {detection_config.max_match_distance:.0f}')
        axes[0,0].set_title('Match Distance Distribution')
        axes[0,0].set_xlabel('Distance (pixels)')
        axes[0,0].set_ylabel('Count')
        axes[0,0].legend()
        axes[0,0].grid(True, alpha=0.3)
        
        # Size ratio distribution
        # Histogram of per-match size ratios with the mean and the configured
        # min/max ratio bounds drawn as vertical reference lines.
        # NOTE: 'darkpurple' is not a Matplotlib named color and makes hist()
        # raise ValueError; 'indigo' is a valid CSS4 name with a similar dark
        # purple tone.
        axes[0,1].hist(size_ratios, bins=25, alpha=0.7, color='purple', edgecolor='indigo')
        axes[0,1].axvline(quality_metrics['mean_size_ratio'], color='red', linestyle='--', linewidth=2,
                         label=f'Mean: {quality_metrics["mean_size_ratio"]:.2f}')
        axes[0,1].axvline(detection_config.min_size_ratio, color='orange', linestyle=':', linewidth=2,
                         label=f'Min: {detection_config.min_size_ratio:.1f}')
        axes[0,1].axvline(detection_config.max_size_ratio, color='orange', linestyle=':', linewidth=2,
                         label=f'Max: {detection_config.max_size_ratio:.1f}')
        axes[0,1].set_title('Size Ratio Distribution (H&E/Orion)')
        axes[0,1].set_xlabel('Size Ratio')
        axes[0,1].set_ylabel('Count')
        axes[0,1].legend()
        axes[0,1].grid(True, alpha=0.3)
        
        # Core area correlation
        axes[0,2].scatter(he_areas, orion_areas, alpha=0.7, color='teal')
        # Add perfect correlation line
        min_area = min(min(he_areas), min(orion_areas))
        max_area = max(max(he_areas), max(orion_areas))
        axes[0,2].plot([min_area, max_area], [min_area, max_area], 'red', linestyle='--', linewidth=2,
                      label='Perfect correlation')
        axes[0,2].set_title('Core Area Correlation')
        axes[0,2].set_xlabel('H&E Core Area (pixels¬≤)')
        axes[0,2].set_ylabel('Orion Core Area (pixels¬≤)')
        axes[0,2].legend()
        axes[0,2].grid(True, alpha=0.3)
        
        # Circularity comparison
        axes[1,0].scatter(he_circularities, orion_circularities, alpha=0.7, color='brown')
        axes[1,0].plot([0, 1], [0, 1], 'red', linestyle='--', linewidth=2, label='Perfect correlation')
        axes[1,0].axhline(detection_config.min_circularity, color='orange', linestyle=':', linewidth=2,
                         label=f'Min threshold: {detection_config.min_circularity}')
        axes[1,0].axvline(detection_config.min_circularity, color='orange', linestyle=':', linewidth=2)
        axes[1,0].set_title('Circularity Correlation')
        axes[1,0].set_xlabel('H&E Circularity')
        axes[1,0].set_ylabel('Orion Circularity')
        axes[1,0].legend()
        axes[1,0].grid(True, alpha=0.3)
        
        # Match quality vs distance
        match_qualities = [1.0 / (1.0 + d/100) for d in distances]  # Simple quality metric
        axes[1,1].scatter(distances, match_qualities, alpha=0.7, color='darkgreen')
        axes[1,1].set_title('Match Quality vs Distance')
        axes[1,1].set_xlabel('Match Distance (pixels)')
        axes[1,1].set_ylabel('Match Quality Score')
        axes[1,1].grid(True, alpha=0.3)
        
        # Summary statistics text
        summary_text = f"""
MATCHING STATISTICS SUMMARY
‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
Total H&E cores:      {quality_metrics['total_he_cores']}
Total Orion cores:    {quality_metrics['total_orion_cores']}
Matched cores:        {quality_metrics['matched_cores']}
Matching rate:        {quality_metrics['matching_rate']:.1%}

DISTANCE METRICS
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
Mean distance:        {quality_metrics['mean_distance']:.1f} px
Median distance:      {quality_metrics['median_distance']:.1f} px
Std deviation:        {quality_metrics['std_distance']:.1f} px

SIZE RATIO METRICS
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
Mean ratio:           {quality_metrics['mean_size_ratio']:.2f}
Std deviation:        {quality_metrics['size_ratio_std']:.2f}

QUALITY ASSESSMENT
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
"""
        
        # Add quality assessment
        if quality_metrics['matching_rate'] >= 0.8:
            summary_text += "‚úÖ EXCELLENT matching rate\n"
        elif quality_metrics['matching_rate'] >= 0.6:
            summary_text += "üëç GOOD matching rate\n"
        else:
            summary_text += "‚ö†Ô∏è  LOW matching rate - check parameters\n"
            
        if quality_metrics['mean_distance'] <= 200:
            summary_text += "‚úÖ EXCELLENT spatial accuracy\n"
        elif quality_metrics['mean_distance'] <= 400:
            summary_text += "üëç GOOD spatial accuracy\n"
        else:
            summary_text += "‚ö†Ô∏è  POOR spatial accuracy - check alignment\n"
        
        axes[1,2].text(0.05, 0.95, summary_text, transform=axes[1,2].transAxes, fontsize=10,
                      verticalalignment='top', fontfamily='monospace',
                      bbox=dict(boxstyle='round,pad=1', facecolor='lightblue', alpha=0.8))
        axes[1,2].set_xlim(0, 1)
        axes[1,2].set_ylim(0, 1)
        axes[1,2].axis('off')
        axes[1,2].set_title('Quality Summary', fontsize=14, fontweight='bold')
        
        plt.tight_layout()
        plt.show()
        
    print("‚úÖ Visualization completed!")
    
else:
    print("‚ùå Cannot create visualizations - detection failed or not run yet")
    print("Please run the detection step first and ensure it completes successfully")


In [None]:
# 7. PARAMETER TUNING GUIDE
# ==========================
# Prints a summary of the detection results and rule-of-thumb suggestions for
# which detection/matching parameters to adjust. Reads `detection_results` and
# `detection_config` from earlier cells; this cell only prints, it does not
# change any configuration.

print("üîß PARAMETER TUNING RECOMMENDATIONS")
print("=" * 50)

if 'detection_results' in locals() and detection_results['success']:
    stats = detection_results['detection_stats']
    quality = detection_results['quality_metrics']
    
    print("Current Results Analysis:")
    print(f"  ‚Ä¢ H&E cores detected: {stats['he_cores_detected']}")
    print(f"  ‚Ä¢ Orion cores detected: {stats['orion_cores_detected']}")
    print(f"  ‚Ä¢ Matched cores: {stats['matched_cores']}")
    print(f"  ‚Ä¢ Matching rate: {quality['matching_rate']:.1%}")
    print(f"  ‚Ä¢ Mean distance: {quality['mean_distance']:.1f} pixels")
    
    print("\nüìä DIAGNOSTIC RECOMMENDATIONS:")
    
    # Tracks whether any diagnostic below fired, so the section never ends up
    # as a bare header with no guidance underneath.
    issues_found = False
    
    # Too few H&E cores detected
    if stats['he_cores_detected'] < 200:
        issues_found = True
        print("\nüî¥ H&E DETECTION ISSUE:")
        print("   Try adjusting H&E detection parameters:")
        print(f"   ‚Ä¢ Lower he_lower_cutoff: {detection_config.he_lower_cutoff} ‚Üí {detection_config.he_lower_cutoff-0.05:.3f}")
        print(f"   ‚Ä¢ Raise he_upper_cutoff: {detection_config.he_upper_cutoff} ‚Üí {detection_config.he_upper_cutoff+0.05:.3f}")
        print("   ‚Ä¢ Or decrease downscale_factor for more detail")
        
    # Too few Orion cores detected
    if stats['orion_cores_detected'] < 200: 
        issues_found = True
        print("\nüîµ ORION DETECTION ISSUE:")
        print("   Try adjusting Orion detection parameters:")
        print(f"   ‚Ä¢ Lower orion_lower_cutoff: {detection_config.orion_lower_cutoff} ‚Üí {detection_config.orion_lower_cutoff-0.03:.3f}")
        print(f"   ‚Ä¢ Raise orion_upper_cutoff: {detection_config.orion_upper_cutoff} ‚Üí {detection_config.orion_upper_cutoff+0.03:.3f}")
        print("   ‚Ä¢ Check if DAPI channel is correct (currently channel 0)")
        
    # Low fraction of cores paired across modalities
    if quality['matching_rate'] < 0.7:
        issues_found = True
        print("\nüü° MATCHING ISSUE:")
        print("   Try adjusting matching parameters:")
        print(f"   ‚Ä¢ Increase max_match_distance: {detection_config.max_match_distance} ‚Üí {detection_config.max_match_distance*1.5:.0f}")
        print(f"   ‚Ä¢ Relax size ratio bounds: {detection_config.min_size_ratio}-{detection_config.max_size_ratio} ‚Üí 0.3-3.0")
        # Circularity cannot be negative, so never suggest a value below 0.
        print(f"   ‚Ä¢ Lower min_circularity: {detection_config.min_circularity} ‚Üí {max(0.0, detection_config.min_circularity - 0.1):.1f}")
        
    # Matched cores are far apart on average
    if quality['mean_distance'] > 300:
        issues_found = True
        print("\nüü† SPATIAL ACCURACY ISSUE:")
        print("   Cores are far apart - possible misalignment:")
        print("   ‚Ä¢ Check that images are from the same TMA")
        print("   ‚Ä¢ Verify image orientation (may need flipping)")
        print("   ‚Ä¢ Consider global pre-alignment if severe")
        
    if stats['matched_cores'] >= 200 and quality['matching_rate'] >= 0.8:
        print("\nüü¢ EXCELLENT RESULTS!")
        print("   Your parameters are well-tuned. Proceed to next steps.")
    elif not issues_found:
        # No diagnostic fired, but the "excellent" bar above was not reached
        # either; say so explicitly instead of printing nothing.
        print("\nüëç ACCEPTABLE RESULTS:")
        print("   No specific issues detected, but results are below the 'excellent' thresholds")
        print("   (>= 200 matched cores and >= 80% matching rate). Minor tuning may still help.")
        
else:
    print("‚ùå No detection results available for analysis")
    
print("\n" + "="*50)
print("üí° TO RETUNE PARAMETERS:")
print("1. Modify the parameters in Section 3")
print("2. Re-run Section 4 (Pipeline Initialization)")  
print("3. Re-run Section 5 (Core Detection)")
print("4. Check results in Section 6 (Visualization)")
print("="*50)


In [None]:
# 8. FULL PIPELINE EXECUTION
# ===========================
# Gate, then run, the complete pipeline:
#   1. require successful detection with >= 50 matched cores and a matching
#      rate >= 0.5,
#   2. require the registration pipeline import to have succeeded
#      (REGISTRATION_AVAILABLE),
#   3. ask for interactive confirmation,
#   4. call pipeline.run_complete_pipeline() and print its final report.

# Check prerequisites
run_full_pipeline = False

if 'detection_results' in locals() and detection_results['success']:
    stats = detection_results['detection_stats']
    quality = detection_results['quality_metrics']
    
    print("üîç Checking prerequisites for full pipeline...")
    
    # Check if we have good detection results
    if stats['matched_cores'] >= 50 and quality['matching_rate'] >= 0.5:
        print(f"‚úÖ Good detection results: {stats['matched_cores']} matched cores")
        
        # Check if registration is available
        if REGISTRATION_AVAILABLE:
            print("‚úÖ VALIS registration available")
            
            # Ask user confirmation
            print(f"\n‚è±Ô∏è  ESTIMATED TIME: ~{stats['matched_cores'] * 0.5:.0f} minutes for full pipeline")
            print("   (Core extraction: ~1 min, Registration: ~30 sec per core)")
            
            # input() is unavailable under non-interactive execution: plain
            # Python raises EOFError, and the Jupyter kernel raises
            # StdinNotImplementedError (a NotImplementedError subclass).
            # Treat both as "no answer" instead of aborting the cell.
            try:
                user_input = input("\nüöÄ Run full pipeline? (y/N): ").lower().strip()
            except (EOFError, NotImplementedError):
                user_input = None
                print("‚ö†Ô∏è  Interactive input not available - defaulting to 'N' (full pipeline not started)")
            
            if user_input in ['y', 'yes']:
                run_full_pipeline = True
            elif user_input is not None:
                print("Pipeline execution skipped by user")
        else:
            print("‚ùå VALIS registration not available - cannot run full pipeline")
    else:
        print(f"‚ùå Poor detection results ({stats['matched_cores']} cores, {quality['matching_rate']:.1%} rate)")
        print("   Please tune parameters before running full pipeline")
else:
    print("‚ùå No detection results available")

if run_full_pipeline:
    print("\n" + "="*80)
    print("üöÄ STARTING FULL PIPELINE EXECUTION")
    print("="*80)
    
    # Any failure below (including missing report keys) is reported with a
    # traceback rather than propagated, so later cells remain runnable.
    try:
        # Run the complete pipeline
        full_results = pipeline.run_complete_pipeline(HE_WSI_PATH, ORION_WSI_PATH)
        
        if 'final_report' in full_results:
            report = full_results['final_report']
            
            print("\n" + "="*80)
            print("üìä FULL PIPELINE RESULTS")
            print("="*80)
            
            # Core detection summary
            print("üîç CORE DETECTION:")
            detection_summary = report['core_detection_summary']
            print(f"   H&E cores:        {detection_summary['he_cores_detected']}")
            print(f"   Orion cores:      {detection_summary['orion_cores_detected']}")
            print(f"   Matched cores:    {detection_summary['matched_cores']}")
            print(f"   Matching rate:    {detection_summary['matching_rate']:.1%}")
            
            # Extraction summary
            print("\nüì¶ CORE EXTRACTION:")
            extraction_summary = report['extraction_summary'] 
            print(f"   Extracted cores:  {extraction_summary['cores_successfully_extracted']}")
            print(f"   Success rate:     {extraction_summary['extraction_success_rate']:.1%}")
            
            # Registration summary (if available)
            if 'registration_summary' in report:
                print("\nüîÑ REGISTRATION:")
                reg_summary = report['registration_summary']
                print(f"   Registered cores: {reg_summary['cores_successfully_registered']}")
                print(f"   Success rate:     {reg_summary['registration_success_rate']:.1%}")
                
            # Training dataset summary (if available)
            if 'training_dataset_summary' in report:
                print("\nüéØ TRAINING DATASET:")
                train_summary = report['training_dataset_summary']
                print(f"   Training pairs:   {train_summary['training_pairs_created']}")
                print(f"   Output directory: {train_summary['training_pairs_directory']}")
                
                # Success banner is only shown once training pairs exist.
                print("\n‚úÖ PIPELINE COMPLETED SUCCESSFULLY!")
                print("üéâ You now have a paired training dataset ready for model training!")
                
        else:
            print("‚ùå Pipeline completed but no final report generated")
            
    except Exception as e:
        print(f"‚ùå Full pipeline execution failed: {e}")
        import traceback
        traceback.print_exc()
        
else:
    print("\n‚è≠Ô∏è  Skipping full pipeline execution")
    print("üí° You can still examine the detection results above")
    print("   To run the full pipeline later, ensure prerequisites are met and re-run this section")


In [None]:
# 9. RESULTS EXPORT AND NEXT STEPS
# ==================================
# Writes the matched-core table and the per-modality core tables to CSV under
# OUTPUT_DIR, lists any debug visualizations found there, and prints
# next-step guidance based on the number of matched cores.

print("üìã RESULTS SUMMARY & EXPORT")
print("="*50)

# Export detection results if available
if 'detection_results' in locals() and detection_results['success']:
    
    # Save core matching results as CSV
    matched_cores = detection_results['matched_cores']
    if len(matched_cores) > 0:
        
        # Make sure the target directory exists; to_csv() does not create it
        # and would otherwise fail with FileNotFoundError.
        output_dir = Path(OUTPUT_DIR)
        output_dir.mkdir(parents=True, exist_ok=True)
        
        # Convert to DataFrame for easy export
        matched_df = pd.DataFrame(matched_cores)
        export_path = output_dir / "matched_cores_summary.csv"
        matched_df.to_csv(export_path, index=False)
        print(f"‚úÖ Matched cores data exported to: {export_path}")
        
        # Also save detailed H&E and Orion core info
        # (both objects are used via .to_csv - presumably pandas DataFrames;
        # verify against the detector's return value)
        he_cores = detection_results['he_cores']
        orion_cores = detection_results['orion_cores']
        
        he_export_path = output_dir / "he_cores_detected.csv"
        orion_export_path = output_dir / "orion_cores_detected.csv"
        
        he_cores.to_csv(he_export_path, index=False)
        orion_cores.to_csv(orion_export_path, index=False)
        
        print(f"‚úÖ H&E cores data exported to: {he_export_path}")  
        print(f"‚úÖ Orion cores data exported to: {orion_export_path}")
        
        # Print summary statistics
        stats = detection_results['detection_stats']
        quality = detection_results['quality_metrics']
        
        print("\nüìä FINAL SUMMARY:")
        print(f"   Total matched core pairs: {len(matched_cores)}")
        print(f"   Average match distance: {quality['mean_distance']:.1f} pixels")
        print(f"   Average size ratio: {quality['mean_size_ratio']:.2f}")
        print(f"   Matching success rate: {quality['matching_rate']:.1%}")

# Check for debug visualizations
debug_vis_dir = Path(OUTPUT_DIR) / "debug_visualizations"
if debug_vis_dir.exists():
    # Sorted so the listing is stable between runs (glob order is arbitrary).
    debug_files = sorted(debug_vis_dir.glob("*.png"))
    if debug_files:
        print(f"\nüñºÔ∏è  Debug visualizations saved to: {debug_vis_dir}")
        for debug_file in debug_files:
            print(f"   ‚Ä¢ {debug_file.name}")

print(f"\nüìÅ All outputs saved to: {OUTPUT_DIR}")

print("\n" + "="*80)
print("üöÄ NEXT STEPS FOR MODEL TRAINING")
print("="*80)

if 'detection_results' in locals() and detection_results['success']:
    matched_count = detection_results['detection_stats']['matched_cores']
    
    # Guidance tiers: >= 200 matched cores, >= 100, otherwise limited.
    if matched_count >= 200:
        print("‚úÖ EXCELLENT! You have sufficient cores for robust model training")
        print("\nRecommended next steps:")
        print("1. üîÑ Run full pipeline (Section 8) to generate registered training pairs")
        print("2. üß† Use the training pairs with your existing multi-protein model")
        print("3. üìä Compare model performance vs global alignment approach")
        print("4. üéØ Focus on the biological limits assessment for realistic expectations")
        
    elif matched_count >= 100:
        print("üëç GOOD! You have a reasonable number of cores for training")
        print("\nRecommended next steps:")
        print("1. üîß Consider parameter tuning to get more cores if possible")
        print("2. üîÑ Run full pipeline to generate training data")
        print("3. üß† Train model and compare against larger datasets")
        
    else:
        print("‚ö†Ô∏è  LIMITED CORES - Need parameter optimization")
        print("\nRecommended next steps:")
        print("1. üîß Use Section 7 to tune parameters and improve detection")
        print("2. üîç Check image quality and format compatibility")
        print("3. üìû Consider manual review of failed detections")

print("\nüíæ Key files to keep:")
print(f"   ‚Ä¢ {OUTPUT_DIR}/matched_cores_summary.csv - Core pairing results")
print(f"   ‚Ä¢ {OUTPUT_DIR}/debug_visualizations/ - Quality control plots")
if REGISTRATION_AVAILABLE:
    print(f"   ‚Ä¢ {OUTPUT_DIR}/training_pairs/ - Model training data (after full pipeline)")

print("\nüéâ Core pairing pipeline analysis complete!")

# Cleanup instruction
print("\nüßπ CLEANUP:")
print("To clean up temporary files, run: pipeline.cleanup()")
print("This will remove intermediate SpaceC files but keep your results")
