# 01 – Enhanced Nuclear Segmentation

This notebook performs:
1. **Enhanced Nuclear Preprocessing** using true hematoxylin channel extraction (color deconvolution)
2. **Contrast Enhancement** via CLAHE for optimal nuclear visibility
3. **Cell segmentation** using multiple backends:
   - `hematoxylin_watershed` - Recommended for H&E (robust, no ML required)
   - `hematoxylin_instanseg` - Best quality when InstanSeg is available
   - `hematoxylin_adaptive` - Fastest option for well-stained samples
   - `instanseg` - Original InstanSeg on RGB (legacy)
   - `classical` - Simple thresholding (fallback)
4. **Marker detection** for insulin, glucagon, CD3 (for fluorescence images)
5. **Comprehensive visualizations** with preprocessing steps and results

**Key Improvement**: Instead of using stain normalization (which standardizes colors but doesn't
separate stains), we now use true **color deconvolution** to extract the hematoxylin channel
specifically. This isolates the nuclear signal for much better segmentation quality.

Supports both brightfield and fluorescent multiplex images.

In [None]:
import os
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Allow very large whole-slide images
os.environ.setdefault('OPENCV_IO_MAX_IMAGE_PIXELS', str(2**63 - 1))

import cv2
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from skimage import exposure

try:
    import openslide
    _OPENSLIDE_AVAILABLE = True
except ImportError:
    openslide = None
    _OPENSLIDE_AVAILABLE = False
    print('OpenSlide not available; WSI formats may not load')

from isletscope.stain import StainNormalizer
from isletscope.segmentation import CellSegmenter

# Import enhanced nuclear segmentation (for visualization)
try:
    from isletscope.nuclear_segmentation import NuclearPreprocessor
    _NUCLEAR_SEG_AVAILABLE = True
    print('Enhanced nuclear segmentation available')
except ImportError:
    _NUCLEAR_SEG_AVAILABLE = False
    print('Enhanced nuclear segmentation not available (install scipy, scikit-image)')

print('Imports complete')

## Configuration

In [None]:
# ===== Input Configuration =====
image_path = '../images/129753.svs'  # Update to your file
# Target longest side in pixels: load_image reads the first pyramid level that
# fits within this value, or the last level when none does.
max_dimension = 2000
# 'brightfield' or 'fluorescence'; stain normalization only runs for
# 'brightfield' and marker detection only runs for 'fluorescence'.
image_type = 'brightfield'

# ===== Stain Normalization (Optional - legacy approach) =====
# Note: For enhanced backends (hematoxylin_*), stain normalization is NOT needed
# as they use true color deconvolution to extract the hematoxylin channel directly.
# The preprocessing cell ignores this flag whenever a hematoxylin_* backend is
# selected and NuclearPreprocessor was imported successfully.
use_stain_norm = False  # Set to False when using hematoxylin_* backends
stain_method = 'macenko'  # 'macenko' or 'vahadane'
use_gpu = False  # Set True if CuPy installed

# ===== Tissue Detection =====
# IMPORTANT: Detect tissue boundaries BEFORE cell segmentation to avoid false positives on glass/background
detect_tissue_first = True  # Recommended: True
# NOTE(review): this value is not passed to CellSegmenter anywhere in this
# notebook - confirm whether it is still needed.
tissue_detection_method = 'otsu'  # 'otsu' (recommended), 'brightness', or 'saturation'

# ===== Segmentation Backend =====
# RECOMMENDED for brightfield H&E: 'hematoxylin_watershed' (robust, no ML required)
# Options:
#   - 'hematoxylin_watershed': Hematoxylin extraction + CLAHE + watershed (RECOMMENDED)
#   - 'hematoxylin_instanseg': Hematoxylin extraction + CLAHE + InstanSeg (best with GPU)
#   - 'hematoxylin_adaptive': Hematoxylin extraction + adaptive threshold (fastest)
#   - 'instanseg': Original InstanSeg on RGB (legacy)
#   - 'classical': Simple thresholding (fallback)
#   - 'auto': Auto-select best available backend
backend = 'hematoxylin_watershed'  # RECOMMENDED for brightfield H&E

# ===== Common Segmentation Parameters =====
min_cell_size = 32  # Remove objects smaller than this (in pixels)
max_cell_size = 5000  # Remove objects larger than this (in pixels)

# ===== Enhanced Nuclear Segmentation Parameters =====
# (used by hematoxylin_* backends)
stain_matrix = 'he_standard'  # 'he_standard', 'he_ruifrok', or 'dab'
clahe_clip_limit = 3.0  # CLAHE contrast enhancement (higher = more contrast)
min_distance = 10  # Minimum distance between nuclei for watershed

# ===== InstanSeg Parameters =====
# (used by 'instanseg' and 'hematoxylin_instanseg' backends)
instanseg_model = 'brightfield_nuclei'  # 'brightfield_nuclei' or 'fluorescence_nuclei_and_cells'
tile_size = 1024  # Tile size in pixels (512, 1024, or 2048)
tile_overlap = 64  # Overlap between tiles (prevents edge artifacts)
batch_size = 4  # Number of tiles to process in parallel (GPU-dependent)
pixel_size = None  # Physical pixel size in microns (auto-detected if None)
normalization = True  # Apply intensity normalization
image_reader = 'tiffslide'  # Image reading backend ('tiffslide', 'openslide')

# Legacy parameters (for 'classical' backend only)
probability_threshold = 0.5  # NOT used by InstanSeg or enhanced backends

# ===== Marker Detection (for fluorescence) =====
# Presumably marker name -> image channel index, and marker name -> intensity
# cut-off; confirm against CellSegmenter.detect_markers.
marker_channels = {'insulin': 0, 'glucagon': 1, 'CD3': 2}
marker_thresholds = {'insulin': 80, 'glucagon': 80, 'CD3': 40}

# ===== Visualization =====
# Closeup regions (y_start, y_end, x_start, x_end) as fractions of image dimensions
# (converted to pixel indices by get_closeup_coords).
closeup_regions = [
    (0.2, 0.4, 0.3, 0.5),  # Region 1
    (0.5, 0.7, 0.6, 0.8),  # Region 2
]

# ===== Output =====
# All figures and .npy arrays produced by this notebook are written here.
output_dir = Path('../outputs')
# parents=True so that pointing output_dir at a nested location (for example
# '../outputs/run1') works on a fresh checkout instead of raising FileNotFoundError.
output_dir.mkdir(parents=True, exist_ok=True)

print('Configuration set')
print(f'Using backend: {backend}')

## Image Loading

In [None]:
def load_image(path: str, max_dim: int = 2000):
    """Load an image in BGR channel order, reading slides at a reduced pyramid level.

    When OpenSlide is importable and the suffix looks like a slide format, the
    first pyramid level whose longest side is <= ``max_dim`` is read in full.
    If no level is that small, the last entry of ``level_dimensions`` is used,
    so the returned image may still be larger than ``max_dim``. Files that
    OpenSlide reports as unsupported (for example plain TIFFs that share the
    ``.tif`` suffix) and all other suffixes are decoded with ``cv2.imread``.

    Args:
        path: Location of the image or slide file.
        max_dim: Upper bound, in pixels, used to pick the pyramid level.

    Returns:
        The image array in BGR channel order.

    Raises:
        FileNotFoundError: If the file could not be decoded by ``cv2.imread``
            (including after OpenSlide rejected it as unsupported).
    """
    p = Path(path)
    suffix = p.suffix.lower()

    if _OPENSLIDE_AVAILABLE and suffix in {'.svs', '.tif', '.tiff', '.ndpi', '.scn'}:
        try:
            slide = openslide.OpenSlide(str(p))
        except openslide.OpenSlideUnsupportedFormatError:
            # Not a slide OpenSlide understands; fall through to OpenCV below.
            slide = None

        if slide is not None:
            # try/finally guarantees the native handle is released even when
            # reading or converting the region raises.
            try:
                level_dims = slide.level_dimensions
                level = len(level_dims) - 1  # fallback when no level fits max_dim
                for i, (w, h) in enumerate(level_dims):
                    if max(w, h) <= max_dim:
                        level = i
                        break
                region = slide.read_region((0, 0), level, level_dims[level])
                # Drop the alpha channel, then switch to OpenCV's BGR order.
                return cv2.cvtColor(np.array(region.convert('RGB')), cv2.COLOR_RGB2BGR)
            finally:
                slide.close()

    img = cv2.imread(str(p))
    if img is None:
        raise FileNotFoundError(f'Cannot load image: {path}')
    return img

def get_closeup_coords(img_shape, region_frac):
    """Scale a fractional (y_start, y_end, x_start, x_end) box to pixel indices.

    The two y fractions are multiplied by the image height and the two x
    fractions by the image width (the first two entries of ``img_shape``);
    each product is truncated with ``int``.

    Returns:
        A 4-tuple ``(y_start, y_end, x_start, x_end)`` of ints.
    """
    height, width = img_shape[:2]
    top, bottom, left, right = region_frac
    row_bounds = tuple(int(frac * height) for frac in (top, bottom))
    col_bounds = tuple(int(frac * width) for frac in (left, right))
    return row_bounds + col_bounds

print('Helper functions defined')

In [None]:
# Read the input once; every later cell works from this BGR array.
img_original = load_image(image_path, max_dim=max_dimension)

# Report shape/dtype, width x height, and in-memory footprint.
for summary_line in (
    f'Loaded image: {img_original.shape} ({img_original.dtype})',
    f'Image size: {img_original.shape[1]} x {img_original.shape[0]} pixels',
    f'Memory: {img_original.nbytes / 1e6:.1f} MB',
):
    print(summary_line)

## Preprocessing

For **hematoxylin_* backends**: We use true color deconvolution to extract the hematoxylin channel
(nuclear signal), then apply CLAHE for contrast enhancement. This is more effective than standard
stain normalization because it isolates the nuclear signal rather than just standardizing colors.

For **legacy backends** (instanseg, classical): Optional Macenko/Vahadane stain normalization.

In [None]:
# Preprocessing depends on the backend chosen
is_hematoxylin_backend = backend.startswith('hematoxylin_')

# Reset the optional channels once, up front, so every branch below (and a
# re-run of this cell after a configuration change) leaves them in a defined
# state for the visualization cells, which test `hematoxylin_channel is not None`.
hematoxylin_channel = None
eosin_channel = None
enhanced_channel = None

if is_hematoxylin_backend and not _NUCLEAR_SEG_AVAILABLE:
    # Make the otherwise silent fall-through visible to the reader.
    print('NuclearPreprocessor not available; hematoxylin preprocessing preview will not be shown')

if is_hematoxylin_backend and _NUCLEAR_SEG_AVAILABLE:
    # Enhanced preprocessing: hematoxylin extraction + CLAHE
    print('Running enhanced nuclear preprocessing (hematoxylin extraction + CLAHE)...')
    preprocessor = NuclearPreprocessor(
        stain_matrix=stain_matrix,
        clahe_clip_limit=clahe_clip_limit,
    )
    prep_result = preprocessor.preprocess(img_original, return_intermediate=True)
    hematoxylin_channel = prep_result['hematoxylin']
    eosin_channel = prep_result['eosin']
    enhanced_channel = prep_result['enhanced']
    img_normalized = img_original.copy()  # Keep original for visualization
    print('  Hematoxylin channel extracted')
    print('  CLAHE contrast enhancement applied')
    print('Enhanced preprocessing complete')

elif use_stain_norm and image_type == 'brightfield':
    # Legacy preprocessing: Macenko/Vahadane stain normalization
    print(f'Running {stain_method} normalization...')
    normalizer = StainNormalizer(method=stain_method, use_gpu=use_gpu)
    normalizer.estimate_stain_matrix(img_original)
    img_normalized = normalizer.normalize(img_original)
    print('Stain normalization complete')

else:
    # No preprocessing requested: downstream cells still expect img_normalized.
    img_normalized = img_original.copy()
    print('Skipping preprocessing')

In [None]:
# Visualization: Preprocessing results
original_rgb = cv2.cvtColor(img_original, cv2.COLOR_BGR2RGB)

# Describe each panel as (axis, image, title, colormap); the region boxes are
# drawn on the last panel of whichever layout is chosen.
if hematoxylin_channel is not None:
    # Enhanced preprocessing visualization (4 panels)
    fig, axes = plt.subplots(2, 2, figsize=(16, 16))
    panels = [
        (axes[0, 0], original_rgb, 'Original Image', None),
        (axes[0, 1], hematoxylin_channel, 'Hematoxylin Channel (Nuclei)', 'gray'),
        (axes[1, 0], eosin_channel, 'Eosin Channel (Cytoplasm)', 'Reds'),
        (axes[1, 1], enhanced_channel, 'Enhanced (CLAHE) Hematoxylin', 'gray'),
    ]
else:
    # Legacy visualization (2 panels). Stain normalization only ran when it was
    # enabled for a brightfield image; otherwise img_normalized is a plain copy
    # of the input and must not be labelled as normalized.
    stain_normalized = use_stain_norm and image_type == 'brightfield'
    second_title = 'Stain Normalized' if stain_normalized else 'Original (no stain normalization)'
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    panels = [
        (axes[0], original_rgb, 'Original Image', None),
        (axes[1], cv2.cvtColor(img_normalized, cv2.COLOR_BGR2RGB), second_title, None),
    ]

for ax, panel_image, panel_title, panel_cmap in panels:
    ax.imshow(panel_image, cmap=panel_cmap)
    ax.set_title(panel_title, fontsize=14, fontweight='bold')
    ax.axis('off')

# Draw rectangles showing closeup regions on the last panel
region_ax = panels[-1][0]
for i, region_frac in enumerate(closeup_regions):
    y1, y2, x1, x2 = get_closeup_coords(img_original.shape, region_frac)
    rect = Rectangle((x1, y1), x2-x1, y2-y1, linewidth=2, edgecolor='red', facecolor='none')
    region_ax.add_patch(rect)
    region_ax.text(x1, y1-10, f'Region {i+1}', color='red', fontsize=10, fontweight='bold')

plt.tight_layout()
plt.savefig(output_dir / '01_preprocessing_full.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Closeup preprocessing comparison
n_regions = len(closeup_regions)

# One entry per figure column:
# (title suffix, full-size source array, colormap, needs BGR->RGB conversion)
if hematoxylin_channel is not None:
    # Enhanced preprocessing closeups (3 columns: original, hematoxylin, enhanced)
    closeup_columns = [
        ('Original', img_original, None, True),
        ('Hematoxylin', hematoxylin_channel, 'gray', False),
        ('Enhanced (CLAHE)', enhanced_channel, 'gray', False),
    ]
    panel_inches = 5
else:
    # Legacy closeups (2 columns). Only call the second column "Normalized"
    # when stain normalization actually ran in the preprocessing cell.
    stain_normalized = use_stain_norm and image_type == 'brightfield'
    closeup_columns = [
        ('Original', img_original, None, True),
        ('Normalized' if stain_normalized else 'Unprocessed', img_normalized, None, True),
    ]
    panel_inches = 6

if n_regions == 0:
    # plt.subplots rejects zero rows, so skip the figure instead of crashing.
    print('No closeup regions configured; skipping closeup figure')
else:
    # squeeze=False always yields a 2-D axes array, even for a single region.
    fig, axes = plt.subplots(
        n_regions,
        len(closeup_columns),
        figsize=(panel_inches * len(closeup_columns), panel_inches * n_regions),
        squeeze=False,
    )

    for i, region_frac in enumerate(closeup_regions):
        y1, y2, x1, x2 = get_closeup_coords(img_original.shape, region_frac)

        for j, (label, source, cmap, is_bgr) in enumerate(closeup_columns):
            crop = source[y1:y2, x1:x2]
            if is_bgr:
                crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
            axes[i, j].imshow(crop, cmap=cmap)
            axes[i, j].set_title(f'Region {i+1} - {label}', fontsize=12, fontweight='bold')
            axes[i, j].axis('off')

    plt.tight_layout()
    plt.savefig(output_dir / '01_preprocessing_closeup.png', dpi=150, bbox_inches='tight')
    plt.show()

## Cell Segmentation

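Segment individual cells/nuclei with the backend selected in the configuration: one of the hematoxylin-based backends, InstanSeg, or the classical thresholding fallback.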

In [None]:
print(f'Running {backend} segmentation...')
segmenter = CellSegmenter(
    backend=backend,
    # Common parameters
    min_size=min_cell_size,
    max_size=max_cell_size,
    # Enhanced nuclear segmentation parameters
    stain_matrix=stain_matrix,
    clahe_clip_limit=clahe_clip_limit,
    min_distance=min_distance,
    # InstanSeg parameters
    use_instanseg=(backend in ['instanseg', 'hematoxylin_instanseg']),
    instanseg_model_name=instanseg_model,
    tile_size=tile_size,
    tile_overlap=tile_overlap,
    batch_size=batch_size,
    pixel_size=pixel_size,
    normalization=normalization,
    image_reader=image_reader,
    # Legacy parameters
    probability_threshold=probability_threshold,
)

# Segment cells with tissue detection
# Note: For hematoxylin_* backends, we pass the original image as preprocessing
# is handled internally by the enhanced segmenter
seg_result = segmenter.segment(
    img_original,  # Use original image - preprocessing is done by backend
    image_type=image_type,
    detect_tissue_first=detect_tissue_first,
)

cell_mask = seg_result['mask']
cell_labels = seg_result['labels']
tissue_mask = seg_result.get('tissue_mask')  # None when the result has no such key

# Count distinct non-zero label ids rather than taking the maximum id: the two
# only agree when ids are consecutive, which is not guaranteed here (for
# example if size filtering removes objects without relabelling - TODO confirm).
n_cells = int(np.count_nonzero(np.unique(cell_labels)))

# Count foreground pixels instead of summing values, so the area is correct
# whether the mask is boolean, 0/1 or 0/255.
cell_area_px = int(np.count_nonzero(cell_mask))

print(f'\nSegmentation complete: {n_cells:,} cells detected')
print(f'Total cell area: {cell_area_px:,} pixels ({100*cell_area_px/cell_mask.size:.1f}% of image)')

In [None]:
# Marker detection (fluorescence only)
# `markers` stays an empty dict whenever detection is skipped, so later cells
# can iterate over it unconditionally.
markers = {}
if image_type != 'fluorescence':
    print('Skipping marker detection (brightfield image)')
elif not marker_channels:
    # Previously reported as "brightfield image", which hid the real cause.
    print('Skipping marker detection (no marker channels configured)')
else:
    print('Detecting markers...')
    markers = segmenter.detect_markers(
        img_normalized,
        cell_labels,
        marker_channels=marker_channels,
        thresholds=marker_thresholds,
        brighter_is_positive=True
    )
    # NOTE(review): mask.sum() is reported as "positive cells"; whether each
    # mask is per-cell or per-pixel depends on detect_markers - confirm.
    for marker, mask in markers.items():
        print(f'  {marker}: {int(mask.sum())} positive cells')

In [None]:
# Full image segmentation overlay
fig, axes = plt.subplots(2, 2, figsize=(16, 16))

# Base image. img_normalized only differs from the input when legacy stain
# normalization ran (no hematoxylin preview, flag enabled, brightfield image),
# so only then is it labelled as normalized.
base_rgb = cv2.cvtColor(img_normalized, cv2.COLOR_BGR2RGB)
stain_normalized = hematoxylin_channel is None and use_stain_norm and image_type == 'brightfield'
axes[0, 0].imshow(base_rgb)
axes[0, 0].set_title('Normalized Image' if stain_normalized else 'Input Image',
                     fontsize=14, fontweight='bold')
axes[0, 0].axis('off')

# Tissue mask
tissue_binary = None
if tissue_mask is not None:
    # Binarize once so the percentage and the edge detection below behave the
    # same for boolean, 0/1 and 0/255 masks.
    tissue_binary = np.asarray(tissue_mask) > 0
    axes[0, 1].imshow(tissue_mask, cmap='gray')
    tissue_pct = 100 * np.count_nonzero(tissue_binary) / tissue_binary.size
    axes[0, 1].set_title(f'Tissue Mask ({tissue_pct:.1f}% tissue)', fontsize=14, fontweight='bold')
    axes[0, 1].axis('off')
else:
    axes[0, 1].text(0.5, 0.5, 'No tissue mask', ha='center', va='center')
    axes[0, 1].axis('off')

# Cell mask
axes[1, 0].imshow(cell_mask, cmap='gray')
axes[1, 0].set_title(f'Cell Mask ({n_cells:,} cells)', fontsize=14, fontweight='bold')
axes[1, 0].axis('off')

# Overlay
overlay = base_rgb.copy()
if tissue_binary is not None:
    # Show tissue boundary in blue
    tissue_boundary = cv2.Canny(tissue_binary.astype(np.uint8) * 255, 100, 200)
    overlay[tissue_boundary > 0] = [0, 0, 255]  # Blue tissue boundary
# Show cells in red: 60% image + 40% pure red, truncated back to uint8
cell_pixels = cell_mask > 0
overlay[cell_pixels] = (overlay[cell_pixels] * 0.6 + np.array([255, 0, 0]) * 0.4).astype(np.uint8)
axes[1, 1].imshow(overlay)
axes[1, 1].set_title('Tissue (blue) + Cells (red)', fontsize=14, fontweight='bold')
axes[1, 1].axis('off')

# Draw closeup region boxes
for i, region_frac in enumerate(closeup_regions):
    y1, y2, x1, x2 = get_closeup_coords(img_original.shape, region_frac)
    rect = Rectangle((x1, y1), x2-x1, y2-y1, linewidth=2, edgecolor='yellow', facecolor='none')
    axes[1, 1].add_patch(rect)
    axes[1, 1].text(x1, y1-10, f'Region {i+1}', color='yellow', fontsize=10, fontweight='bold')

plt.tight_layout()
plt.savefig(output_dir / '02_cell_segmentation_full.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Closeup segmentation views
# Imported once at the top of the cell instead of on every loop iteration.
from skimage.color import label2rgb
from skimage.segmentation import find_boundaries

# Recomputed here so this cell does not rely on a value set by an earlier cell.
n_regions = len(closeup_regions)

if n_regions == 0:
    # plt.subplots rejects zero rows, so skip the figure instead of crashing.
    print('No closeup regions configured; skipping closeup figure')
else:
    # squeeze=False always yields a 2-D axes array, even for a single region.
    fig, axes = plt.subplots(n_regions, 3, figsize=(15, 5*n_regions), squeeze=False)

    for i, region_frac in enumerate(closeup_regions):
        y1, y2, x1, x2 = get_closeup_coords(img_original.shape, region_frac)

        img_crop = img_normalized[y1:y2, x1:x2]
        labels_crop = cell_labels[y1:y2, x1:x2]
        crop_rgb = cv2.cvtColor(img_crop, cv2.COLOR_BGR2RGB)

        # Image
        axes[i, 0].imshow(crop_rgb)
        axes[i, 0].set_title(f'Region {i+1} - Image', fontsize=12, fontweight='bold')
        axes[i, 0].axis('off')

        # Labels (colored). The count is the number of distinct non-zero label
        # ids inside the crop; the maximum id would report the highest id
        # present (e.g. "600 cells" for a crop holding only labels 590-600).
        cells_in_crop = int(np.count_nonzero(np.unique(labels_crop)))
        labels_colored = label2rgb(labels_crop, bg_label=0)
        axes[i, 1].imshow(labels_colored)
        axes[i, 1].set_title(f'Region {i+1} - Cell Labels ({cells_in_crop} cells)', fontsize=12, fontweight='bold')
        axes[i, 1].axis('off')

        # Overlay with boundaries
        overlay_crop = crop_rgb.copy()
        boundaries = find_boundaries(labels_crop, mode='thick')
        overlay_crop[boundaries] = [255, 255, 0]  # Yellow boundaries
        axes[i, 2].imshow(overlay_crop)
        axes[i, 2].set_title(f'Region {i+1} - Cell Boundaries', fontsize=12, fontweight='bold')
        axes[i, 2].axis('off')

    plt.tight_layout()
    plt.savefig(output_dir / '02_cell_segmentation_closeup.png', dpi=150, bbox_inches='tight')
    plt.show()

## Save Results

In [None]:
# Write the arrays the next notebook needs into output_dir as .npy files
core_arrays = (
    ('img_normalized.npy', img_normalized),
    ('cell_mask.npy', cell_mask),
    ('cell_labels.npy', cell_labels),
)
for npy_name, npy_array in core_arrays:
    np.save(output_dir / npy_name, npy_array)

# One extra file per detected marker; nothing is written when `markers` is empty
if markers:
    for marker, mask in markers.items():
        np.save(output_dir / f'marker_{marker}.npy', mask)

print(f'Results saved to {output_dir}')
print('\nReady for notebook 02: Islet Detection & Radial Analysis')