In [None]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import sys
import gc
import pickle
import shutil
import numpy as np
import pandas as pd
import tifffile
import json
import psutil
from pathlib import Path
from scipy import ndimage as ndi
from skimage import morphology
from skimage.measure import label, regionprops, find_contours
from tqdm.auto import tqdm
import time
from datetime import datetime

print("âœ“ Basic imports")

import torch
from micro_sam.util import get_sam_model
from micro_sam.instance_segmentation import (
    AutomaticMaskGenerator,
    mask_data_to_segmentation
)
print(f"âœ“ PyTorch: {torch.__version__}")
print(f"âœ“ Micro-SAM imported")
print(f"âœ“ Available RAM: {psutil.virtual_memory().available / (1024**3):.1f} GB\n")

In [None]:
# === CONFIGURATION ===
# All tunable settings for the batch run live in this cell. Running it also
# creates TEMP_DIR on disk (see the mkdir call below).

# Root folder that contains one sub-folder per sample.
DATA_ROOT = Path(r"C:\Users\researcher\data\scimap-master\data")

# Samples this run should consider (the four still outstanding).
ALL_SAMPLES = [
    "sample_009", "sample_012", "sample_014", "sample_016"
]

# Samples listed here are excluded from sizing and from processing.
SKIP_SAMPLES = []  # Set to ["sample_012"] to skip 13GB monster

# Model type string passed to micro_sam's get_sam_model.
MODEL_TYPE = 'vit_b'

# ADAPTIVE TILE SIZING - PIXEL-BASED
# Tile edge length (pixels) chosen from the image's total pixel count.
TILE_SIZE_NORMAL = 1024      # <60M pixels
TILE_SIZE_MONSTER = 512      # 60-80M pixels
TILE_SIZE_ULTRA = 384        # >80M pixels (original author's note: prevents VS Code crash)
# Pixels shared by neighbouring tiles; the tile stride is tile_size - OVERLAP.
OVERLAP = 256
# Default min_object_size used by masks_to_segmentation_chunked.
MIN_OBJECT_SIZE = 8000

# Pixel count thresholds for adaptive tile sizing
MONSTER_PIXEL_THRESHOLD = 60_000_000  # 60M pixels
ULTRA_PIXEL_THRESHOLD = 80_000_000    # 80M pixels (sample_016 = 90.6M)

# CHUNKED PROCESSING - ADAPTIVE BY IMAGE SIZE
# Number of masks converted per batch, and number of tile files loaded per
# chunk, for each image-size tier. The GB figures are the original author's
# estimates and are not verified by anything in this notebook.
MASK_BATCH_SIZE = 10  # Base: 10 masks
MASK_BATCH_SIZE_MONSTER = 6  # For huge images (60-80M px): 6 masks
MASK_BATCH_SIZE_ULTRA = 4    # For ultra images (>80M px): 4 masks -> 12 GB max (estimate)
TILE_LOAD_BATCH = 10  # Base: 10 tiles
TILE_LOAD_BATCH_MONSTER = 5  # For huge images: 5 tiles
TILE_LOAD_BATCH_ULTRA = 3    # For ultra images: 3 tiles -> 10 GB max (estimate)

# Disk caching: per-tile pickle files are written under TEMP_DIR/<sample_id>.
TEMP_DIR = Path(r"C:\Users\researcher\Downloads\Cycif_pipeline_V3\temp_tiles_v8")
TEMP_DIR.mkdir(parents=True, exist_ok=True)

# Progress file. It points at the V5 file, so samples already recorded there
# with status 'success' are skipped by get_remaining_samples.
CHECKPOINT_FILE = Path(r"C:\Users\researcher\Downloads\Cycif_pipeline_V3\batch_progress_v5.json")

# Filtering: area bounds and eccentricity ceiling passed to filter_by_size_shape.
# MIN_AREA is also passed to apply_morphological_refinement as the minimum object size.
MIN_AREA = 8000
MAX_AREA = 100000
MAX_ECCENTRICITY = 0.9

# Morphology: toggle and disk radii for apply_morphological_refinement.
APPLY_MORPHOLOGY = True
MERGE_RADIUS = 15
SMOOTH_RADIUS = 5

# RAM safety - STRICT
MIN_FREE_RAM_GB = 8.0  # Don't start if <8GB free
MIN_SAFE_RAM_GB = 5.0  # Pause processing if drops below 5GB
PAUSE_BETWEEN_SAMPLES = 15  # Seconds to sleep after each successful sample (longer pause for GC)

# Echo the effective configuration so it is recorded in the cell output.
print(f"Data: {DATA_ROOT}")
print(f"Samples: {len(ALL_SAMPLES)}")
if SKIP_SAMPLES:
    print(f"âš ï¸  Skipping: {SKIP_SAMPLES}")
print(f"Adaptive tiles: {TILE_SIZE_NORMAL}px â†’ {TILE_SIZE_MONSTER}px â†’ {TILE_SIZE_ULTRA}px")
# NOTE(review): the next line hard-codes the threshold text; it will not follow
# changes to MONSTER_PIXEL_THRESHOLD / ULTRA_PIXEL_THRESHOLD.
print(f"Pixel thresholds: <60M â†’ 60-80M â†’ >80M")
print(f"Mask batch: {MASK_BATCH_SIZE} â†’ {MASK_BATCH_SIZE_MONSTER} â†’ {MASK_BATCH_SIZE_ULTRA}")
print(f"Tile load: {TILE_LOAD_BATCH} â†’ {TILE_LOAD_BATCH_MONSTER} â†’ {TILE_LOAD_BATCH_ULTRA}")
print(f"Temp: {TEMP_DIR}")
print(f"Checkpoint: {CHECKPOINT_FILE}")
print(f"RAM gate: {MIN_FREE_RAM_GB} GB minimum, {MIN_SAFE_RAM_GB} GB safe threshold\n")

In [None]:
# === CHECKPOINT & RAM MANAGEMENT ===
def load_checkpoint():
    """Load batch progress from CHECKPOINT_FILE.

    Returns:
        dict: the parsed checkpoint, guaranteed to contain list values under
        the keys 'completed', 'failed' and 'results'. A fresh, empty structure
        is returned when the file is missing, unreadable, or not a JSON object.
    """
    def _empty():
        return {'completed': [], 'failed': [], 'results': []}

    if not CHECKPOINT_FILE.exists():
        return _empty()

    try:
        with open(CHECKPOINT_FILE, 'r') as f:
            data = json.load(f)
    except Exception as e:
        print(f"âš ï¸  Checkpoint error: {e}")
        return _empty()

    if not isinstance(data, dict):
        print(f"âš ï¸  Checkpoint error: expected a JSON object, got {type(data).__name__}")
        return _empty()

    # Callers append to data['results'] directly, so make sure every expected
    # list exists even if the file was produced by an older run or edited by hand.
    for key, default in _empty().items():
        if not isinstance(data.get(key), list):
            data[key] = default

    success_count = sum(
        1 for r in data['results']
        if isinstance(r, dict) and r.get('status') == 'success'
    )
    print(f"âœ“ Checkpoint: {success_count} successful")
    return data

def save_checkpoint(data):
    """Persist the checkpoint dict to CHECKPOINT_FILE as indented JSON.

    The JSON is written to a sibling ``.tmp`` file and then moved over the
    real checkpoint with ``os.replace``, so an interrupted write cannot leave
    a truncated checkpoint behind. Up to three attempts are made, 0.5 s
    apart; if all fail a warning is printed and the function returns without
    raising (best effort, as before).

    Args:
        data: JSON-serialisable checkpoint structure.
    """
    max_attempts = 3
    tmp_path = CHECKPOINT_FILE.with_name(CHECKPOINT_FILE.name + ".tmp")

    for attempt in range(1, max_attempts + 1):
        try:
            with open(tmp_path, 'w') as f:
                json.dump(data, f, indent=2)
            os.replace(tmp_path, CHECKPOINT_FILE)
            return
        except Exception as e:
            if attempt == max_attempts:
                print(f"âš ï¸  Checkpoint save failed: {e}")
                return
            # Only wait when another attempt will follow.
            time.sleep(0.5)

def get_remaining_samples(checkpoint_data):
    """Return the samples from ALL_SAMPLES that still need processing.

    A sample is skipped when the checkpoint holds a result for it with
    status 'success', or when it is listed in SKIP_SAMPLES. The order of
    ALL_SAMPLES is preserved.
    """
    done = {
        entry['sample_id']
        for entry in checkpoint_data.get('results', [])
        if entry.get('status') == 'success'
    }

    pending = []
    for sample in ALL_SAMPLES:
        if sample in done or sample in SKIP_SAMPLES:
            continue
        pending.append(sample)
    return pending

def check_ram(min_required=MIN_FREE_RAM_GB, context=""):
    """Measure available system RAM and warn when it is below a threshold.

    Args:
        min_required: threshold in GB (1024**3 bytes) below which a warning is printed.
        context: free-text label appended to the warning.

    Returns:
        float: currently available RAM in GB.
    """
    bytes_per_gb = 1024**3
    free_gb = psutil.virtual_memory().available / bytes_per_gb
    is_low = free_gb < min_required
    if is_low:
        print(f"âš ï¸  LOW RAM: {free_gb:.1f} GB (need {min_required} GB) - {context}")
    return free_gb

def wait_for_ram(min_required=MIN_SAFE_RAM_GB, max_wait=60):
    """Poll until enough RAM is available or the time budget is used up.

    Between polls the garbage collector is run and the function sleeps 5 s.

    Args:
        min_required: required available RAM in GB.
        max_wait: maximum number of seconds to keep polling.

    Returns:
        bool: True as soon as available RAM reaches min_required, False if
        max_wait elapses first.
    """
    bytes_per_gb = 1024**3
    began = time.time()
    while True:
        if not (time.time() - began < max_wait):
            return False
        free_gb = psutil.virtual_memory().available / bytes_per_gb
        if free_gb >= min_required:
            return True
        print(f"    Waiting for RAM... ({free_gb:.1f} GB free)")
        gc.collect()
        time.sleep(5)

def get_adaptive_tile_size_by_pixels(num_pixels):
    """Pick the tile size and size category for an image of ``num_pixels`` pixels.

    Returns:
        tuple: (tile_size, category) where category is "ULTRA" above
        ULTRA_PIXEL_THRESHOLD, "MONSTER" above MONSTER_PIXEL_THRESHOLD,
        and "NORMAL" otherwise.
    """
    # Tiers are checked from the largest threshold down; first match wins.
    tiers = (
        (ULTRA_PIXEL_THRESHOLD, TILE_SIZE_ULTRA, "ULTRA"),
        (MONSTER_PIXEL_THRESHOLD, TILE_SIZE_MONSTER, "MONSTER"),
    )
    for threshold, tile_size, category in tiers:
        if num_pixels > threshold:
            return tile_size, category
    return TILE_SIZE_NORMAL, "NORMAL"

print("âœ“ Checkpoint & RAM management ready")

In [None]:
# === SORT BY SIZE (PIXEL-BASED CATEGORIZATION) ===
# Probe every non-skipped sample's input file, record its on-disk size and
# pixel dimensions, and order the samples from smallest to largest file.
# Each entry is (sample_id, size_mb, tile_size, category, num_pixels, h, w).
# Missing inputs get size inf so they sort last; they are still listed in
# SORTED_SAMPLES and are reported as failures by the batch loop.
sample_sizes = []
for sample_id in ALL_SAMPLES:
    if sample_id in SKIP_SAMPLES:
        continue
    input_path = DATA_ROOT / sample_id / "AF_removal" / "fused_decon_AF_cleaned.ome.tif"
    if not input_path.exists():
        sample_sizes.append((sample_id, float('inf'), 0, "MISSING", 0, 0, 0))
        continue

    size_mb = input_path.stat().st_size / (1024 * 1024)
    # Get image dimensions to determine tile size
    try:
        with tifffile.TiffFile(input_path) as tif:
            img_shape = tif.series[0].shape
        if len(img_shape) == 3:  # (C, H, W)
            h, w = img_shape[1], img_shape[2]
        elif len(img_shape) == 2:  # (H, W)
            h, w = img_shape
        else:
            # Previously this silently became 0 pixels and was labelled
            # "NORMAL"; report it as unreadable instead.
            raise ValueError(f"unsupported image shape {img_shape}")
        num_pixels = h * w
        tile_size, category = get_adaptive_tile_size_by_pixels(num_pixels)
        sample_sizes.append((sample_id, size_mb, tile_size, category, num_pixels, h, w))
    except Exception as e:
        print(f"âš ï¸  {sample_id}: Could not read dimensions: {e}")
        # Fall back to the configured normal tile size rather than a literal.
        sample_sizes.append((sample_id, size_mb, TILE_SIZE_NORMAL, "UNKNOWN", 0, 0, 0))

sample_sizes.sort(key=lambda x: x[1])
SORTED_SAMPLES = [s[0] for s in sample_sizes]

print("Samples (smallest first):")
for sid, size, tiles, cat, npx, h, w in sample_sizes:
    if size == float('inf'):
        print(f"  {sid}: MISSING")
    elif npx > 0:
        mpx = npx / 1_000_000
        print(f"  {sid}: {size:.1f} MB, {h}Ã—{w} ({mpx:.1f}M px) â†’ {tiles}px tiles ({cat})")
    else:
        print(f"  {sid}: {size:.1f} MB â†’ {tiles}px tiles ({cat})")
print()

In [None]:
# === LOAD MODEL ===
# Load the SAM predictor once; the same `predictor` object is reused for every
# tile of every sample by the batch cell. check_ram prints a warning only when
# available RAM is below its default threshold (MIN_FREE_RAM_GB).
check_ram(context="before model load")
print(f"Loading {MODEL_TYPE}...")
# device='cpu' requests CPU inference. checkpoint_path=None presumably lets
# micro_sam pick its default weights for MODEL_TYPE — confirm against micro_sam docs.
predictor = get_sam_model(model_type=MODEL_TYPE, checkpoint_path=None, device='cpu')
print("âœ“ Model loaded")
check_ram(context="after model load")
print()

In [None]:
# === HELPER FUNCTIONS ===

def rle_encode(mask):
    """Run-length encode a binary mask.

    The mask is flattened in row-major order. The result is a flat integer
    array ``[start_1, length_1, start_2, length_2, ...]`` where each start is
    the 1-based flat index of the first pixel of a run of set pixels.
    """
    flat = mask.flatten()
    # Zero padding at both ends guarantees every run has a rising and a falling edge.
    padded = np.concatenate([[0], flat, [0]])
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Odd positions hold run ends; turn them into run lengths.
    edges[1::2] = edges[1::2] - edges[::2]
    return edges

def rle_decode(rle, shape):
    """Decode a run-length encoding produced by ``rle_encode`` into a boolean mask.

    Args:
        rle: flat sequence ``[start_1, length_1, start_2, length_2, ...]`` with
            1-based starts over the row-major flattened mask. Any sequence
            convertible to an integer array is accepted; it is not modified.
        shape: (height, width) of the mask to rebuild.

    Returns:
        numpy.ndarray: boolean array of the given shape.
    """
    runs = np.asarray(rle, dtype=np.int64)
    # Subtracting into a new array (instead of `starts -= 1` on a view) keeps
    # the caller's RLE intact, so the same encoding can be decoded repeatedly.
    starts = runs[0::2] - 1
    ends = starts + runs[1::2]

    flat = np.zeros(shape[0] * shape[1], dtype=bool)
    for start, end in zip(starts, ends):
        flat[start:end] = True
    return flat.reshape(shape)


def find_laminin_channel(sample_dir, sample_id):
    """Locate the Laminin marker in a sample's marker CSV.

    The CSV is ``markers_<suffix>.csv`` in ``sample_dir`` (suffix = text after
    the last underscore of ``sample_id``); if that file is absent, the first
    ``markers_*.csv`` found by globbing is used instead.

    Args:
        sample_dir: pathlib.Path of the sample folder.
        sample_id: sample identifier string.

    Returns:
        tuple: (channel_index, marker_name, error). On success error is None.
        On failure channel_index and marker_name are None and error is a
        message. When an include column exists, channel_index is the number of
        included rows that precede the chosen Laminin row; otherwise it is the
        row's position in the CSV.
    """
    def first_present(candidates, frame):
        # Return the first candidate name that is a column of `frame`, else None.
        for name in candidates:
            if name in frame.columns:
                return name
        return None

    marker_csv = sample_dir / f"markers_{sample_id.split('_')[-1]}.csv"
    if not marker_csv.exists():
        fallbacks = list(sample_dir.glob("markers_*.csv"))
        if len(fallbacks) == 0:
            return None, None, "Marker CSV not found"
        marker_csv = fallbacks[0]

    try:
        markers_df = pd.read_csv(marker_csv)
    except Exception as e:
        return None, None, f"CSV error: {e}"

    marker_col = first_present(
        ['Marker-Name', 'marker_name', 'Marker', 'marker', 'name', 'Name'],
        markers_df,
    )
    if marker_col is None:
        return None, None, "Marker column not found"

    is_laminin = markers_df[marker_col].fillna('').str.upper().str.contains('LAMININ')
    laminin_rows = markers_df[is_laminin]
    if laminin_rows.empty:
        return None, None, "Laminin not found"

    include_col = first_present(['Include', 'include', 'included', 'Included'], markers_df)

    if include_col is None:
        # No include flag: the channel index is simply the row position.
        row = laminin_rows.iloc[0]
        return markers_df.index.get_loc(row.name), row[marker_col], None

    # Prefer a Laminin row that is flagged as included; otherwise take the first one.
    flagged = laminin_rows[laminin_rows[include_col] == True]
    row = laminin_rows.iloc[0] if flagged.empty else flagged.iloc[0]
    csv_pos = markers_df.index.get_loc(row.name)

    # Count only included rows ahead of the chosen row.
    included_mask = markers_df[include_col] == True
    idx = int(included_mask.iloc[:csv_pos].sum())
    return idx, row[marker_col], None


def extract_channel_memmap(tiff_path, channel_idx):
    """Read one channel of a TIFF and rescale it to 8-bit.

    Despite the name, the selected page is read fully into memory with
    ``asarray()``. Intensities are clipped to the 1st–99.5th percentile range,
    scaled to [0, 1] and converted to uint8 (0–255).

    Args:
        tiff_path: path to the TIFF file.
        channel_idx: index of the page within the first series to extract.

    Returns:
        tuple: (image_uint8, None) on success, or (None, error_message) on any
        failure, including a channel whose percentile range is empty.
    """
    try:
        with tifffile.TiffFile(tiff_path) as tif:
            series = tif.series[0]
            if channel_idx >= len(series):
                raise ValueError(f"Channel {channel_idx} out of range")

            page = series.pages[channel_idx]
            img = page.asarray()

            img_float = img.astype(np.float32)
            del img
            gc.collect()

            vmin, vmax = np.percentile(img_float, [1, 99.5])
            if not vmax > vmin:
                # A flat (or NaN) channel would divide by zero below and yield
                # an undefined uint8 image; report it instead.
                return None, (
                    f"Channel extraction error: channel {channel_idx} has no "
                    f"intensity range (p1={vmin}, p99.5={vmax})"
                )

            img_norm = np.clip((img_float - vmin) / (vmax - vmin), 0, 1)
            del img_float
            gc.collect()

            img_uint8 = (img_norm * 255).astype(np.uint8)
            del img_norm
            gc.collect()

            return img_uint8, None
    except Exception as e:
        return None, f"Channel extraction error: {e}"


def _compact_mask(m, x0, y0):
    """Reduce one generator mask record to an RLE of its bounding-box crop."""
    # Cast defensively: slicing needs ints even if the bbox arrives as floats.
    x_t, y_t, w_t, h_t = (int(v) for v in m['bbox'])  # [x, y, w, h] in tile coords
    seg_bbox = m['segmentation'][y_t:y_t + h_t, x_t:x_t + w_t]
    # Use the crop's real shape so decoding always matches what was encoded,
    # even if the bbox reaches past the tile edge and the slice is clipped.
    h_c, w_c = seg_bbox.shape
    return {
        'rle': rle_encode(seg_bbox),
        'bbox_shape': (h_c, w_c),
        'bbox_global': [x_t + x0, y_t + y0, w_c, h_c],
        'area': m['area'],
        'predicted_iou': m.get('predicted_iou', 0.0)
    }


def segment_with_tiles_rle_compressed(image_uint8, predictor, sample_id, tile_size, overlap=256):
    """Segment an image tile by tile and cache each tile's masks on disk.

    Tiles of ``tile_size`` pixels are taken every ``tile_size - overlap``
    pixels. For each tile an AutomaticMaskGenerator is run and every mask is
    stored as an RLE of its bounding-box crop plus its bbox in full-image
    coordinates. One pickle per tile is written to ``TEMP_DIR/<sample_id>``.
    Available RAM is checked every fifth tile, waiting if it is below
    MIN_SAFE_RAM_GB. A tile that raises is counted as skipped and processing
    continues with the next tile.

    Args:
        image_uint8: 2-D image array.
        predictor: SAM predictor passed to AutomaticMaskGenerator.
        sample_id: used as the temp sub-folder name.
        tile_size: tile edge length in pixels.
        overlap: pixels shared by neighbouring tiles.

    Returns:
        tuple: (list of pickle paths written, (height, width) of the image).

    Raises:
        ValueError: if ``tile_size`` is not larger than ``overlap``.
    """
    stride = tile_size - overlap
    if stride <= 0:
        raise ValueError(f"tile_size ({tile_size}) must be larger than overlap ({overlap})")

    h, w = image_uint8.shape
    n_y = int(np.ceil(h / stride))
    n_x = int(np.ceil(w / stride))
    total = n_y * n_x

    print(f"    Tiles: {total} ({n_y}Ã—{n_x} at {tile_size}px)")

    temp_sample = TEMP_DIR / sample_id
    temp_sample.mkdir(parents=True, exist_ok=True)

    tile_files = []
    skipped = 0

    with tqdm(total=total, desc="  Segmenting tiles", leave=False) as pbar:
        for i in range(n_y):
            for j in range(n_x):
                # Check RAM every 5 tiles
                if (i * n_x + j) % 5 == 0:
                    free_gb = check_ram(MIN_SAFE_RAM_GB, f"tile [{i},{j}]")
                    if free_gb < MIN_SAFE_RAM_GB:
                        print(f"\n    Waiting for RAM...")
                        wait_for_ram(MIN_SAFE_RAM_GB)

                y0 = i * stride
                x0 = j * stride
                y1 = min(y0 + tile_size, h)
                x1 = min(x0 + tile_size, w)

                tile = image_uint8[y0:y1, x0:x1].copy()
                # Pre-bind so the cleanup below never touches an unbound name.
                # The old `del ... seg_tile, seg_bbox` failed on tiles with no
                # masks, and the handler's own `del tile` then aborted the sample.
                gen = masks = compact_masks = None

                try:
                    gen = AutomaticMaskGenerator(predictor)
                    gen.initialize(tile)
                    masks = gen.generate()

                    # RLE compress each mask (bbox crop only)
                    compact_masks = [_compact_mask(m, x0, y0) for m in masks]

                    # Save compressed tile
                    pkl_file = temp_sample / f"tile_{i:03d}_{j:03d}.pkl"
                    with open(pkl_file, 'wb') as f:
                        pickle.dump(compact_masks, f, protocol=4)
                    tile_files.append(pkl_file)

                except Exception as e:
                    skipped += 1
                    print(f"\n    âš ï¸  Tile [{i},{j}]: {type(e).__name__}: {str(e)[:80]}")

                finally:
                    del tile, gen, masks, compact_masks
                    gc.collect()

                pbar.update(1)

    if skipped > 0:
        print(f"    âš ï¸  {skipped} tiles skipped")

    return tile_files, (h, w)


def load_tiles_chunked(tile_files, img_shape, chunk_size=None):
    """Load cached tile pickles and rebuild full-image mask records.

    Tile files are read ``chunk_size`` at a time with a garbage collection
    after each chunk. Every stored RLE is decoded and pasted into a new
    boolean array of the full image size. Each successfully loaded pickle is
    deleted from disk; a file that fails to load is reported and left in place.

    NOTE(review): every returned mask holds its own (h, w) boolean array and
    all of them are kept in the returned list, so memory use grows with
    h * w * number_of_masks regardless of ``chunk_size``.

    Args:
        tile_files: list of pathlib.Path objects pointing at tile pickles.
        img_shape: (height, width) of the full image.
        chunk_size: tiles per chunk. When None it is chosen from the image's
            size category (same tiers as the adaptive tile size).

    Returns:
        list of dicts with keys 'segmentation', 'bbox', 'area',
        'predicted_iou' and 'stability_score' (always 0.0).

    Raises:
        ValueError: if ``chunk_size`` is smaller than 1.
    """
    h, w = img_shape

    # Auto-determine chunk size from the shared size tiers so the thresholds
    # are defined in exactly one place.
    if chunk_size is None:
        _, category = get_adaptive_tile_size_by_pixels(h * w)
        chunk_size = {
            "ULTRA": TILE_LOAD_BATCH_ULTRA,
            "MONSTER": TILE_LOAD_BATCH_MONSTER,
        }.get(category, TILE_LOAD_BATCH)
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

    all_masks = []
    n_chunks = int(np.ceil(len(tile_files) / chunk_size))

    print(f"    Loading {len(tile_files)} tiles in {n_chunks} chunks of {chunk_size}...")

    for chunk_idx in range(n_chunks):
        start_idx = chunk_idx * chunk_size
        end_idx = min((chunk_idx + 1) * chunk_size, len(tile_files))
        chunk_files = tile_files[start_idx:end_idx]

        print(f"      Chunk {chunk_idx+1}/{n_chunks}: {len(chunk_files)} tiles")

        for pkl_file in chunk_files:
            try:
                # SECURITY: pickle.load executes arbitrary code from the file.
                # Only files written by this pipeline into TEMP_DIR should be passed in.
                with open(pkl_file, 'rb') as f:
                    compact_masks = pickle.load(f)

                for cm in compact_masks:
                    # Decode RLE to bbox-sized mask
                    seg_bbox = rle_decode(cm['rle'], cm['bbox_shape'])

                    # Paste the crop into an otherwise empty full-size mask
                    seg_full = np.zeros((h, w), dtype=bool)
                    x_g, y_g, w_g, h_g = cm['bbox_global']
                    seg_full[y_g:y_g+h_g, x_g:x_g+w_g] = seg_bbox

                    all_masks.append({
                        'segmentation': seg_full,
                        'bbox': cm['bbox_global'],
                        'area': cm['area'],
                        'predicted_iou': cm['predicted_iou'],
                        'stability_score': 0.0
                    })

                    del seg_bbox, seg_full

                del compact_masks
                pkl_file.unlink()  # Delete as we go

            except Exception as e:
                print(f"      âš ï¸  Load {pkl_file.name}: {e}")

        # GC after each chunk
        gc.collect()

    return all_masks


def masks_to_segmentation_chunked(masks, img_shape, batch_size=None, min_object_size=MIN_OBJECT_SIZE):
    """Merge mask records into one label image, a few masks at a time.

    Masks are converted in batches with ``mask_data_to_segmentation``. Each
    batch label is then written into the output wherever the output is still
    0, so pixels claimed by an earlier batch are never overwritten. A batch
    that raises is reported and skipped.

    Args:
        masks: list of mask dicts as returned by ``load_tiles_chunked``.
        img_shape: (height, width) of the output label image.
        batch_size: masks per batch. When None it is chosen from the image's
            size category (same tiers as the adaptive tile size).
        min_object_size: forwarded to ``mask_data_to_segmentation``.

    Returns:
        numpy.ndarray: uint32 label image of shape ``img_shape``; 0 is background.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    h, w = img_shape

    # Auto-determine batch size from the shared size tiers so the thresholds
    # are defined in exactly one place.
    if batch_size is None:
        _, category = get_adaptive_tile_size_by_pixels(h * w)
        batch_size = {
            "ULTRA": MASK_BATCH_SIZE_ULTRA,
            "MONSTER": MASK_BATCH_SIZE_MONSTER,
        }.get(category, MASK_BATCH_SIZE)
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    print(f"    Converting {len(masks)} masks in batches of {batch_size}...")

    final_seg = np.zeros((h, w), dtype=np.uint32)
    current_label = 1

    num_batches = int(np.ceil(len(masks) / batch_size))

    for batch_idx in range(num_batches):
        start_idx = batch_idx * batch_size
        end_idx = min((batch_idx + 1) * batch_size, len(masks))
        batch_masks = masks[start_idx:end_idx]
        batch_seg = None

        print(f"      Batch {batch_idx+1}/{num_batches}: {len(batch_masks)} masks")

        # Check RAM
        free_gb = check_ram(MIN_SAFE_RAM_GB, f"batch {batch_idx+1}")
        if free_gb < MIN_SAFE_RAM_GB:
            wait_for_ram(MIN_SAFE_RAM_GB)

        try:
            # Convert batch
            batch_seg = mask_data_to_segmentation(
                batch_masks,
                with_background=True,
                min_object_size=min_object_size
            )

            # Labels inside one batch never share a pixel, so the set of
            # still-unclaimed output pixels can be computed once per batch.
            unclaimed = final_seg == 0
            for label_id in np.unique(batch_seg):
                if label_id == 0:
                    continue
                new_pixels = (batch_seg == label_id) & unclaimed
                if not new_pixels.any():
                    # Entirely covered by earlier batches: adds nothing, so it
                    # must not consume a label or inflate the object count.
                    continue
                final_seg[new_pixels] = current_label
                current_label += 1
            del unclaimed

        except Exception as e:
            print(f"      âš ï¸  Batch {batch_idx+1} failed: {e}")

        finally:
            del batch_seg, batch_masks
            gc.collect()

    print(f"    â†’ {current_label - 1} objects")
    return final_seg


def filter_by_size_shape(seg, min_a, max_a, max_e):
    """Keep only labelled regions within an area range and eccentricity limit.

    Args:
        seg: integer label image.
        min_a: minimum region area (inclusive), in pixels.
        max_a: maximum region area (inclusive), in pixels.
        max_e: maximum eccentricity (inclusive).

    Returns:
        Label image from ``skimage.measure.label`` applied to the union of the
        kept regions. Kept regions that touch are therefore merged into one label.
    """
    keep = np.zeros(seg.shape, dtype=bool)
    for p in regionprops(seg):
        if min_a <= p.area <= max_a and p.eccentricity <= max_e:
            # Write through the region's bounding-box view instead of scanning
            # the whole image once per region (`seg == p.label`).
            keep[p.slice][p.image] = True
    return label(keep)


def apply_morphological_refinement(mask, min_a, merge_r=15, smooth_r=5):
    """Clean up a segmentation with binary morphology and relabel it.

    Steps, applied to the foreground (mask > 0): closing with a disk of radius
    ``merge_r``, hole filling, closing with a disk of radius ``smooth_r``,
    removal of objects smaller than ``min_a``, then connected-component labelling.

    Returns:
        Label image of the refined foreground.
    """
    merge_disk = morphology.disk(merge_r)
    smooth_disk = morphology.disk(smooth_r)

    # A single name is rebound at each step so intermediate images can be freed.
    fg = mask > 0
    fg = morphology.binary_closing(fg, merge_disk)
    fg = ndi.binary_fill_holes(fg)
    fg = morphology.binary_closing(fg, smooth_disk)
    fg = morphology.remove_small_objects(fg, min_size=min_a)
    return label(fg)


def save_outputs(final_mask, output_dir, sample_id, marker_name):
    """Write the final crypt segmentation of one sample to disk.

    Files written into ``output_dir`` (created if needed):
      * ``crypt_mask_microsam.tif`` – binary mask (0/255, uint8)
      * ``crypt_mask_microsam_labeled.tif`` – label image cast to uint16
      * ``segment_registry.csv`` – one row per label; rows previously written
        for this sample with segment_type 'crypt_microsam' are replaced
      * ``<sample_id>_crypts_microsam_qupath.geojson`` – one polygon per label

    Args:
        final_mask: integer label image (0 = background).
        output_dir: pathlib.Path of the destination folder.
        sample_id: sample identifier stored in the registry and GeoJSON.
        marker_name: marker name stored in the registry.

    Returns:
        int: number of distinct non-zero labels written to the registry.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    bin_path = output_dir / "crypt_mask_microsam.tif"
    lab_path = output_dir / "crypt_mask_microsam_labeled.tif"
    tifffile.imwrite(bin_path, ((final_mask > 0).astype(np.uint8) * 255), compression='zlib')
    tifffile.imwrite(lab_path, final_mask.astype(np.uint16), compression='zlib')

    reg_path = output_dir / "segment_registry.csv"
    registry_columns = [
        'segment_id', 'segment_type', 'mask_file', 'global_unique_id',
        'sample_id', 'marker', 'method'
    ]
    crypt_ids = [int(cid) for cid in np.unique(final_mask[final_mask > 0])]
    entries = [
        {
            'segment_id': cid,
            'segment_type': 'crypt_microsam',
            'mask_file': lab_path.name,
            'global_unique_id': f"M_{cid}",
            'sample_id': sample_id,
            'marker': marker_name,
            'method': 'MicroSAM'
        }
        for cid in crypt_ids
    ]

    # Explicit columns keep a header in the CSV even when nothing was found,
    # so the file can be read back on the next run.
    reg_df = pd.DataFrame(entries, columns=registry_columns)
    if reg_path.exists():
        try:
            exist = pd.read_csv(reg_path)
        except pd.errors.EmptyDataError:
            exist = pd.DataFrame(columns=registry_columns)
        # Drop this sample's earlier rows only if the registry has the
        # columns needed to identify them.
        if {'segment_type', 'sample_id'} <= set(exist.columns):
            stale = (exist['segment_type'] == 'crypt_microsam') & (exist['sample_id'] == sample_id)
            exist = exist[~stale]
        reg_df = pd.concat([exist, reg_df], ignore_index=True)
    reg_df.to_csv(reg_path, index=False)

    geo_path = output_dir / f"{sample_id}_crypts_microsam_qupath.geojson"
    features = []
    for p in regionprops(final_mask):
        cb = (final_mask == p.label).astype(np.uint8)
        contours = find_contours(cb, level=0.5)
        if not contours:
            continue
        # Use the longest contour as the region outline.
        cnt = max(contours, key=len)
        # find_contours yields (row, col); GeoJSON wants [x, y].
        coords = [[float(x), float(y)] for y, x in cnt]
        if coords[0] != coords[-1]:
            coords.append(coords[0])

        features.append({
            "type": "Feature",
            "id": f"M_{p.label}",
            "geometry": {"type": "Polygon", "coordinates": [coords]},
            "properties": {
                "classification": {"name": "Crypt", "colorRGB": -3140401},
                "object_type": "annotation",
                "name": f"Crypt_{p.label}",
                "isLocked": False,
                # NOTE(review): regionprops is called without a pixel spacing,
                # so these values are in pixel units despite the *_um key
                # names; the keys are kept unchanged for existing consumers.
                "measurements": {
                    "Area_um2": float(p.area),
                    "Perimeter_um": float(p.perimeter),
                    "Eccentricity": float(p.eccentricity),
                    "Solidity": float(p.solidity)
                },
                "segment_id": int(p.label),
                "global_id": f"M_{p.label}",
                "sample_id": sample_id,
                "method": "MicroSAM"
            }
        })

    with open(geo_path, 'w') as f:
        json.dump({"type": "FeatureCollection", "features": features}, f, indent=2)

    # The batch loop logs and checkpoints this count.
    return len(crypt_ids)
    
# Single status line for the helper cell (the duplicated, contradictory
# "double-chunked" messages were leftovers from earlier versions).
print("âœ“ Helper functions ready (RLE + triple-chunked: Normal/Monster/Ultra)")

In [None]:
# === BATCH PROCESSING V8 ===
# Runs the whole pipeline for every remaining sample, smallest file first:
# locate the Laminin channel -> extract it as uint8 -> tile + segment (cached
# to disk) -> reload masks -> merge into a label image -> filter -> optional
# morphology -> save outputs. One result record per sample is appended to the
# checkpoint and written to disk immediately.


class _SampleSkipped(Exception):
    """Raised inside the per-sample pipeline for an expected skip condition."""


def _record_failure(checkpoint_data, sample_id, error, elapsed_seconds):
    """Append a 'failed' result for ``sample_id`` and persist the checkpoint."""
    checkpoint_data['results'].append({
        'sample_id': sample_id,
        'status': 'failed',
        'num_crypts': 0,
        'time_seconds': elapsed_seconds,
        'error': error
    })
    save_checkpoint(checkpoint_data)


checkpoint = load_checkpoint()
# The loop appends to checkpoint['results']; make sure the list exists even
# for a checkpoint file that lacks it.
checkpoint.setdefault('results', [])
remaining = get_remaining_samples(checkpoint)
remaining_sorted = [s for s in SORTED_SAMPLES if s in remaining]

if not remaining_sorted:
    print("\nâœ“ ALL SAMPLES COMPLETED!\n")
else:
    print("\n" + "="*80)
    print(f"BATCH V8 - {len(remaining_sorted)} SAMPLES")
    print("="*80)
    print(f"Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Remaining: {remaining_sorted}\n")

    overall_start = time.time()

    for idx, sample_id in enumerate(remaining_sorted, 1):
        print(f"\n[{idx}/{len(remaining_sorted)}] {sample_id}...")

        # RAM GATE: do not even start a sample without enough free memory.
        free_gb = check_ram(MIN_FREE_RAM_GB, "start gate")
        if free_gb < MIN_FREE_RAM_GB:
            print(f"  âœ— SKIP: Insufficient RAM ({free_gb:.1f} GB)")
            print("    Please restart kernel")
            _record_failure(checkpoint, sample_id, f'Insufficient RAM: {free_gb:.1f} GB', 0)
            continue

        sample_start = time.time()
        succeeded = False
        # Pre-bind the large intermediates so they can always be released in
        # `finally`, including when a step raises half-way through.
        img = masks = seg = filt = final = None

        try:
            sample_dir = DATA_ROOT / sample_id
            input_path = sample_dir / "AF_removal" / "fused_decon_AF_cleaned.ome.tif"
            output_dir = sample_dir / "crypt_segmentation"

            if not input_path.exists():
                raise _SampleSkipped('Input missing')

            size_mb = input_path.stat().st_size / (1024 * 1024)

            # Get image dimensions for pixel-based tile sizing
            with tifffile.TiffFile(input_path) as tif:
                img_shape = tif.series[0].shape
            if len(img_shape) == 3:  # (C, H, W)
                h, w = img_shape[1], img_shape[2]
            else:  # (H, W)
                h, w = img_shape
            num_pixels = h * w

            tile_size, category = get_adaptive_tile_size_by_pixels(num_pixels)
            mpx = num_pixels / 1_000_000
            print(f"  File: {size_mb:.1f} MB, {h}Ã—{w} ({mpx:.1f}M px) â†’ {tile_size}px tiles ({category})")

            print("  Finding Laminin...")
            ch_idx, marker, error = find_laminin_channel(sample_dir, sample_id)
            if error:
                raise _SampleSkipped(error)
            print(f"    '{marker}' at ch{ch_idx}")

            print("  Extracting channel (memory-mapped)...")
            img, error = extract_channel_memmap(input_path, ch_idx)
            if error:
                raise _SampleSkipped(error)

            print(f"    {img.shape[0]}Ã—{img.shape[1]} px")
            img_shape = img.shape

            # PHASE 1: Segment & compress tiles
            print(f"  Segmenting with {tile_size}px tiles (RLE compression)...")
            tile_files, _ = segment_with_tiles_rle_compressed(img, predictor, sample_id, tile_size, OVERLAP)
            print(f"    â†’ {len(tile_files)} tiles saved")

            img = None
            gc.collect()

            if len(tile_files) == 0:
                raise _SampleSkipped('No tiles')

            # PHASE 2: Load tiles in chunks (auto-adaptive chunk size)
            print("  Loading tiles (chunked + RLE decode)...")
            masks = load_tiles_chunked(tile_files, img_shape)  # Auto-selects chunk size
            print(f"    â†’ {len(masks)} masks loaded")

            if len(masks) == 0:
                raise _SampleSkipped('No masks')

            # PHASE 3: Convert to segmentation in batches (auto-adaptive)
            print("  Converting to segmentation (chunked batches)...")
            seg = masks_to_segmentation_chunked(masks, img_shape)  # Auto-selects batch size

            masks = None
            gc.collect()

            print("  Filtering by size & shape...")
            filt = filter_by_size_shape(seg, MIN_AREA, MAX_AREA, MAX_ECCENTRICITY)
            print(f"    â†’ {int(filt.max())} crypts")

            seg = None
            gc.collect()

            if APPLY_MORPHOLOGY:
                print("  Morphological refinement...")
                final = apply_morphological_refinement(filt, MIN_AREA, MERGE_RADIUS, SMOOTH_RADIUS)
                n_final = int(final.max())
                print(f"    â†’ {n_final} crypts")
            else:
                final = filt
                n_final = int(final.max())

            filt = None
            gc.collect()

            print("  Saving outputs...")
            saved_count = save_outputs(final, output_dir, sample_id, marker)
            # Use the label count when save_outputs does not report a number
            # (a function without an explicit return yields None).
            n_crypts = n_final if saved_count is None else saved_count

            final = None
            gc.collect()

            elapsed = time.time() - sample_start
            print(f"  âœ… {n_crypts} crypts in {elapsed:.1f}s ({elapsed/60:.1f} min)")

            checkpoint['results'].append({
                'sample_id': sample_id,
                'status': 'success',
                'num_crypts': n_crypts,
                'time_seconds': elapsed,
                'file_size_mb': size_mb,
                'tile_size': tile_size,
                'category': category,
                'marker': marker
            })
            save_checkpoint(checkpoint)
            succeeded = True

        except _SampleSkipped as skip:
            # Expected, non-crash conditions: record and move on.
            print(f"  âš ï¸  SKIP: {skip}")
            _record_failure(checkpoint, sample_id, str(skip), time.time() - sample_start)

        except Exception as e:
            elapsed = time.time() - sample_start
            error_msg = f"{type(e).__name__}: {e}"
            print(f"  âœ— ERROR: {error_msg}")
            _record_failure(checkpoint, sample_id, error_msg, elapsed)

        finally:
            # Release large arrays even after a failure so the next sample's
            # RAM gate is not tripped by leftovers from this one.
            img = masks = seg = filt = final = None

            # Cached tiles are never reused across runs, so remove them for
            # failed samples too. Report (do not hide) cleanup problems.
            temp_sample = TEMP_DIR / sample_id
            if temp_sample.exists():
                try:
                    shutil.rmtree(temp_sample)
                except OSError as cleanup_error:
                    print(f"  âš ï¸  Temp cleanup failed: {cleanup_error}")

            gc.collect()

        # RECOVERY PAUSE (after successful samples only, and not after the last one)
        if succeeded and idx < len(remaining_sorted):
            print(f"\n  â¸  Pause {PAUSE_BETWEEN_SAMPLES}s for GC...")
            gc.collect()
            time.sleep(PAUSE_BETWEEN_SAMPLES)

    total_time = time.time() - overall_start

    print("\n" + "="*80)
    print("COMPLETE")
    print("="*80)
    print(f"Total: {total_time/60:.1f} min ({total_time/3600:.1f} h)")

    # Counts cover every record in the checkpoint, including earlier runs.
    success = [r for r in checkpoint['results'] if r.get('status') == 'success']
    failed = [r for r in checkpoint['results'] if r.get('status') == 'failed']

    print(f"Success: {len(success)}")
    print(f"Failed: {len(failed)}")

    if failed:
        print("\nFailed:")
        for r in failed:
            print(f"  - {r['sample_id']}: {r.get('error', 'Unknown')}")

In [None]:
# === SUMMARY ===
# Re-read the checkpoint from disk, print every recorded result as a table,
# export the table to CSV, then delete the temporary tile cache.
print("\n" + "="*80)
print("V8 BATCH SUMMARY")
print("="*80)

if CHECKPOINT_FILE.exists():
    with open(CHECKPOINT_FILE, 'r') as f:
        data = json.load(f)
    
    results = data.get('results', [])
    if results:
        # One row per result record; records may have different keys
        # (success rows carry extra fields), so some cells can be empty.
        df = pd.DataFrame(results)
        print(df.to_string())
        
        # NOTE(review): the export path is hard-coded rather than derived from
        # a configuration constant.
        csv_path = Path(r"C:\Users\researcher\Downloads\Cycif_pipeline_V3\microsam_batch_summary_v8.csv")
        df.to_csv(csv_path, index=False)
        print(f"\nâœ“ Saved: {csv_path}")
    else:
        print("No results.")
else:
    print("No checkpoint.")

# Cleanup temp: removes TEMP_DIR itself, including any per-sample folders
# still inside it.
if TEMP_DIR.exists():
    try:
        shutil.rmtree(TEMP_DIR)
        print(f"\nâœ“ Cleaned: {TEMP_DIR}")
    except Exception as e:
        print(f"\nâš ï¸  Cleanup: {e}")

print(f"\nFinal RAM: {psutil.virtual_memory().available / (1024**3):.1f} GB free")