# HE ROI Classification: internal / edge / outside

This notebook counts cells and ImageJ ROIs for a single image and classifies each ROI as one of three groups:
- **internal**: ROI inside a cell and at least `THRESHOLD` pixels away from the boundary
- **edge**: ROI within `THRESHOLD` pixels of the boundary (either inside or outside)
- **outside**: ROI outside any cell and at least `THRESHOLD` pixels away from the boundary

Set `MASK_PATH` and `ROI_ZIP_PATH` below for a single-image run and tune `THRESHOLD` as needed. Use the optional batch cell to process all matching pairs once parameters are finalized.


In [1]:
# 1) Imports and dependency checks
import os
from pathlib import Path
import numpy as np
import pandas as pd
import tifffile
from skimage import io
from skimage.segmentation import find_boundaries
from scipy.ndimage import distance_transform_edt
import cv2

# ROI reader - install if missing
try:
    from read_roi import read_roi_zip
except Exception as e:
    print("Package 'read-roi' not found. Install with: pip install read-roi")
    raise

# Napari for visualization
import napari

print('Imports OK')


Imports OK


In [2]:
# 2) Parameters and file selection (set these for a single-image run)
# Path to a folder that contains *_mask.tif and corresponding .zip ROI files
FOLDER = Path(r"S:\micro\ts2625\eh2888\lem\HEImages_0123\newData")
THRESHOLD = 30  # pixels (adjustable)
MIN_CELL_AREA = 8000  # pixels; labeled cells with area smaller than this will be removed

# Find mask / roi pairs by basename
mask_paths = sorted(FOLDER.glob("*_mask.tif"))
pairs = []
for m in mask_paths:
    base = m.stem.replace("_mask", "")
    # look for ROI zip with same base
    possible = list(FOLDER.glob(base + "*.zip")) + list(FOLDER.glob(base + ".zip"))
    roi = possible[0] if possible else None
    pairs.append({'base': base, 'mask': m, 'roi': roi})

print(f"Found {len(pairs)} mask files in {FOLDER}")
for i,p in enumerate(pairs,1):
    print(f"{i:2d}. {p['mask'].name}  |  ROI: {p['roi'].name if p['roi'] else 'None found'}")

# Choose one pair for single-image analysis (change index as needed)
SELECT_INDEX = 0
sel = pairs[SELECT_INDEX]
MASK_PATH = sel['mask']
ROI_ZIP_PATH = sel['roi']
print('\nSelected:')
print('  MASK: ', MASK_PATH)
print('  ROI:  ', ROI_ZIP_PATH)
print(f"Minimum cell area filter: {MIN_CELL_AREA} pixels")


Found 6 mask files in S:\micro\ts2625\eh2888\lem\HEImages_0123\newData
 1. 01_01_03_2_mask.tif  |  ROI: 01_01_03_2.zip
 2. 01_01_04_2_mask.tif  |  ROI: 01_01_04_2.zip
 3. 01_01_04_3_mask.tif  |  ROI: 01_01_04_3.zip
 4. 01_02_02_3_mask.tif  |  ROI: None found
 5. 01_05_04_3_mask.tif  |  ROI: 01_05_04_3.zip
 6. 01_07_03_4_mask.tif  |  ROI: 01_07_03_4.zip

Selected:
  MASK:  S:\micro\ts2625\eh2888\lem\HEImages_0123\newData\01_01_03_2_mask.tif
  ROI:   S:\micro\ts2625\eh2888\lem\HEImages_0123\newData\01_01_03_2.zip
Minimum cell area filter: 8000 pixels


In [3]:
# 3) Helper functions: read ROIs and compute centroids

def roi_centroid(roi):
    """Return (x, y) centroid for a read-roi ROI dict."""
    # Common formats
    if 'x' in roi and 'y' in roi:
        xs = np.asarray(roi['x'], dtype=float)
        ys = np.asarray(roi['y'], dtype=float)
        return xs.mean(), ys.mean()
    if 'left' in roi and 'top' in roi and 'width' in roi and 'height' in roi:
        return roi['left'] + roi['width']/2.0, roi['top'] + roi['height']/2.0
    if 'coordinates' in roi:
        coords = np.asarray(roi['coordinates'])
        return coords[:,0].mean(), coords[:,1].mean()
    # points may be stored as lists of tuples under several keys
    for k in ('coords','points'):
        if k in roi:
            coords = np.asarray(roi[k])
            return coords[:,0].mean(), coords[:,1].mean()
    # point ROI sometimes stores single 'x' and 'y' as ints
    if 'x' in roi and isinstance(roi['x'], (int, float)) and 'y' in roi and isinstance(roi['y'], (int,float)):
        return float(roi['x']), float(roi['y'])
    raise ValueError(f"Unknown ROI format, keys: {list(roi.keys())}")


def load_rois_from_zip(zip_path):
    """Return list of (name, centroid_x, centroid_y, roi_dict)"""
    rois = read_roi_zip(str(zip_path))
    items = []
    for name, r in rois.items():
        try:
            cx, cy = roi_centroid(r)
        except Exception as e:
            print(f"Warning: could not parse ROI '{name}': {e}")
            continue
        items.append({'name': name, 'x': float(cx), 'y': float(cy), 'roi': r})
    return items

print('Helper functions ready')


Helper functions ready


In [None]:
# 4) Load mask and ROIs for the selected image
if ROI_ZIP_PATH is None:
    raise FileNotFoundError(f"No ROI zip found for base {sel['base']}. Place the zip in {FOLDER} or update selection.")

# Load mask (labels)
mask = tifffile.imread(str(MASK_PATH))
if mask.ndim > 2:
    # handle multi-channel by taking first channel
    mask = mask[...,0]
mask = mask.astype(np.int32)
orig_num_cells = int(mask.max())
removed_small_cells = 0
if orig_num_cells > 0:
    counts = np.bincount(mask.ravel())
    # find labels with area smaller than MIN_CELL_AREA
    small_labels = np.where(counts < MIN_CELL_AREA)[0]
    small_labels = small_labels[small_labels != 0]
    removed_small_cells = int(len(small_labels))
    if removed_small_cells > 0:
        filtered_mask = mask.copy()
        for lab in small_labels:
            filtered_mask[filtered_mask == lab] = 0
        mask = filtered_mask
        print(f"Removed {removed_small_cells} small cells (<{MIN_CELL_AREA} px).")
    else:
        print("No small cells to remove.")

num_cells = int(mask.max())
print(f"Mask loaded: {MASK_PATH.name} | shape={mask.shape} | original labeled cells={orig_num_cells} | remaining labeled cells={num_cells}")

# Load ROIs
roi_items = load_rois_from_zip(ROI_ZIP_PATH)
print(f"Loaded {len(roi_items)} ROIs from: {ROI_ZIP_PATH.name}")

# Find optionally the original image for visualization
orig_candidates = [f for f in (list(FOLDER.glob(sel['base'] + "*.jpg")) + 
                                list(FOLDER.glob(sel['base'] + "*.png")) + 
                                list(FOLDER.glob(sel['base'] + "*.tif")))
                   if '_mask' not in f.stem]
ORIG_PATH = orig_candidates[0] if orig_candidates else None
if ORIG_PATH:
    try:
        orig_img = io.imread(str(ORIG_PATH))
        print(f"Found original image: {ORIG_PATH.name}")
    except Exception as e:
        print(f"Could not read original image: {e}")
        ORIG_PATH = None

else:
    print("No original image found; will visualize mask only.")

Removed 5 small cells (<8000 px).
Mask loaded: 01_01_03_2_mask.tif | shape=(3192, 3108) | original labeled cells=122 | remaining labeled cells=120
Loaded 84 ROIs from: 01_01_03_2.zip
Found original image: 01_01_03_2.jpg


In [5]:
# 5) Compute distances and classify each ROI
# Precompute background distance map (distance from background pixel to nearest cell pixel)
background_map = distance_transform_edt(mask == 0)

# cache for per-cell inside distance transforms
cell_dist_cache = {}

results = []
for r in roi_items:
    x, y = r['x'], r['y']
    xi, yi = int(round(x)), int(round(y))
    # ensure in-bounds
    if yi < 0 or yi >= mask.shape[0] or xi < 0 or xi >= mask.shape[1]:
        status = 'outside'  # treat out-of-image as outside
        dist = np.nan
        cell_id = 0
    else:
        cell_id = int(mask[yi, xi])
        if cell_id > 0:
            # inside a cell
            if cell_id not in cell_dist_cache:
                cmask = (mask == cell_id)
                cell_dist_cache[cell_id] = distance_transform_edt(cmask)
            dist = float(cell_dist_cache[cell_id][yi, xi])
            status = 'internal' if dist >= THRESHOLD else 'edge'
        else:
            # outside any cell
            dist = float(background_map[yi, xi])
            status = 'outside' if dist >= THRESHOLD else 'edge'
    results.append({
        'name': r['name'],
        'x': x, 'y': y,
        'cell_id': cell_id,
        'distance_to_cell': dist,
        'classification': status
    })

summary = {
    'image_base': sel['base'],
    'num_cells': num_cells,
    'num_rois': len(results),
    'n_internal': sum(1 for r in results if r['classification']=='internal'),
    'n_edge': sum(1 for r in results if r['classification']=='edge'),
    'n_outside': sum(1 for r in results if r['classification']=='outside')
}

print('Summary:')
for k,v in summary.items():
    print(f"  {k}: {v}")

# Convert to DataFrame for saving / inspection
df = pd.DataFrame(results)
df.head()


Summary:
  image_base: 01_01_03_2
  num_cells: 120
  num_rois: 84
  n_internal: 0
  n_edge: 79
  n_outside: 5


Unnamed: 0,name,x,y,cell_id,distance_to_cell,classification
0,0025-0674,674.0,25.0,0,55.461698,outside
1,0231-0377,377.0,231.0,0,20.124612,edge
2,0256-0366,366.0,256.0,0,15.0,edge
3,0370-0489,489.0,370.0,12,3.162278,edge
4,0555-0542,542.0,555.0,0,34.205263,outside


In [1]:
# 7) Save results (CSV and overlay image)
# CSV summary and per-ROI details
out_dir = MASK_PATH.parent / 'roi_analysis_results'
out_dir.mkdir(exist_ok=True)

summary_path = out_dir / f"{sel['base']}_roi_summary.csv"
details_path = out_dir / f"{sel['base']}_roi_details.csv"
df.to_csv(details_path, index=False)
# summary as small csv
pd.DataFrame([summary]).to_csv(summary_path, index=False)
print(f"Saved details: {details_path}")
print(f"Saved summary: {summary_path}")

# Save overlay image (draw colored circles on original or mask image)
if ORIG_PATH is not None:
    overlay = orig_img.copy()
    if overlay.ndim == 2:
        overlay = cv2.cvtColor(overlay, cv2.COLOR_GRAY2BGR)
else:
    overlay = cv2.cvtColor(((mask>0).astype(np.uint8)*255), cv2.COLOR_GRAY2BGR)

# Define color mapping for ROI classifications
color_map = {'internal': 'green', 'edge': 'yellow', 'outside': 'red'}

for r in results:
    x, y = int(round(r['x'])), int(round(r['y']))
    c = color_map[r['classification']]
    bgr = (0,255,0) if c=='green' else (0,255,255) if c=='yellow' else (0,0,255)
    cv2.circle(overlay, (x,y), 6, bgr, -1)

overlay_path = out_dir / f"{sel['base']}_roi_overlay.tif"
tifffile.imwrite(str(overlay_path), overlay)
print(f"Saved overlay: {overlay_path}")


NameError: name 'MASK_PATH' is not defined

In [None]:
# 8) Optional: Batch process all mask+ROI pairs
# This will create a combined CSV summarizing every file found earlier
all_summaries = []
all_details = []
for p in pairs:
    if p['roi'] is None:
        print(f"Skipping {p['base']}: no ROI zip")
        continue
    try:
        mask = tifffile.imread(str(p['mask'])).astype(np.int32)
        orig_num_cells = int(mask.max())
        removed_small_cells = 0
        if orig_num_cells > 0:
            counts = np.bincount(mask.ravel())
            small_labels = np.where(counts < MIN_CELL_AREA)[0]
            small_labels = small_labels[small_labels != 0]
            removed_small_cells = int(len(small_labels))
            if removed_small_cells > 0:
                for lab in small_labels:
                    mask[mask == lab] = 0
        # compute background map
        background_map = distance_transform_edt(mask == 0)
        cell_dist_cache = {}
        roi_items = load_rois_from_zip(p['roi'])
        for r in roi_items:
            x, y = r['x'], r['y']
            xi, yi = int(round(x)), int(round(y))
            if yi < 0 or yi >= mask.shape[0] or xi < 0 or xi >= mask.shape[1]:
                status = 'outside'
                dist = np.nan
                cell_id = 0
            else:
                cell_id = int(mask[yi, xi])
                if cell_id > 0:
                    if cell_id not in cell_dist_cache:
                        cell_dist_cache[cell_id] = distance_transform_edt(mask==cell_id)
                    dist = float(cell_dist_cache[cell_id][yi, xi])
                    status = 'internal' if dist >= THRESHOLD else 'edge'
                else:
                    dist = float(background_map[yi, xi])
                    status = 'outside' if dist >= THRESHOLD else 'edge'
            all_details.append({'image_base': p['base'], 'roi_name': r['name'], 'x': x, 'y': y, 'cell_id': cell_id, 'distance_to_cell': dist, 'classification': status})
        # Calculate counts for this image only (from the last len(roi_items) entries)
        current_rois = all_details[-len(roi_items):]
        summary = {'image_base': p['base'], 'orig_num_cells': orig_num_cells, 'removed_small_cells': removed_small_cells, 'num_cells': int(mask.max()), 'num_rois': len(roi_items), 'n_internal': sum(1 for d in current_rois if d['classification']=='internal'), 'n_edge': sum(1 for d in current_rois if d['classification']=='edge'), 'n_outside': sum(1 for d in current_rois if d['classification']=='outside')}
        all_summaries.append(summary)
        print(f"Processed {p['base']}: original_cells={orig_num_cells}, removed_small={removed_small_cells}, remaining_cells={summary['num_cells']}, rois={summary['num_rois']}")
    except Exception as e:
        print(f"Error processing {p['base']}: {e}")

out_dir = FOLDER / 'roi_analysis_results'
out_dir.mkdir(exist_ok=True)
pd.DataFrame(all_details).to_csv(out_dir / 'all_roi_details.csv', index=False)

pd.DataFrame(all_summaries).to_csv(out_dir / 'all_roi_summary.csv', index=False)print(f"Saved batch results to {out_dir}")

Processed 01_01_02_0: cells=77, rois=32
Processed 01_01_02_1: cells=96, rois=56
Processed 01_01_03_0: cells=50, rois=25
Processed 01_01_03_1: cells=53, rois=21
Processed 01_01_04_0: cells=53, rois=22
Processed 01_01_04_1: cells=33, rois=5
Processed 01_02_02_0: cells=84, rois=48
Processed 01_02_02_1: cells=94, rois=53
Processed 01_02_04_0: cells=40, rois=26
Processed 01_02_04_1: cells=79, rois=39
Processed 01_03_02_0: cells=59, rois=29
Processed 01_03_02_1: cells=54, rois=24
Processed 01_03_03_0: cells=57, rois=33
Processed 01_03_03_1: cells=60, rois=29
Processed 01_03_04_0: cells=62, rois=33
Processed 01_03_04_1: cells=57, rois=27
Processed 01_04_02_0: cells=60, rois=26
Processed 01_04_02_1: cells=52, rois=21
Processed 01_05_02_0: cells=72, rois=44
Processed 01_05_02_1: cells=79, rois=48
Processed 01_05_02_2: cells=82, rois=90
Processed 01_05_04_0: cells=56, rois=40
Processed 01_05_04_1: cells=49, rois=46
Processed 01_06_02_0: cells=65, rois=25
Processed 01_06_02_1: cells=72, rois=59
P