# ROI Analysis and Classification

This notebook analyzes cell segmentation masks and ImageJ ROI files to classify ROIs based on their distance from cell boundaries.

**Goal:**
For each image:
1.  Count cells (from mask).
2.  Count ROIs.
3.  Classify ROIs as:
    *   **Internal:** Inside cell, > 30px from edge.
    *   **Outside:** Outside cell, > 30px from edge.
    *   **Edge:** Within 30px of the edge (inside or outside).

**Input:**
*   Mask files: `*_mask.tif`
*   ROI files: `*.zip`

**Dependencies:**
*   `numpy`, `pandas`, `tifffile`, `scipy`, `roifile`, `opencv-python`, `seaborn`, `matplotlib`

In [None]:
# Install third-party dependencies that may be missing (roifile, tifffile, seaborn, matplotlib)
%pip install roifile tifffile seaborn matplotlib

In [None]:
import numpy as np
import pandas as pd
import tifffile
from roifile import ImagejRoi
from scipy import ndimage
import cv2
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import re

# Folder that is searched for the mask (*_mask.tif) and ROI (*.zip) files.
DATA_PATH = Path(r"S:\micro\ts2625\eh2888\lem\HEImages_0123\quantification")

print(f"Data Path: {DATA_PATH}")
# Report up front whether the folder is reachable from this machine.
print("Data Path found." if DATA_PATH.exists() else "WARNING: Data Path does not exist!")

## Helper Functions

In [None]:
def get_roi_centroid(roi):
    """
    Extract the centroid (y, x) of an ROI in absolute image coordinates.

    The centroid is the mean of the ROI's vertex coordinates. The array
    returned by ``roi.coordinates()`` is treated as absolute (x, y) image
    positions (roifile applies the bounding-box offset itself), so the
    bounding-box origin is NOT added a second time here.

    Fallbacks, in order:
      1. Centre of the bounding box, when no vertex coordinates are available.
      2. (roi.top, roi.left), when the bounding box is incomplete.
      3. (0, 0), when the object exposes no usable position at all.
    Fallbacks 2 and 3 print a warning because the result is only a rough guess.

    Parameters:
        roi: Object exposing ``coordinates()`` and the bounding-box attributes
            ``top``, ``left``, ``bottom``, ``right`` (e.g. a roifile ImagejRoi).

    Returns:
        tuple: (y, x) centroid position; values are not rounded.
    """
    coords = None
    try:
        # Called without keyword arguments so the call does not depend on
        # optional parameters that a given roifile version may not accept.
        coords = roi.coordinates()
    except Exception as e:
        print(f"Warning: Could not extract coordinates from ROI {roi}: {e}")

    if coords is not None and len(coords) > 0:
        coords = np.asarray(coords, dtype=float)
        if coords.ndim == 2 and coords.shape[1] >= 2:
            # Columns are (x, y); the caller expects (y, x).
            return (np.mean(coords[:, 1]), np.mean(coords[:, 0]))

    try:
        # No vertices available: use the centre of the bounding box.
        y = roi.top + (roi.bottom - roi.top) / 2.0
        x = roi.left + (roi.right - roi.left) / 2.0
        return (y, x)
    except Exception as e:
        print(f"Warning: Could not extract coordinates from ROI {roi}: {e}")
        # Last resort: the top-left corner is a poor approximation, but it is
        # still closer to the ROI than the image origin.
        if hasattr(roi, 'top') and hasattr(roi, 'left'):
            return (roi.top, roi.left)
        return (0, 0)

def compute_signed_distance_map(mask):
    """
    Build a signed Euclidean distance map from a mask.

    Every pixel with a value > 0 counts as foreground (cell).
      * Foreground pixels receive +distance to the nearest background pixel.
      * Background pixels receive -distance to the nearest foreground pixel.
    The sign therefore flips across the cell boundary.
    """
    foreground = mask > 0
    background = np.logical_not(foreground)

    # Depth of each cell pixel below the cell surface.
    depth_inside = ndimage.distance_transform_edt(foreground)
    # How far each background pixel lies from the closest cell.
    depth_outside = ndimage.distance_transform_edt(background)

    # At any pixel only one of the two terms is non-zero.
    return depth_inside - depth_outside

In [None]:
def process_image(mask_path, roi_path, edge_thr=30, debug=False):
    """
    Processes a single image pair (mask + roi zip).

    Each ROI is reduced to its centroid, and the signed distance of that
    centroid from the nearest cell boundary decides its class:
      * "Internal":       distance >  edge_thr (inside a cell)
      * "Outside":        distance < -edge_thr (outside all cells)
      * "Edge":           -edge_thr <= distance <= edge_thr
      * "Out of Bounds":  centroid falls outside the mask
      * "Error":          centroid or distance lookup raised an exception

    Parameters:
        mask_path: Path (or str) of the mask TIFF.
        roi_path: Path (or str) of the ImageJ ROI file (.zip or single .roi).
        edge_thr: Distance threshold in pixels separating "Edge" from
            "Internal"/"Outside".
        debug: When True, print diagnostics for the image and its first ROIs.

    Returns:
        dict with keys "Image", "Num_Cells", "Num_ROIs", "Internal", "Edge",
        "Outside", "Out_of_Bounds" and "Error", or None when the mask, the ROI
        file or the distance map could not be produced.
    """
    # Accept plain strings too; .name and .stem are needed below.
    mask_path = Path(mask_path)

    # Load Mask
    try:
        mask = tifffile.imread(mask_path)
    except Exception as e:
        print(f"Error loading mask {mask_path}: {e}")
        return None

    # Count Cells: number of distinct non-zero labels (0 is background).
    try:
        num_cells = int(np.count_nonzero(np.unique(mask)))
    except Exception as e:
        print(f"Error counting cells in {mask_path}: {e}")
        num_cells = 0

    # Load ROIs
    try:
        rois = ImagejRoi.fromfile(roi_path)
        # If single ROI, make list
        if not isinstance(rois, list):
            rois = [rois]
    except FileNotFoundError:
        print(f"No ROI file found at {roi_path}")
        return None
    except Exception as e:
        print(f"Error loading ROIs: {e}")
        return None

    # Compute Distance Map
    try:
        dist_map = compute_signed_distance_map(mask)
    except Exception as e:
        print(f"Error computing distance map: {e}")
        return None

    # --- DIAGNOSTICS ---
    if debug:
        print("\n--- DIAGNOSTICS ---")
        print(f"Image: {mask_path.name}")
        print(f"Mask Shape: {mask.shape}")
        print(f"Distance Map Range: Min={dist_map.min():.2f}, Max={dist_map.max():.2f}")
        print(f"Number of ROIs: {len(rois)}")
        print(f"Internal Threshold: > {edge_thr} px")

        if dist_map.max() < edge_thr:
            print(f"*** WARNING: Max distance ({dist_map.max():.2f}) is less than threshold ({edge_thr}). No ROIs can be 'Internal'!")

        # Print details of just the first ROI to check coordinates
        if len(rois) > 0:
            print(f"DEBUG First ROI: Type={rois[0].roitype}, Bounds(L,T,R,B)=({rois[0].left}, {rois[0].top}, {rois[0].right}, {rois[0].bottom})")
    # ------------------

    # Every category is tracked explicitly so that the per-class counts
    # always add up to Num_ROIs.
    counts = {
        "Internal": 0,
        "Edge": 0,
        "Outside": 0,
        "Out of Bounds": 0,
        "Error": 0,
    }

    for i, roi in enumerate(rois):
        try:
            # Get centroid (y, x)
            y, x = get_roi_centroid(roi)
            # Floor instead of truncating: int(-0.5) == 0 would let a centroid
            # that lies just outside the image pass the bounds check.
            y, x = int(np.floor(y)), int(np.floor(x))

            # Check bounds
            if y < 0 or y >= mask.shape[0] or x < 0 or x >= mask.shape[1]:
                roi_class = "Out of Bounds"
                d = float('nan')
            else:
                # Get distance
                d = dist_map[y, x]

                if d > edge_thr:
                    roi_class = "Internal"
                elif d < -edge_thr:
                    roi_class = "Outside"
                else:
                    # -edge_thr <= d <= edge_thr
                    roi_class = "Edge"

            counts[roi_class] += 1

            if debug and i < 10:
                print(f"ROI {i}: Centroid(y={y}, x={x}) | Dist={d:.2f} | Class={roi_class}")

        except Exception as e:
            if debug: print(f"Error determining ROI class for ROI {i}: {e}")
            counts["Error"] += 1

    # Unclassifiable ROIs are reported even without debug so that a mismatch
    # between Num_ROIs and the class counts is never silent.
    if counts["Out of Bounds"] or counts["Error"]:
        print(f"  Warning: {counts['Out of Bounds']} ROI(s) out of bounds, "
              f"{counts['Error']} ROI(s) could not be classified in {mask_path.name}")

    # Summary
    result = {
        "Image": mask_path.stem.replace("_mask", ""),
        "Num_Cells": num_cells,
        "Num_ROIs": len(rois),
        "Internal": counts["Internal"],
        "Edge": counts["Edge"],
        "Outside": counts["Outside"],
        "Out_of_Bounds": counts["Out of Bounds"],
        "Error": counts["Error"],
    }

    return result

## Batch Processing

Iterate through all matching file pairs in the folder.

In [None]:
results = []

print(f"Checking inputs in: {DATA_PATH}")
if DATA_PATH.exists():
    # Each "*_mask.tif" in the data folder is one image to analyze.
    mask_files = list(DATA_PATH.glob("*_mask.tif"))
    print(f"Found {len(mask_files)} mask files.")

    if not mask_files:
        # Show a sample of the folder contents to help spot a naming mismatch.
        print("WARNING: No mask files found (looking for *_mask.tif).")
        print("Listing first 10 files in directory to help debug:")
        try:
            for f in list(DATA_PATH.glob("*"))[:10]:
                print(f"  {f.name}")
        except Exception as e:
            print(f"Error listing directory: {e}")

    for mask_file in mask_files:
        # The ROI archive carries the mask's name without the "_mask" tag,
        # e.g. 01_01_02_0_mask.tif -> 01_01_02_0.zip
        base_name = mask_file.stem.replace("_mask", "")
        roi_file = mask_file.parent / f"{base_name}.zip"

        print(f"Processing {base_name}...")
        if roi_file.exists():
            # Switch to True to print per-image diagnostics while debugging.
            run_diagnostics = False

            res = process_image(mask_file, roi_file, edge_thr=30, debug=run_diagnostics)
            if not res:
                print(f"  Failed to process {base_name}")
            else:
                results.append(res)
                print(f"  Finished {base_name}: Cells={res['Num_Cells']}, ROIs={res['Num_ROIs']}")
        else:
            print(f"  Skipping: ROI file not found at {roi_file.name}")
else:
    print("CRITICAL ERROR: DATA_PATH does not exist.")

# One row per successfully processed image; the preview is the cell output.
df_results = pd.DataFrame(results)
df_results.head()

In [None]:
# Write the per-image table to disk (skipped when nothing was processed)
if df_results.empty:
    print("No results to save.")
else:
    # The CSV is stored in the same folder as the input data.
    output_csv = DATA_PATH / "ROI_Analysis_Results_v3.csv"
    df_results.to_csv(output_csv, index=False)
    print(f"Saved results to {output_csv}")

## Visualization (Group Summary)

Aggregates results by experiment group and creates a summary plot.

In [None]:
if not df_results.empty:
    # 1. Parse Group from filename
    # Slide mapping from reference:
    # 1-4: WT-Saline
    # 5-8: HSA_LR-Saline
    # 9-13: HSA_LR-CLAAAV
    # 14-15: eGFP
    group_by_slide = {}
    for slides, group_name in (
        (range(1, 5), 'WT-Saline'),
        (range(5, 9), 'HSA_LR-Saline'),
        (range(9, 14), 'HSA_LR-CLAAAV'),
        (range(14, 16), 'eGFP'),
    ):
        group_by_slide.update(dict.fromkeys(slides, group_name))

    def slide_to_group(slide_num):
        """Map a slide number (int or numeric string) to its experiment group.

        Returns 'Unknown' when the value is missing, not a number, or not
        part of the slide mapping.
        """
        try:
            s = int(slide_num)
        except (TypeError, ValueError):
            # None (no number found in the filename) or a non-numeric value.
            return 'Unknown'
        return group_by_slide.get(s, 'Unknown')

    def get_slide_num(filename):
        """Extract the slide number (as a string of digits) from an image name.

        The name is split on '_' and '-'; the second all-digit token is taken
        as the slide number, falling back to the first when only one exists.
        Returns None when the name contains no all-digit token.
        """
        # Filename format: NN_Slide_Region...
        parts = re.split(r'[_-]', str(filename))
        digits = [p for p in parts if p.isdigit()]
        if len(digits) >= 2:
            return digits[1]  # Second number is the slide number
        elif len(digits) == 1:
            return digits[0]  # Fallback
        return None

    # Apply parsing
    df_results['slide_num'] = df_results['Image'].apply(get_slide_num)
    df_results['group'] = df_results['slide_num'].apply(slide_to_group)

    # 2. Aggregating by Group
    # We sum up the counts for all images in the group
    group_agg = df_results.groupby('group').agg({
        'Num_ROIs': 'sum',
        'Internal': 'sum'
    }).reset_index()

    print("\nGroup Summary:")
    print(group_agg)

    # 3. Plotting
    # Long format (one row per group and count type) so seaborn can draw
    # Total and Internal side by side via `hue`.
    plot_df = group_agg.melt(id_vars='group', value_vars=['Num_ROIs','Internal'],
                             var_name='type', value_name='count')

    # Rename for clarity
    plot_df['type'] = plot_df['type'].replace({'Num_ROIs': 'Total Nuclei', 'Internal': 'Internal Nuclei'})

    # Define comparison order
    desired_order = ['WT-Saline', 'HSA_LR-Saline', 'HSA_LR-CLAAAV', 'eGFP']
    order = [g for g in desired_order if g in plot_df['group'].unique()]

    # Append unknown groups if any, so no data is dropped from the plot
    for g in plot_df['group'].unique():
        if g not in order:
            order.append(g)

    # Create Plot
    plt.figure(figsize=(10,6))
    sns.set_style("whitegrid")

    # Using the palette from reference: ['#4C72B0', '#DD8452'] (Blue, Orange)
    ax = sns.barplot(data=plot_df, x='group', y='count', hue='type',
                     order=order, palette=['#4C72B0', '#DD8452'])

    # Add value labels on bars
    for container in ax.containers:
        ax.bar_label(container)

    plt.title('Total vs Internal Nuclei per Group', fontsize=14)
    plt.xlabel('Group', fontsize=12)
    plt.ylabel('Count', fontsize=12)
    plt.legend(title='')
    plt.tight_layout()

    # Save plot (before plt.show(), while the figure is still current)
    plot_path = DATA_PATH / 'ROI_Analysis_Group_Summary.png'
    plt.savefig(plot_path, dpi=300)
    print(f"Saved summary plot to {plot_path}")
    plt.show()