# Spot ROI and Mask Analysis

This notebook analyzes previously generated **ImageJ ROIs** against **Cellpose Masks**.

**Workflow**:
1.  **Matches Files**: Finds triplets of files for each image:
    *   Mask: `..._cells_mask.tif` (or `..._cell_mask.tif`)
    *   Channel 1 ROIs: `..._C1_ROIs.zip`
    *   Channel 2 ROIs: `..._C2_ROIs.zip`
2.  **Process Masks**:
    *   Count cells.
    *   Binarize the mask (all cells merged into one foreground) and erode it by **4 pixels**.
3.  **Analyze ROIs**:
    *   Extract coordinates from `.zip` files.
    *   **Channel 1**: Count total spots.
    *   **Channel 2**: Count total spots and classify each spot as **Inside** or **Outside** the eroded mask (spots outside the image bounds count as Outside).
4.  **Output**: Saves `ROI_Analysis_Results.csv`.
5.  **Visualization**: Boxplots comparing groups (DI_Gastro, Saline_Gastro, WT_Gastro).

In [None]:
# Install the required libraries (roifile is the one that reads ImageJ ROI files)
%pip install roifile pandas tifffile scikit-image matplotlib seaborn

In [None]:
import os
import glob
import numpy as np
import pandas as pd
import tifffile
import roifile
from skimage import morphology
import matplotlib.pyplot as plt
import seaborn as sns

# --- Parameters ---

# Directory containing your masks and ROI zips.
# The results CSV (ROI_Analysis_Results.csv) is written into this same directory.
data_dir = r"S:\micro\ts2625\eh2888\lem\20260128_EVE_LargeImage\tiff\quantification\cellmask"

# Radius (in pixels) of the disk footprint used to erode the binarized cell mask.
erosion_pixels = 4

# --- Helper Functions ---

def get_roi_coordinates(roi_path):
    """
    Read an ImageJ ROI file and return its points as (y, x) rows.

    Every ROI found in the file contributes all of its coordinates, so both
    single-point ROIs and multi-point ROIs are handled.

    Args:
        roi_path: Path to the ROI file (typically a ``.zip`` of ROIs).

    Returns:
        A numpy array of shape (N, 2) with one row per point, ordered (y, x)
        so that a row can be used directly for ``image[y, x]`` indexing.
        An array of shape (0, 2) is returned when no point could be read.

    Reading is best-effort: any error is printed and the points collected
    before the error (possibly none) are returned.
    """
    point_sets = []
    try:
        rois = roifile.ImagejRoi.fromfile(roi_path)

        # fromfile can return a single object or a list
        if not isinstance(rois, list):
            rois = [rois]

        for roi in rois:
            pts = np.asarray(roi.coordinates())

            # An ROI without coordinates would otherwise produce a ragged result
            if pts.size == 0:
                continue

            # A single point may come back 1D; normalize to (1, 2)
            if pts.ndim == 1:
                pts = pts.reshape(1, -1)

            # roifile reports coordinates in ImageJ order, i.e. columns (x, y).
            # Swap them so callers get (y, x) for numpy/skimage array indexing.
            point_sets.append(pts[:, [1, 0]])

    except Exception as e:
        print(f"Error reading ROI file {os.path.basename(roi_path)}: {e}")

    if not point_sets:
        return np.empty((0, 2))
    return np.concatenate(point_sets, axis=0)

def erode_mask(mask, pixels=4):
    """
    Binarize a label mask and shrink its foreground with a disk footprint.

    Every value greater than 0 counts as foreground, so all labels are merged
    into one region before erosion; boundaries between touching labels are
    therefore interior to the foreground and are not eroded.

    Args:
        mask: Array of labels (0 = background).
        pixels: Radius passed to ``morphology.disk`` for the footprint.

    Returns:
        The eroded binary mask as returned by ``morphology.binary_erosion``.
    """
    return morphology.binary_erosion(mask > 0, morphology.disk(pixels))

def find_file_triplets(directory):
    """
    Match each mask file in `directory` with its C1 and C2 ROI zip files.

    Masks are files ending in ``_cells_mask.tif`` or ``_cell_mask.tif``.
    For every mask several candidate base IDs are derived from its filename
    and tried in order; the first one for which both ``<id>_C1_ROIs.zip`` and
    ``<id>_C2_ROIs.zip`` exist wins. A warning is printed for masks without
    a complete ROI pair.

    Args:
        directory: Folder containing the masks and ROI zips.

    Returns:
        A list of dicts with keys ``'id'``, ``'mask'``, ``'c1_roi'`` and
        ``'c2_roi'``, ordered by sorted mask path.
    """
    # Known mask suffixes, most specific first so the shortest base ID is tried first
    mask_suffixes = ("_ch1_cells_mask.tif", "_cells_mask.tif", "_cell_mask.tif")

    # Collect both common mask endings, de-duplicated and in a stable order
    mask_paths = sorted(
        set(glob.glob(os.path.join(directory, "*_cells_mask.tif")))
        | set(glob.glob(os.path.join(directory, "*_cell_mask.tif")))
    )

    triplets = []

    for mask_path in mask_paths:
        filename = os.path.basename(mask_path)

        # Strip each matching suffix from the END of the name only
        candidates = [
            filename[:-len(suffix)]
            for suffix in mask_suffixes
            if filename.endswith(suffix)
        ]

        # Fallback: drop the last two underscore-separated parts
        candidates.append(filename.rsplit('_', 2)[0])

        # dict.fromkeys removes duplicates while preserving order
        for base_id in dict.fromkeys(candidates):
            c1_path = os.path.join(directory, base_id + "_C1_ROIs.zip")
            c2_path = os.path.join(directory, base_id + "_C2_ROIs.zip")

            if os.path.exists(c1_path) and os.path.exists(c2_path):
                triplets.append({
                    'id': base_id,
                    'mask': mask_path,
                    'c1_roi': c1_path,
                    'c2_roi': c2_path
                })
                break  # Stop checking candidates for this mask
        else:
            # No candidate produced a complete C1/C2 pair
            print(f"Warning: Could not find ROI pairs for mask: {filename}")

    return triplets

def inspect_first_roi(directory):
    """
    Print a short structural summary of one ROI zip found in `directory`.

    The first ``*_ROIs.zip`` returned by glob is opened and, for up to five of
    its ROI entries, the name, ROI type and coordinate-array shape are printed.
    This is a diagnostic aid for checking how the points are stored (single
    points, multi-points, ...). Read errors are printed, not raised.

    Args:
        directory: Folder to search for ROI zip files.

    Returns:
        None. All output goes to stdout.
    """
    roi_archives = glob.glob(os.path.join(directory, "*_ROIs.zip"))
    if len(roi_archives) == 0:
        print("No ROI zip files found for inspection.")
        return

    sample_file = roi_archives[0]
    sample_name = os.path.basename(sample_file)
    print(f"--- Inspecting Sample ROI File: {sample_name} ---")

    try:
        loaded = roifile.ImagejRoi.fromfile(sample_file)
        # Normalize to a list: a single ROI object may be returned
        entries = loaded if isinstance(loaded, list) else [loaded]
        total = len(entries)

        print(f"Number of ROI entries in zip: {total}")

        # Only preview the first five entries to keep the output short
        for number, roi in enumerate(entries[:5], start=1):
            print(f"  ROI {number}: Name='{roi.name}', Type={roi.roitype}, Coords Shape={roi.coordinates().shape}")

        hidden = total - 5
        if hidden > 0:
            print(f"  ... and {hidden} more.")

    except Exception as err:
        print(f"Error reading sample: {err}")
    print("--------------------------------------------------\n")

# --- Main Loop ---

# Print the structure of one sample ROI archive so its layout can be checked by eye
inspect_first_roi(data_dir)

triplets = find_file_triplets(data_dir)
print(f"Found {len(triplets)} complete sets to analyze.")

# Explicit column list: fixes the CSV layout and guarantees the columns exist
# even when no complete file set was found (a DataFrame built from an empty
# list has no columns, so the 'Filename' lookup below would raise KeyError).
result_columns = [
    'Filename',
    'Cell_Count',
    'Ch1_Spots',
    'Ch2_Spots_Total',
    'Ch2_Inside',
    'Ch2_Outside',
]

results = []

for t in triplets:
    print(f"Processing {t['id']}...")

    # 1. Load Mask
    mask = tifffile.imread(t['mask'])

    # Count cells: number of distinct non-zero values (0 is excluded as background)
    cell_ids = np.unique(mask)
    cell_count = len(cell_ids[cell_ids != 0])

    # Erode Mask
    eroded = erode_mask(mask, pixels=erosion_pixels)

    # 2. Process Channel 1 ROIs (only the total is needed)
    c1_coords = get_roi_coordinates(t['c1_roi'])
    c1_total = len(c1_coords)

    # 3. Process Channel 2 ROIs
    c2_coords = get_roi_coordinates(t['c2_roi'])
    c2_total = len(c2_coords)

    c2_inside = 0

    # NOTE: unpacking assumes a 2D mask; a stacked/multi-channel mask raises here
    h, w = mask.shape

    for coord in c2_coords:
        # Rows are taken as (y, x), as documented by get_roi_coordinates
        y, x = int(round(coord[0])), int(round(coord[1]))

        # A spot is "inside" only if it lies within the image AND on the eroded mask
        if 0 <= x < w and 0 <= y < h and eroded[y, x]:
            c2_inside += 1

    # Everything else (off the eroded mask or out of bounds) counts as outside
    c2_outside = c2_total - c2_inside

    results.append({
        'Filename': t['id'],
        'Cell_Count': cell_count,
        'Ch1_Spots': c1_total,
        'Ch2_Spots_Total': c2_total,
        'Ch2_Inside': c2_inside,
        'Ch2_Outside': c2_outside
    })

# Create DataFrame (columns are present even if `results` is empty)
df = pd.DataFrame(results, columns=result_columns)

# --- Group Parsing ---
# Assumption: Group is the first two parts of the filename (e.g., DI_Gastro_002 -> DI_Gastro)
df['Group'] = df['Filename'].apply(lambda x: "_".join(x.split("_")[:2]))

# Save Results
csv_path = os.path.join(data_dir, 'ROI_Analysis_Results.csv')
df.to_csv(csv_path, index=False)

print(f"Done! Results saved to {csv_path}")
display(df.head())

## Visualization
Comparison of **Cells**, **Ch1 Spots**, and **Ch2 Spots** across different groups.

In [None]:
if len(df) > 0:
    # Global seaborn look for every figure below
    sns.set_theme(style="whitegrid")

    # Figure 1: three side-by-side panels with per-file counts
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    # Alphabetical group order, shared by all panels so they line up
    order = sorted(df['Group'].unique())

    # (column to plot, panel title) for each of the three panels, left to right
    count_panels = [
        ('Cell_Count', 'Cells per File'),
        ('Ch1_Spots', 'Ch1 Spots per File'),
        ('Ch2_Spots_Total', 'Ch2 Spots per File (Total)'),
    ]
    for ax, (column, title) in zip(axes, count_panels):
        # Boxplot for the distribution, with the individual files overlaid as points
        sns.boxplot(x='Group', y=column, data=df, ax=ax, order=order, palette="Set2")
        sns.stripplot(x='Group', y=column, data=df, ax=ax, order=order, color='black', alpha=0.6, jitter=True)
        ax.set_title(title)
        ax.set_ylabel('Count')

    plt.tight_layout()
    plt.show()

    # Figure 2: Ch2 spots split into inside / outside the eroded mask
    fig2, axes2 = plt.subplots(1, 2, figsize=(12, 6))

    breakdown_panels = [
        ('Ch2_Inside', 'Ch2 Spots: Inside Cells'),
        ('Ch2_Outside', 'Ch2 Spots: Outside Cells'),
    ]
    for ax, (column, title) in zip(axes2, breakdown_panels):
        sns.boxplot(x='Group', y=column, data=df, ax=ax, order=order, palette="Pastel1")
        sns.stripplot(x='Group', y=column, data=df, ax=ax, order=order, color='black', alpha=0.6, jitter=True)
        ax.set_title(title)

    plt.tight_layout()
    plt.show()

    # Per-group summary statistics, transposed so groups become columns
    print("\n--- Detailed Stats ---")
    stat_columns = ['Cell_Count', 'Ch1_Spots', 'Ch2_Spots_Total', 'Ch2_Inside', 'Ch2_Outside']
    print(df.groupby('Group')[stat_columns].describe().T)