In [None]:
import os
import numpy as np
import re
from skimage import io, segmentation, color
from PIL import Image

# ─── per-cell extraction from Cellpose segmentations ───
# For every PNG in INPUT_DIR that has a same-named .npy next to it, load the
# label mask stored under the 'masks' key, crop each labelled cell to its
# bounding box, zero every pixel outside the cell, and save the crop as
# '<identifier>_cell_<label>.png' in OUTPUT_DIR.

# ─── paths ───
BASE_DIR = '/content/drive/MyDrive/biotech/Retina_Lab/Image_data'
INPUT_DIR = os.path.join(BASE_DIR, 'cropped_others')
OUTPUT_DIR = os.path.join(BASE_DIR, 'extracted_oth')

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Get all files in the input directory (sorted for a reproducible order)
all_files = sorted(os.listdir(INPUT_DIR))

# Find all PNG files; their matching NPY files are resolved inside the loop
processed_pairs = 0
total_cells_extracted = 0
png_files = [f for f in all_files if f.lower().endswith('.png')]

# Compiled once: the pattern is the same for every file.
identifier_pattern = re.compile(r'(C\d+_\d+)')

print(f"Found {len(png_files)} PNG files in {INPUT_DIR}")

for i, png_file in enumerate(png_files):
    # Get the base name without extension
    base_name = os.path.splitext(png_file)[0]
    npy_file = base_name + '.npy'

    # Skip if the matching NPY file doesn't exist
    npy_path = os.path.join(INPUT_DIR, npy_file)
    if not os.path.exists(npy_path):
        print(f"No matching NPY file found for {png_file}, skipping.")
        continue

    png_path = os.path.join(INPUT_DIR, png_file)

    # Extract identifier for naming output files; fall back to the full
    # base name when the file does not follow the 'C<digits>_<digits>' scheme.
    match = identifier_pattern.search(png_file)
    identifier = match.group(1) if match else base_name

    print(f"Processing pair ({i+1}/{len(png_files)}): {png_file} and {npy_file}")

    # Initialised before the try so the finally clause can always account
    # for files that were written, even when a later cell fails.
    cells_extracted = 0

    try:
        # ─── load image ───
        img = io.imread(png_path)

        # ─── load the Cellpose .npy (it's saved as a dict) ───
        # NOTE: allow_pickle=True unpickles arbitrary objects; only run this
        # on segmentation files from a trusted source.
        raw = np.load(npy_path, allow_pickle=True)
        data = raw.item()            # now data is a dict
        masks = data['masks']        # label array; 0 is treated as background

        # Print shapes for debugging
        print(f"  Image shape: {img.shape}")
        print(f"  Masks shape: {masks.shape}")

        # Check if the masks shape matches the image shape
        if masks.shape[:2] != img.shape[:2]:
            print(f"  Warning: Mask shape {masks.shape[:2]} doesn't match image shape {img.shape[:2]}")
            continue

        # ─── extract & save each cell ───
        labels = np.unique(masks)
        # Every value returned by np.unique occurs in masks, so each
        # remaining label is guaranteed to own at least one pixel.
        for lab in labels[labels != 0]:  # Skip background
            # boolean mask for this cell
            m = masks == lab
            ys, xs = np.nonzero(m)

            # tight bounding box (upper bounds are exclusive)
            y0, x0 = ys.min(), xs.min()
            y1, x1 = ys.max() + 1, xs.max() + 1

            # crop image & mask; copy so the source image is never modified
            patch = img[y0:y1, x0:x1].copy()
            m_crop = m[y0:y1, x0:x1]

            # zero-out background: a 2-D boolean index selects pixels on the
            # first two axes, so this handles grayscale and multi-channel
            # (RGB / RGBA) patches alike and preserves the dtype.
            patch[~m_crop] = 0

            # save as PNG with zero-padded ID; int() makes the 'd' format
            # independent of the mask's NumPy dtype
            fname = os.path.join(OUTPUT_DIR, f'{identifier}_cell_{int(lab):03d}.png')
            Image.fromarray(patch).save(fname)
            cells_extracted += 1

    except Exception as e:
        # Best-effort batch: report the failure and move on to the next pair.
        print(f"  Error processing pair {png_file} and {npy_file}: {str(e)}")
    else:
        print(f"  Extracted {cells_extracted} cells from {png_file}")
        processed_pairs += 1
    finally:
        # Count every file actually written, including those saved before
        # an error interrupted this image.
        total_cells_extracted += cells_extracted

print(f"Processing complete. Processed {processed_pairs} image/segmentation pairs.")
print(f"Total of {total_cells_extracted} cells extracted to {OUTPUT_DIR}")

Found 8 PNG files in /content/drive/MyDrive/biotech/Retina_Lab/Image_data/cropped_others
Processing pair (1/8): C3_00010000_oth.png and C3_00010000_oth.npy
  Image shape: (441, 1024, 4)
  Masks shape: (441, 1024)
  Extracted 106 cells from C3_00010000_oth.png
Processing pair (2/8): C3_00010001_oth.png and C3_00010001_oth.npy
  Image shape: (399, 1024, 4)
  Masks shape: (399, 1024)
  Extracted 116 cells from C3_00010001_oth.png
Processing pair (3/8): C3_00010002_oth.png and C3_00010002_oth.npy
  Image shape: (447, 1024, 4)
  Masks shape: (447, 1024)
  Extracted 108 cells from C3_00010002_oth.png
Processing pair (4/8): C3_00010003_oth.png and C3_00010003_oth.npy
  Image shape: (438, 1024, 4)
  Masks shape: (438, 1024)
  Extracted 100 cells from C3_00010003_oth.png
Processing pair (5/8): C3_00010004_oth.png and C3_00010004_oth.npy
  Image shape: (411, 1024, 4)
  Masks shape: (411, 1024)
  Extracted 94 cells from C3_00010004_oth.png
Processing pair (6/8): C3_00010005_oth.png and C3_0001000