In [19]:
import os
import numpy as np
from tifffile import imread
from sklearn.model_selection import train_test_split
from scipy import ndimage as ndi
from skimage.segmentation import watershed
from skimage.feature import peak_local_max
from skimage.measure import label
from PIL import Image         # для 8‑бит и 16‑бит сохранения
import imageio.v2 as imageio   # fallback для 16‑бит PNG

# ---------- utilities -------------------------------
def normalize_to_uint8(arr):
    """Linearly rescale an array to the full 0..255 range and return it as uint8.

    The minimum finite value maps to 0 and the maximum finite value to 255;
    fractional results are truncated by the final integer cast.

    Parameters
    ----------
    arr : array-like
        Numeric data of any shape.

    Returns
    -------
    numpy.ndarray
        uint8 array with the same shape as `arr`. All zeros when the input is
        empty, constant, or contains no finite value. NaN entries become 0,
        +inf becomes 255 and -inf becomes 0.
    """
    arr = np.asarray(arr, dtype=np.float32)
    blank = np.zeros(arr.shape, dtype=np.uint8)
    if arr.size == 0:                    # min()/max() would raise on empty input
        return blank

    # Take the range from finite values only: a single NaN would otherwise
    # poison min/max and make the uint8 cast undefined.
    finite = np.isfinite(arr)
    if not finite.any():
        return blank
    mn, mx = arr[finite].min(), arr[finite].max()
    if mx <= mn:                         # constant image → nothing to stretch
        return blank

    scaled = (arr - mn) / (mx - mn) * 255
    scaled[np.isnan(scaled)] = 0         # NaN pixels become background
    return np.clip(scaled, 0, 255).astype(np.uint8)   # clip maps ±inf to 255 / 0

def binary_to_instance_mask(bin_mask):
    """Turn a binary mask into an instance-label mask via a distance-transform watershed.

    Every pixel of `bin_mask` greater than zero counts as foreground. Local
    maxima of the Euclidean distance transform (searched with a 3x3 footprint,
    border included) are used as seeds, the seed image is numbered with
    `label`, and `watershed` is run on the negated distance map restricted to
    the foreground.

    Returns
    -------
    numpy.ndarray
        The watershed result cast to uint16.
    """
    foreground = bin_mask > 0
    distance = ndi.distance_transform_edt(foreground)

    peaks = peak_local_max(
        distance,
        labels=foreground,
        footprint=np.ones((3, 3)),
        exclude_border=False,
    )

    seed_mask = np.zeros_like(distance, dtype=bool)
    if peaks.size:                       # no peaks when there are no objects
        seed_mask[tuple(peaks.T)] = True

    instances = watershed(-distance, label(seed_mask), mask=foreground)
    return instances.astype(np.uint16)

def prepare_split(files, seed=42):
    """Split file names into train / val / test subsets (roughly 70 / 15 / 15 %).

    Parameters
    ----------
    files : iterable
        Items to split (file names in this notebook). They are sorted first,
        so the result depends only on the set of items and on `seed`, not on
        the order in which they were listed.
    seed : int
        Passed as `random_state` to both `train_test_split` calls.

    Returns
    -------
    dict
        ``{'train': [...], 'val': [...], 'test': [...]}``.
    """
    # Directory listings come back in arbitrary order; without sorting, the
    # same seed could yield a different split on another machine or run.
    files = sorted(files)
    train, test = train_test_split(files, test_size=0.15, random_state=seed)
    # 0.1765 ≈ 0.15 / 0.85: take 15 % of the whole set out of the remaining 85 %.
    train, val = train_test_split(train, test_size=0.1765, random_state=seed)
    return {'train': train, 'val': val, 'test': test}

# ---------- main pipeline ---------------------------
def _save_label_png(labels, path_out):
    """Write a 2-D label image to `path_out` as PNG: 8-bit if max label <= 255, else 16-bit."""
    if labels.max() <= 255:              # 8 bits are enough
        Image.fromarray(labels.astype(np.uint8), mode='L').save(path_out, "PNG")
        return

    labels16 = labels.astype(np.uint16)
    try:
        # Keep creation AND saving inside the try: either step may be the one
        # that rejects 16-bit data, depending on the Pillow version.
        Image.fromarray(labels16, mode='I;16').save(path_out, "PNG")
    except (ValueError, TypeError, OSError):
        imageio.imwrite(path_out, labels16)   # imageio fallback for 16-bit PNG


def process_volume_folder(src_dir, dst_dir, is_mask=False, split_map=None):
    """Export every TIFF in `src_dir` as per-slice PNG files under `dst_dir/<split>/`.

    Parameters
    ----------
    src_dir : str
        Folder scanned (non-recursively) for ``.tif`` / ``.tiff`` files.
    dst_dir : str
        Output root; the ``train``, ``val`` and ``test`` sub-folders are created.
    is_mask : bool
        False: each slice is min-max normalised to uint8 and saved as
        ``<base>_<z:03d>.png``.
        True: each slice is saved as a label image ``<base>_<z:03d>_cp_masks.png``.
        A volume whose values are all in {0, 1} is treated as binary and each
        slice is split into instances with `binary_to_instance_mask`; any other
        volume is taken as already labelled and only cast to uint16.
    split_map : dict or None
        Maps a source file name to its split folder. Files that are missing
        from the map (or all files when the map is empty/None) go to ``train``.

    Notes
    -----
    A 2-D file is handled as a single slice; a 4-D array with a leading axis of
    length 1 has that axis dropped. The array is then iterated along axis 0.
    """
    for split in ('train', 'val', 'test'):
        os.makedirs(os.path.join(dst_dir, split), exist_ok=True)

    for fname in sorted(os.listdir(src_dir)):
        if not fname.lower().endswith(('.tif', '.tiff')):
            continue

        if split_map and fname not in split_map:
            # Make the fallback visible: a mask whose name does not match its
            # image would otherwise silently land in a different split.
            print(f"[warn] {fname} is not in split_map; writing it to 'train'")
        split = split_map.get(fname, 'train') if split_map else 'train'
        out_dir = os.path.join(dst_dir, split)

        vol = imread(os.path.join(src_dir, fname))
        if vol.ndim == 2:                # single slice
            vol = vol[np.newaxis, ...]
        elif vol.ndim == 4 and vol.shape[0] == 1:
            vol = vol[0]

        base = os.path.splitext(fname)[0]

        # Decide binary-vs-labelled once per volume. Deciding per slice would
        # re-segment any slice of a labelled volume that happens to contain
        # only label 1, replacing its instance IDs.
        volume_is_binary = is_mask and set(np.unique(vol).tolist()).issubset({0, 1})

        for z, sl in enumerate(vol):
            if is_mask:
                if volume_is_binary:     # binary → watershed
                    sl = binary_to_instance_mask(sl)
                else:
                    sl = sl.astype(np.uint16)

                _save_label_png(sl, os.path.join(out_dir, f"{base}_{z:03d}_cp_masks.png"))

                # debug output: first ten label values, with an ellipsis only
                # when the list was actually truncated
                labels = np.unique(sl)
                print(f"[mask] {fname} z={z}  unique={labels[:10]}{'...' if len(labels) > 10 else ''}")
            else:
                sl8 = normalize_to_uint8(sl)
                Image.fromarray(sl8, mode='L').save(
                    os.path.join(out_dir, f"{base}_{z:03d}.png"), "PNG"
                )

    print(f"✔  processed {src_dir} → {dst_dir}")

# ---------- paths and launch ------------------------
# Dataset locations: source images, source masks, and the Cellpose-style output root.
IMAGES_DIR = "/NAS/mmaiurov/Datasets/Hela_MRC/images/"
MASKS_DIR  = "/NAS/mmaiurov/Datasets/Hela_MRC/masks/"
OUTPUT_DIR = "/NAS/mmaiurov/Datasets/Hela_MRC_cellpose"

# TIFF file names found in the image folder (extension matched case-insensitively).
files = [
    name
    for name in os.listdir(IMAGES_DIR)
    if name.lower().endswith(('.tif', '.tiff'))
]

# Invert {split: [file names]} into {file name: split} for per-file lookup.
split_map = {
    name: split_name
    for split_name, members in prepare_split(files).items()
    for name in members
}

In [21]:
# === Processing: export the raw image volumes as 8-bit PNG slices ===
process_volume_folder(
    src_dir=IMAGES_DIR,
    dst_dir=OUTPUT_DIR,
    is_mask=False,
    split_map=split_map,
)

✔  processed /NAS/mmaiurov/Datasets/Hela_MRC/images/ → /NAS/mmaiurov/Datasets/Hela_MRC_cellpose


In [20]:
# Export the mask volumes as per-slice label PNGs, using the same split as the images.
process_volume_folder(
    src_dir=MASKS_DIR,
    dst_dir=OUTPUT_DIR,
    is_mask=True,
    split_map=split_map,
)

[mask] 20221107_HeLa_MRC_24hLN_DLS Acquire 2_007_Target_Lng_z00_ch00.tif z=0  unique=[0]
[mask] 20221107_HeLa_MRC_24hLN_DLS Acquire 2_007_Target_Lng_z00_ch00.tif z=1  unique=[0]
[mask] 20221107_HeLa_MRC_24hLN_DLS Acquire 2_007_Target_Lng_z00_ch00.tif z=2  unique=[0]
[mask] 20221107_HeLa_MRC_24hLN_DLS Acquire 2_007_Target_Lng_z00_ch00.tif z=3  unique=[0 1 2 6 7 8]
[mask] 20221107_HeLa_MRC_24hLN_DLS Acquire 2_007_Target_Lng_z00_ch00.tif z=4  unique=[ 0  1  2  6  7  8  9 10 11]...
[mask] 20221107_HeLa_MRC_24hLN_DLS Acquire 2_007_Target_Lng_z00_ch00.tif z=5  unique=[ 0  1  2  3  6  7  8  9 10 11]...
[mask] 20221107_HeLa_MRC_24hLN_DLS Acquire 2_007_Target_Lng_z00_ch00.tif z=6  unique=[ 0  1  2  3  6  7  8  9 10 11]...
[mask] 20221107_HeLa_MRC_24hLN_DLS Acquire 2_007_Target_Lng_z00_ch00.tif z=7  unique=[ 0  2  3  6  7  8  9 10 11 12]...
[mask] 20221107_HeLa_MRC_24hLN_DLS Acquire 2_007_Target_Lng_z00_ch00.tif z=8  unique=[ 0  2  3  7  9 12 13 14 16 17]...
[mask] 20221107_HeLa_MRC_24hLN_DLS Ac

In [22]:
# Sanity check: read one exported mask back and list the label IDs stored in the PNG.
# Read through the `imageio` module imported at the top of the notebook instead of
# `from imageio import imread`: that import would rebind the global `imread`, which
# process_volume_folder relies on (tifffile) to load the TIFF volumes, so re-running
# the export after this cell would silently use a different reader.
m = imageio.imread("/NAS/mmaiurov/Datasets/Hela_MRC_cellpose/test/20221107_HeLa_MRC_24hLN_DLS Acquire 2_007_Target_Lng_z00_ch00_003_cp_masks.png")
print(np.unique(m))

[0 1 2 6 7 8]


  m = imread("/NAS/mmaiurov/Datasets/Hela_MRC_cellpose/test/20221107_HeLa_MRC_24hLN_DLS Acquire 2_007_Target_Lng_z00_ch00_003_cp_masks.png")
