In [1]:
import sys
from pathlib import Path

# Make the project root importable so that `src.*` modules resolve from this notebook.
# The root is taken to be two levels above the current working directory.
notebook_dir = Path().resolve()
src_dir, project_root = notebook_dir.parent, notebook_dir.parent.parent

_root_entry = str(project_root)
if _root_entry not in sys.path:
    # Prepend so the project copy wins over any same-named installed package.
    sys.path.insert(0, _root_entry)


In [2]:
import tifffile
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2

from src.utils.helpers import compare_two_images, clean_mask
from sklearn.model_selection import train_test_split

In [3]:
# Sorted relative paths of every entry in the normalized-image folder;
# entries whose name starts with "." are skipped.
image_dir = "../../data/normalized_images/"
image_paths = sorted(
    image_dir + entry
    for entry in os.listdir(image_dir)
    if not entry.startswith(".")
)
image_paths

['../../data/normalized_images/normalized_CTRL_1_M1_3R__0004.tif',
 '../../data/normalized_images/normalized_CTRL_2_M6_3R__0002.tif',
 '../../data/normalized_images/normalized_CTRL_3_M2_4R__0006.tif',
 '../../data/normalized_images/normalized_CTRL_4_M6_3L__0008.tif',
 '../../data/normalized_images/normalized_E2+DHT_1_M13_3L_0001.tif',
 '../../data/normalized_images/normalized_E2+DHT_1_M13_3L_0010.tif',
 '../../data/normalized_images/normalized_E2+DHT_2_M10_3R_0002.tif',
 '../../data/normalized_images/normalized_E2+DHT_2_M13_4L_0004.tif',
 '../../data/normalized_images/normalized_E2+P4+DHT_1_M7_3L_0002.tif',
 '../../data/normalized_images/normalized_E2+P4+DHT_1_M7_3L_0013.tif',
 '../../data/normalized_images/normalized_E2+P4+DHT_1_M7_3L_0019.tif',
 '../../data/normalized_images/normalized_E2+P4+DHT_4_M14_3L_0003.tif',
 '../../data/normalized_images/normalized_E2+P4_2_M9_3R_0003.tif',
 '../../data/normalized_images/normalized_E2+P4_3_M12_3R_0001.tif',
 '../../data/normalized_images/norma

In [4]:
# Sorted relative paths of every entry in the original-mask folder;
# entries whose name starts with "." are skipped.
mask_dir = "../../data/cnn_training/original_masks/"
segmentation_paths = sorted(
    mask_dir + entry
    for entry in os.listdir(mask_dir)
    if not entry.startswith(".")
)
segmentation_paths

['../../data/cnn_training/original_masks/normalized_CTRL_1_M1_3R__0004_segmented.tif',
 '../../data/cnn_training/original_masks/normalized_CTRL_3_M2_4R__0006_segmented.tif',
 '../../data/cnn_training/original_masks/normalized_CTRL_4_M6_3L__0008_segmented.tif',
 '../../data/cnn_training/original_masks/normalized_E2+DHT_1_M13_3L_0001_segmented.tif',
 '../../data/cnn_training/original_masks/normalized_E2+DHT_1_M13_3L_0010_segmented.tif',
 '../../data/cnn_training/original_masks/normalized_E2+DHT_2_M10_3R_0002_segmented.tif',
 '../../data/cnn_training/original_masks/normalized_E2+DHT_2_M13_4L_0004_segmented.tif',
 '../../data/cnn_training/original_masks/normalized_E2+P4+DHT_1_M7_3L_0002_segmented.tif',
 '../../data/cnn_training/original_masks/normalized_E2+P4+DHT_1_M7_3L_0013_segmented.tif',
 '../../data/cnn_training/original_masks/normalized_E2+P4+DHT_4_M14_3L_0003_segmented.tif',
 '../../data/cnn_training/original_masks/normalized_E2+P4_3_M12_3R_0001_segmented.tif',
 '../../data/cnn_trai

In [5]:
# Reduce both path lists to bare file names so images and masks can be compared by name.
# Mask names have "_segmented.tif" rewritten to ".tif" to line up with the image names.
simple_segmentation_paths = [
    mask_path.rsplit("/", 1)[-1].replace("_segmented.tif", ".tif")
    for mask_path in segmentation_paths
]
simple_image_paths = [image_path.rsplit("/", 1)[-1] for image_path in image_paths]

In [6]:
# Keep only the images that have a segmentation mask.
# The list is built by looking each mask's file name up among the images, so entry i of
# filtered_image_paths always belongs to entry i of segmentation_paths. Later cells pair
# the two lists positionally; relying on both lists merely being sorted is not enough,
# because "x.tif" and "x_segmented.tif" names can sort differently when one stem is a
# prefix of another.
# A mask without a matching image is skipped here, which leaves the two lists with
# different lengths.
image_path_by_name = dict(zip(simple_image_paths, image_paths))
filtered_image_paths = [
    image_path_by_name[name]
    for name in simple_segmentation_paths
    if name in image_path_by_name
]
filtered_image_paths

['../../data/normalized_images/normalized_CTRL_1_M1_3R__0004.tif',
 '../../data/normalized_images/normalized_CTRL_3_M2_4R__0006.tif',
 '../../data/normalized_images/normalized_CTRL_4_M6_3L__0008.tif',
 '../../data/normalized_images/normalized_E2+DHT_1_M13_3L_0001.tif',
 '../../data/normalized_images/normalized_E2+DHT_1_M13_3L_0010.tif',
 '../../data/normalized_images/normalized_E2+DHT_2_M10_3R_0002.tif',
 '../../data/normalized_images/normalized_E2+DHT_2_M13_4L_0004.tif',
 '../../data/normalized_images/normalized_E2+P4+DHT_1_M7_3L_0002.tif',
 '../../data/normalized_images/normalized_E2+P4+DHT_1_M7_3L_0013.tif',
 '../../data/normalized_images/normalized_E2+P4+DHT_4_M14_3L_0003.tif',
 '../../data/normalized_images/normalized_E2+P4_3_M12_3R_0001.tif',
 '../../data/normalized_images/normalized_E2+P4_3_M12_3R_0002.tif',
 '../../data/normalized_images/normalized_E2+P4_3_M12_3R_0005.tif',
 '../../data/normalized_images/normalized_E2_1_M8_3L_0005.tif',
 '../../data/normalized_images/normalized

In [7]:
# Sanity-check the image/mask pairing before anything is loaded or split.
# 1) Every mask must have found an image.
assert len(filtered_image_paths) == len(segmentation_paths), (
    f"{len(filtered_image_paths)} images matched for {len(segmentation_paths)} masks"
)
# 2) The lists are zipped positionally later on, so entry i of each list must refer to
#    the same file name (mask names carry an extra "_segmented" before the extension).
mismatched_pairs = [
    (image_path, mask_path)
    for image_path, mask_path in zip(filtered_image_paths, segmentation_paths)
    if image_path.split("/")[-1]
    != mask_path.split("/")[-1].replace("_segmented.tif", ".tif")
]
assert not mismatched_pairs, f"Image/mask pairs are out of order: {mismatched_pairs}"

In [8]:
# Read every image and its mask into memory.
# Lists are used instead of lazy `map` objects: a map iterator is exhausted once `zip`
# has consumed it, which would leave `images` and `masks` silently empty for any later use.
images = [tifffile.imread(path) for path in filtered_image_paths]
masks = [tifffile.imread(path) for path in segmentation_paths]
# (image, mask) tuples, paired by position.
data = list(zip(images, masks))

In [9]:
# Collect the union of the values that occur in any mask.
# This only gathers the values for inspection; it does not enforce anything.
unique_values = set().union(*(np.unique(mask) for _, mask in data))
unique_values

{0, 1, 2}

In [10]:
# Size passed to cv2.resize for every image and mask.
TARGET_SIZE = (512, 512)

# Destination folders under project_root, in the order:
# train images, test images, train masks, test masks.
_resized_output_dirs = [
    project_root / "data/cnn_training/resized_images/",
    project_root / "data/cnn_training/resized_images_test/",
    project_root / "data/cnn_training/resized_masks/",
    project_root / "data/cnn_training/resized_masks_test/",
]

# Create any folder that is missing; existing folders are left as they are.
for _output_dir in _resized_output_dirs:
    os.makedirs(_output_dir, exist_ok=True)

# Downstream cells build file names by string concatenation, so expose plain strings.
(
    OUTPUT_IMAGE_TRAIN_DIR,
    OUTPUT_IMAGE_TEST_DIR,
    OUTPUT_MASK_TRAIN_DIR,
    OUTPUT_MASK_TEST_DIR,
) = map(str, _resized_output_dirs)

In [11]:
# Split the image/mask path pairs into train and test subsets.
# Both lists are shuffled together, so pairs stay aligned; the fixed random_state
# makes the split repeatable.
train_images, test_images, train_masks, test_masks = train_test_split(
    filtered_image_paths,
    segmentation_paths,
    test_size=0.25,
    random_state=42,
)
# Print the four subset sizes on one line.
print(*(len(subset) for subset in (train_images, test_images, train_masks, test_masks)))

12 5 12 5


In [12]:
# Resize every training image/mask pair to TARGET_SIZE and write both files to the
# training output folders, keeping the original file names.
for image_path, mask_path in zip(train_images, train_masks):
    image_out = OUTPUT_IMAGE_TRAIN_DIR + "/" + image_path.rsplit("/", 1)[-1]
    mask_out = OUTPUT_MASK_TRAIN_DIR + "/" + mask_path.rsplit("/", 1)[-1]

    image = tifffile.imread(image_path)
    mask = tifffile.imread(mask_path)
    original_mask_dtype = mask.dtype

    # Bicubic interpolation for the image.
    image = cv2.resize(image, TARGET_SIZE, interpolation=cv2.INTER_CUBIC)
    # Nearest-neighbour for the mask so that only existing label values appear in the
    # output; the result is cast back to the dtype the mask was read with.
    mask = cv2.resize(
        mask, TARGET_SIZE, interpolation=cv2.INTER_NEAREST
    ).astype(original_mask_dtype)

    tifffile.imwrite(image_out, image)
    tifffile.imwrite(mask_out, mask)

In [13]:
# Resize every test image/mask pair to TARGET_SIZE and write both files to the test
# output folders, keeping the original file names.
for image_path, mask_path in zip(test_images, test_masks):
    image = tifffile.imread(image_path)
    mask = tifffile.imread(mask_path)
    original_mask_dtype = mask.dtype
    # Bicubic interpolation for the image.
    image = cv2.resize(image, TARGET_SIZE, interpolation=cv2.INTER_CUBIC)
    # Nearest-neighbour for the mask so that only existing label values appear in the output.
    mask = cv2.resize(mask, TARGET_SIZE, interpolation=cv2.INTER_NEAREST)
    # Cast back to the dtype the mask was read with, exactly as the training loop does,
    # so train and test masks are guaranteed to be stored with matching dtypes.
    mask = mask.astype(original_mask_dtype)
    tifffile.imwrite(OUTPUT_IMAGE_TEST_DIR + "/" + image_path.split("/")[-1], image)
    tifffile.imwrite(OUTPUT_MASK_TEST_DIR + "/" + mask_path.split("/")[-1], mask)