In [21]:
import xml.etree.ElementTree as ET
import numpy as np
import cv2
from PIL import Image
import random
import matplotlib.pyplot as plt
from pathlib import Path

In [14]:
def he_to_binary_mask(filename, data_dir="MoNuSeg 2018 Training Data"):
    """Rasterise the annotated regions of one tile into a label mask and a colour mask.

    The polygon outlines are read from ``<data_dir>/Annotations/<filename>.xml``
    (every ``<Vertex X=".." Y="..">`` under each ``<Region>``) and the output
    size is taken from ``<data_dir>/Tissue Images/<filename>.tif``.

    Parameters
    ----------
    filename : str
        Base name shared by the image and the annotation file, without extension.
    data_dir : str or path-like, optional
        Folder containing the ``Tissue Images`` and ``Annotations`` sub-folders.
        The default reproduces the previously hard-coded location.

    Returns
    -------
    binary_mask : numpy.ndarray
        ``int32`` array of shape ``(height, width)``. Despite the name this is a
        label map: 0 is background, and every other pixel holds the 1-based
        position (in XML document order) of the *first* region that covers it.
    color_mask : numpy.ndarray
        ``float32`` array of shape ``(height, width, 3)``. Each region gets one
        random colour (each channel drawn from ``random.random()``); colours
        are summed where regions overlap, so values there may exceed 1.

    Notes
    -----
    * Errors from ``ET.parse`` and ``Image.open`` (missing or malformed files)
      are propagated unchanged.
    * Regions without any vertex are skipped instead of raising; they still
      consume their index, so the labels of the other regions are unaffected.
    * Colours come from the global ``random`` state; call ``random.seed(...)``
      beforehand if reproducible colours are needed.
    """
    # File paths
    im_file = f"{data_dir}/Tissue Images/{filename}.tif"
    xml_file = f"{data_dir}/Annotations/{filename}.xml"

    # Parse the XML file
    root = ET.parse(xml_file).getroot()

    # One (n_vertices, 2) float array of (x, y) pairs per region.
    # reshape(-1, 2) keeps vertex-less regions 2-D (shape (0, 2)).
    xy = [
        np.array(
            [(float(vertex.get("X")), float(vertex.get("Y")))
             for vertex in region.findall(".//Vertex")],
            dtype=np.float64,
        ).reshape(-1, 2)
        for region in root.findall(".//Region")
    ]

    # Only the image dimensions are needed; the pixel data is never read here.
    with Image.open(im_file) as img:
        nrow, ncol = img.height, img.width

    # int32 so the label mask can hold far more than 255 region indices.
    binary_mask = np.zeros((nrow, ncol), dtype=np.int32)
    color_mask = np.zeros((nrow, ncol, 3), dtype=np.float32)

    for zz, region_coords in enumerate(xy, start=1):
        if len(region_coords) == 0:
            # Nothing to draw for a region with no vertices.
            continue

        # Rasterise the outline. astype(np.int32) drops the fractional part of
        # each coordinate, which is what cv2.fillPoly needs (integer points).
        polygon = np.zeros((nrow, ncol), dtype=np.uint8)
        cv2.fillPoly(polygon, [region_coords.astype(np.int32)], 1)
        inside = polygon.astype(bool)

        # First region wins: only still-unlabelled pixels receive this index.
        binary_mask[inside & (binary_mask == 0)] = zz

        # One random colour per region, added on top of whatever is there.
        rgb = np.array([random.random(), random.random(), random.random()])
        color_mask[inside] += rgb

    return binary_mask, color_mask

In [None]:
# Example usage:
filename = "TCGA-18-5592-01Z-00-DX1" # Replace with your filename without extension


# Folder holding one annotation XML per tissue image
directory = Path("MoNuSeg 2018 Training Data/Annotations")

# Output folder for the generated masks. Create it up front so np.savez does
# not fail with FileNotFoundError on a fresh copy of the dataset.
path='MoNuSeg 2018 Training Data/Masks/'
Path(path).mkdir(parents=True, exist_ok=True)

# Only *.xml entries are annotations; anything else in the folder (OS metadata
# files, notebook checkpoints, ...) has no matching "<stem>.xml" for
# he_to_binary_mask to parse. sorted() makes the processing order stable.
for file_path in sorted(directory.glob("*.xml")):
    # Skip anything that merely looks like an annotation (e.g. a directory)
    if not file_path.is_file():
        continue
    # The stem is the base name shared by the .xml and the .tif
    file_name_without_extension = file_path.stem
    print(file_name_without_extension)
    binary_mask, color_mask = he_to_binary_mask(file_name_without_extension)
    # Store both masks together in a single .npz archive per image
    np.savez(f'{path}{file_name_without_extension}.npz', binary_mask=binary_mask, color_mask=color_mask)

# To reload one of the saved archives:
# loaded_data = np.load(f'{path}{filename}.npz')
# loaded_binary_mask = loaded_data['binary_mask']
# loaded_color_mask = loaded_data['color_mask']

In [None]:

# plt.figure()
# plt.title("Binary Mask")
# plt.imshow(loaded_binary_mask, cmap="gray")
# plt.show()

# plt.figure()
# plt.title("Color Mask")
# plt.imshow(loaded_color_mask)
# plt.show()