In [18]:

import os

# Import OpenSlide. On interpreters that expose os.add_dll_directory (Windows),
# the bundled OpenSlide binaries must be on the DLL search path while the
# module is being imported; everywhere else a plain import is used.
if not hasattr(os, 'add_dll_directory'):
    import openslide
else:
    # Windows: the binaries are looked up relative to the current working
    # directory, so the notebook must be started from the project root.
    OPENSLIDE_PATH = os.path.join(
        os.path.abspath(os.getcwd()),
        "libs/openslide-bin-4.0.0.3-windows-x64/bin",
    )
    with os.add_dll_directory(OPENSLIDE_PATH):
        import openslide

In [19]:
import json
import os
import xml.etree.ElementTree as ET
from pathlib import Path

import cv2
import numpy as np
import openslide

# Folder holding the whole-slide images (.svs) together with their annotation
# files (.xml). Each annotation file is matched to the slide with the same stem.
input_dir = "data/whole-slides/gut"

for filename in os.listdir(input_dir):
    if Path(filename).suffix != ".xml":
        continue

    # --- 1. Convert every annotated region into an axis-aligned bounding box ---
    regions_tag = ET.parse(f'{input_dir}/{filename}').getroot().find('Annotation/Regions')
    # find() returns None when the file has no Annotation/Regions element;
    # treat that as "no regions" instead of failing with AttributeError.
    region_tags = regions_tag.findall('Region') if regions_tag is not None else []
    bounding_boxes = []
    for region_tag in region_tags:
        vertex_tags = region_tag.findall('Vertices/Vertex')
        if not vertex_tags:
            # min()/max() raise ValueError on an empty sequence, and a region
            # without vertices has no extent to export anyway.
            continue
        # Parse through float so fractional coordinates (e.g. "108200.5") are
        # accepted; int() on such a string would raise ValueError.
        xs = [float(vertex_tag.attrib['X']) for vertex_tag in vertex_tags]
        ys = [float(vertex_tag.attrib['Y']) for vertex_tag in vertex_tags]
        # Floor the minimum and ceil the maximum so the integer box always
        # encloses every vertex. For integer input this is a no-op.
        x_min, y_min = int(np.floor(min(xs))), int(np.floor(min(ys)))
        x_max, y_max = int(np.ceil(max(xs))), int(np.ceil(max(ys)))
        width = x_max - x_min
        height = y_max - y_min
        bounding_boxes.append({"x_min": x_min, "y_min": y_min, "width": width, "height": height})
    print()
    print(bounding_boxes)

    # --- 2. Persist the boxes next to the annotation file ---
    with open(f"{input_dir}/{Path(filename).stem}.json", 'w') as f:
        json.dump(bounding_boxes, f)

    # --- 3. Crop each box out of the slide at level 0 and save it as PNG ---
    slide_filename = f"{Path(filename).stem}.svs"
    output_dir = f"data/rois/{Path(slide_filename).stem}/"
    os.makedirs(output_dir, exist_ok=True)
    # The context manager closes the slide handle even if an export fails.
    with openslide.OpenSlide(f"{input_dir}/{slide_filename}") as slide:
        for i, bounding_box in enumerate(bounding_boxes):
            x, y, w, h = bounding_box["x_min"], bounding_box["y_min"], bounding_box["width"], bounding_box["height"]
            if w <= 0 or h <= 0:
                # A degenerate box (all vertices on one line or point) has no pixels.
                print(f"Skipping zero-area region {i} of {slide_filename}: {bounding_box}")
                continue
            # read_region returns an RGBA PIL image, whereas cv2.imwrite
            # interprets arrays as BGR(A); convert so red and blue are not
            # swapped in the saved file.
            roi = cv2.cvtColor(np.array(slide.read_region((x, y), 0, (w, h))), cv2.COLOR_RGBA2BGRA)
            roi_path = f"{output_dir}/{i}_{x},{y}_{w},{h}.png"
            # cv2.imwrite reports failure through its return value, not an exception.
            if not cv2.imwrite(roi_path, roi):
                raise OSError(f"Could not write ROI image to {roi_path}")


[{'x_min': 108200, 'y_min': 23667, 'width': 292, 'height': 323}]

[{'x_min': 162664, 'y_min': 29288, 'width': 242, 'height': 236}, {'x_min': 163485, 'y_min': 29469, 'width': 253, 'height': 244}, {'x_min': 162129, 'y_min': 29096, 'width': 173, 'height': 164}, {'x_min': 167794, 'y_min': 20703, 'width': 486, 'height': 196}, {'x_min': 150454, 'y_min': 35879, 'width': 157, 'height': 129}, {'x_min': 147624, 'y_min': 32247, 'width': 178, 'height': 140}, {'x_min': 155621, 'y_min': 25969, 'width': 341, 'height': 218}, {'x_min': 168227, 'y_min': 22700, 'width': 335, 'height': 178}, {'x_min': 149590, 'y_min': 16378, 'width': 168, 'height': 330}, {'x_min': 134934, 'y_min': 39834, 'width': 178, 'height': 144}, {'x_min': 132153, 'y_min': 38294, 'width': 178, 'height': 234}, {'x_min': 127993, 'y_min': 39672, 'width': 199, 'height': 225}, {'x_min': 104503, 'y_min': 39517, 'width': 373, 'height': 403}, {'x_min': 103250, 'y_min': 38803, 'width': 307, 'height': 316}, {'x_min': 101910, 'y_min': 37489, 'w