In [1]:
import os
import glob
import numpy as np
from PIL import Image
from typing import List
from openslide import OpenSlide

In [2]:
def load_mrxs_files(data_dir):
    return glob.glob(data_dir + '*.mrxs')

In [3]:
data_files = load_mrxs_files('./testis/40x/') + load_mrxs_files('./testis/40x_2/')

In [4]:
data_files

['./testis/40x/18H14294I.mrxs',
 './testis/40x/19H16747_0110.mrxs',
 './testis/40x/19H11010_0111.mrxs',
 './testis/40x/18H14294II.mrxs',
 './testis/40x_2/19,H,11010,_,01,1,1.mrxs',
 './testis/40x_2/H,15131.mrxs',
 './testis/40x_2/H,08838_20210303140504384.mrxs']

In [5]:
slides = [OpenSlide(file) for file in data_files]

In [6]:
slide = slides[0]
print(f'Levels: {slide.level_count}')
for i, dimensions in enumerate(slide.level_dimensions):
    print(f'LVL {i} - {dimensions[0]:8d} X {dimensions[1]} px')

Levels: 10
LVL 0 -   177152 X 416768 px
LVL 1 -    88576 X 208384 px
LVL 2 -    44288 X 104192 px
LVL 3 -    22144 X 52096 px
LVL 4 -    11072 X 26048 px
LVL 5 -     5536 X 13024 px
LVL 6 -     2768 X 6512 px
LVL 7 -     1384 X 3256 px
LVL 8 -      692 X 1628 px
LVL 9 -      346 X 814 px


In [7]:
def is_gray(color: tuple):
    (r, g, b, a) = color
    diffs = map(abs, (r-g, r-b, g-b))
    return a == 0 or all(diff <= 10 for diff in diffs)


def is_useful_tile(tile: Image) -> bool:
    tile_copy = tile.copy()
    tile_copy.thumbnail((32, 32))
    _, most_frequent_color = max(tile_copy.getcolors(maxcolors=1024), key=lambda x: x[0])
    return not is_gray(most_frequent_color)

In [8]:
def crop_slide_by_grid(slide: OpenSlide, level: int, size: tuple, destination_path: str):
    tile_size_x, tile_size_y = size
    full_size_x, full_size_y = slide.level_dimensions[level]
    filename_prefix = slide._filename.split('/')[-1].replace('.mrxs', '')
    for i in range(full_size_x // tile_size_x):
        for j in range(full_size_y // tile_size_y):
            top_left = (i * tile_size_x * 2 ** level, j * tile_size_x * 2 ** level)
            tile = slide.read_region(
                location=top_left,
                level=level,
                size=size
            )
            if is_useful_tile(tile):
                tile.save(os.path.join(destination_path, f'{filename_prefix}_{level}_{i}_{j}.png'))

In [9]:
def crop_slides_rejection_sampling(
        slides: List[OpenSlide], 
        level: int, 
        size: tuple, 
        destination_path: str, 
        number: int
    ):
    tile_size_x, tile_size_y = size
    saved_number = 0
    d = int(np.ceil(np.log10(number)))
    while saved_number < number:
        slide = np.random.choice(slides)
        filename_prefix = slide._filename.split('/')[-1].replace('.mrxs', '')
        full_size_x, full_size_y = slide.level_dimensions[level]
        top_left_x = np.random.choice(full_size_x - tile_size_x)
        top_left_y = np.random.choice(full_size_y - tile_size_y)
        top_left_rescaled = (top_left_x * 2 ** level, top_left_y * 2 ** level)
        tile = slide.read_region(
            location=top_left_rescaled,
            level=level,
            size=size
        )
        if is_useful_tile(tile):
            tile.save(os.path.join(destination_path, f'{saved_number:0{d}}_{filename_prefix}_{level}.png'))
            saved_number += 1

In [10]:
crop_slides_rejection_sampling(slides, 2, (256, 256), 'cropped_images', 101)