In [2]:

import os
import shutil
from pathlib import Path

import cv2
from tqdm import tqdm

if hasattr(os, 'add_dll_directory'):
    # Windows
    OPENSLIDE_PATH = os.path.join(os.path.abspath(os.getcwd()),
                                  "libs/openslide-bin-4.0.0.3-windows-x64/bin")
    with os.add_dll_directory(OPENSLIDE_PATH):
        import openslide
else:
    import openslide
import numpy as np
import math

In [3]:


def grid_segment_slides(input_dir, root_output_dir, filter=None, cell_size=256, level=0):
    """Cut every slide file in ``input_dir`` into a grid of square PNG tiles.

    WARNING: ``root_output_dir`` is deleted (recursively) before any tile is
    written, so each call starts from an empty output tree.

    Tiles are written to
    ``{root_output_dir}/{slide stem}/{cell_size}x{cell_size}/{i},{j}_{x}_{y}.png``
    where ``i``/``j`` are the column/row indices of the tile and ``x``/``y``
    are the tile's top-left corner in pixels of the requested ``level``.
    Tiles on the right/bottom border may extend past the slide.

    Args:
        input_dir: Directory whose regular files are opened as slides.
            Sub-directories are skipped.
        root_output_dir: Root directory for the tiles (wiped first, see above).
        filter: Optional callable taking the tile as a NumPy array, exactly as
            read from the slide (RGBA channel order), and returning a truthy
            value for tiles to keep. ``None`` keeps every tile.
        cell_size: Tile edge length in pixels at ``level``.
        level: Pyramid level of the slide to read from.

    Raises:
        OSError: If OpenCV reports that a tile could not be written.
    """
    # Start from a clean output tree so tiles from an earlier run never mix in.
    if os.path.exists(root_output_dir):
        shutil.rmtree(root_output_dir)

    # Sorted so that slides are processed in a reproducible order.
    for slide_filename in sorted(os.listdir(input_dir)):
        slide_path = f"{input_dir}/{slide_filename}"
        if not os.path.isfile(slide_path):
            # Directories (e.g. companion data folders) cannot be opened as slides.
            continue

        output_dir = f"{root_output_dir}/{Path(slide_filename).stem}/{cell_size}x{cell_size}"
        os.makedirs(output_dir, exist_ok=True)

        # The context manager closes the slide handle even if tiling fails.
        with openslide.OpenSlide(slide_path) as slide:
            slide_width, slide_height = slide.level_dimensions[level]
            # read_region() expects its location in level-0 pixels, so grid
            # coordinates of any other level must be scaled back up.
            downsample = slide.level_downsamples[level]

            x_origins = range(0, slide_width, cell_size)
            y_origins = range(0, slide_height, cell_size)

            # The total is derived from the very ranges that are iterated, so
            # the bar also accounts for partial tiles on the right/bottom edge.
            with tqdm(total=len(x_origins) * len(y_origins), desc="Progress") as pbar:
                for i, x in enumerate(x_origins):
                    for j, y in enumerate(y_origins):
                        location = (int(round(x * downsample)), int(round(y * downsample)))
                        cell = np.array(slide.read_region(location, level, (cell_size, cell_size)))
                        if filter is None or filter(cell):
                            cell_file_path = f"{output_dir}/{i},{j}_{x}_{y}.png"
                            # OpenSlide yields RGBA while OpenCV writes BGRA;
                            # convert so red and blue are not swapped on disk.
                            cell_bgra = cv2.cvtColor(cell, cv2.COLOR_RGBA2BGRA)
                            if not cv2.imwrite(cell_file_path, cell_bgra):
                                raise OSError(f"Failed to write tile to {cell_file_path}")
                        pbar.update(1)


def is_not_mostly_blank(cell, non_blank_percentage=0.5, blank_threshold=240):
    """Tell whether a tile holds enough non-blank pixels to be worth keeping.

    The tile is reduced to a single grayscale channel; every pixel whose gray
    value is strictly below ``blank_threshold`` counts as non-blank.

    Args:
        cell: Image array accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
        non_blank_percentage: Fraction of pixels that must be exceeded for the
            tile to count as non-blank.
        blank_threshold: Gray values at or above this are treated as blank.

    Returns:
        Truthy when the fraction of non-blank pixels is strictly greater than
        ``non_blank_percentage``.
    """
    # NOTE(review): this conversion code assumes BGR channel order; arrays
    # built from OpenSlide/PIL regions are presumably RGBA — confirm whether
    # the swapped red/blue weighting matters for the chosen threshold.
    gray = cv2.cvtColor(cell, cv2.COLOR_BGR2GRAY)
    dark_mask = gray < blank_threshold
    dark_fraction = dark_mask.sum() / gray.size
    return dark_fraction > non_blank_percentage


# Tile every slide under data/whole-slides/gut at the default 256x256 cell
# size and level 0. Any existing "output/temp" directory is removed first.
# Only tiles in which more than half of the pixels are non-blank are saved.
grid_segment_slides(
    "data/whole-slides/gut",
    "output/temp",
    filter=lambda tile: is_not_mostly_blank(tile, non_blank_percentage=0.5),
)

Progress:   1%|▏         | 1913/135125 [00:06<08:02, 276.13it/s]


KeyboardInterrupt: 