In [2]:
import os
import numpy as np
from tqdm import tqdm
import openslide
import random
from PIL import Image
from collections import defaultdict
import cv2

In [3]:
# --- Notebook configuration -------------------------------------------------
# Slide input folder and the output locations for mosaics and patches.
# NOTE(review): out_dir, image_folder and tile_size are reassigned inside the
# HUN / Urkunina cells further down, so those later values are the ones the
# mosaic-building loops actually use.
image_folder = '/home/smedin7/data/KDM/urkunina_hun_slides/slides_hun'
out_dir = "/home/smedin7/data/KDM/new_mosaics"

# Patch output locations (not referenced by the cells visible in this notebook).
save_path = "/home/smedin7/data/KDM/new_patches"
save_path_patches = "/home/smedin7/data/KDM/new_pretraining_patches"

# Edge length, in pixels, of one square tile.
tile_size = 192

In [None]:
def is_tissue_patch(image_array, white_threshold=0.9):
    """
    Determines if a patch contains sufficient tissue based on its 'whiteness'.

    The whiteness ratio is the sum of all pixel values divided by the sum an
    all-white patch (255 in every value) of the same shape would have. A patch
    counts as tissue when that ratio is strictly below ``white_threshold``.

    Parameters
    ----------
    image_array : numpy.ndarray
        Pixel data on a 0-255 scale. Any channel layout is accepted
        (grayscale HxW, RGB HxWx3, RGBA HxWx4, ...).
    white_threshold : float, optional
        Ratio at or above which the patch is treated as background.

    Returns
    -------
    bool
        True if the patch is darker than the threshold, False otherwise
        (including for an empty patch).
    """
    # An empty patch has nothing to judge and would divide by zero below;
    # treat it as background.
    if image_array.size == 0:
        return False

    # Sum up all the pixel values in the patch
    total_pixel_sum = image_array.sum()

    # Maximum possible sum for a completely white patch. `size` counts every
    # value across all channels, so the channel count is not hard-coded.
    max_pixel_sum = 255 * image_array.size

    # Calculate the 'whiteness' ratio of the patch
    whiteness_ratio = total_pixel_sum / max_pixel_sum

    return bool(whiteness_ratio < white_threshold)

HUN

In [11]:
def extract_patches_from_slide(slide_path, level, patch_size, stride, max_patches=18):
    """
    Collect randomly ordered tissue patches from one whole-slide image.

    Candidate patch origins form a regular grid (step ``stride``) over the
    requested pyramid level. The x and y coordinate lists are shuffled
    independently, then visited column by column until ``max_patches`` patches
    pass ``is_tissue_patch`` or the grid is exhausted.

    Parameters
    ----------
    slide_path : str
        Path of the slide file to open with OpenSlide.
    level : int
        Pyramid level to read from.
    patch_size : int
        Edge length, in pixels at ``level``, of each square patch.
    stride : int
        Grid step, in pixels at ``level``, between candidate origins.
    max_patches : int, optional
        Stop as soon as this many tissue patches were collected (default 18).

    Returns
    -------
    list of numpy.ndarray
        Up to ``max_patches`` arrays of shape (patch_size, patch_size, 3)
        holding the first three channels of each accepted region.
    """
    patches = []
    if max_patches <= 0:
        return patches

    # The context manager closes the slide handle even if reading fails.
    with openslide.OpenSlide(slide_path) as slide:
        level_width, level_height = slide.level_dimensions[level]
        downsample = slide.level_downsamples[level]

        # "+ 1" keeps the last origin at which a full patch still fits.
        xs = np.arange(0, level_width - patch_size + 1, stride)
        ys = np.arange(0, level_height - patch_size + 1, stride)
        random.shuffle(xs)
        random.shuffle(ys)

        for x in xs:
            for y in ys:
                # read_region expects the location in level-0 coordinates,
                # hence the scaling by the level's downsample factor.
                location = (int(x * downsample), int(y * downsample))
                patch = slide.read_region(location, level, (patch_size, patch_size))
                # Drop the alpha channel of the RGBA region.
                patch_array = np.array(patch)[:, :, :3]
                if is_tissue_patch(patch_array):
                    patches.append(patch_array)
                    if len(patches) >= max_patches:
                        return patches
    return patches

def create_mosaic(patches, grid_size=6, tile_size=192):
    """
    Assemble square patches into a ``grid_size`` x ``grid_size`` mosaic image.

    Tiles are filled row by row, taking patches from the END of ``patches``
    backwards (the top-left tile is ``patches[-1]``). The input list is left
    unmodified.

    Parameters
    ----------
    patches : list of numpy.ndarray
        At least ``grid_size ** 2`` arrays assignable to a
        (tile_size, tile_size, 3) slot.
    grid_size : int, optional
        Number of tiles per mosaic side (default 6).
    tile_size : int, optional
        Edge length of one tile in pixels (default 192).

    Returns
    -------
    numpy.ndarray
        uint8 array of shape (grid_size * tile_size, grid_size * tile_size, 3).

    Raises
    ------
    IndexError
        If fewer than ``grid_size ** 2`` patches are supplied.
    """
    n_tiles = grid_size * grid_size
    if len(patches) < n_tiles:
        raise IndexError(
            f"create_mosaic needs at least {n_tiles} patches, got {len(patches)}"
        )

    side = grid_size * tile_size
    mosaic = np.zeros((side, side, 3), dtype=np.uint8)
    for k in range(n_tiles):
        row, col = divmod(k, grid_size)
        top, left = row * tile_size, col * tile_size
        # Index from the end instead of pop() so the caller's list survives.
        mosaic[top:top + tile_size, left:left + tile_size] = patches[-(k + 1)]
    return mosaic

# Build one mosaic per HUN patient, pooling patches across that patient's slides.
out_dir = "/home/smedin7/data/KDM/mosaicsv2"
image_folder = "/home/smedin7/data/KDM/urkunina_hun_slides/slides_hun"
tile_size = 192
patches_needed = 36  # create_mosaic fills a 6 x 6 grid by default

# Image.save does not create missing directories.
os.makedirs(out_dir, exist_ok=True)

wsis = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if os.path.isfile(os.path.join(image_folder, f)) and not f.startswith('.DS_Store')]

# Group slides by patient: the first four characters of the file name are
# used as the patient identifier.
patient_folders = defaultdict(list)
for wsi in wsis:
    patient_id = os.path.basename(wsi)[:4]
    patient_folders[patient_id].append(wsi)

# Patients whose slides did not yield enough tissue patches for a mosaic.
not_processed = []

for patient_id, slides in tqdm(patient_folders.items()):
    patches = []
    for slide in slides:
        # Level 1, half-tile stride (50% overlap between neighbouring candidates).
        patches += extract_patches_from_slide(slide, 1, tile_size, tile_size//2)
        if len(patches) >= patches_needed:
            break

    if len(patches) >= patches_needed:
        mosaic_img = Image.fromarray(create_mosaic(patches))
        mosaic_img.save(os.path.join(out_dir, f"{patient_id}.png"))
    else:
        not_processed.append(patient_id)

# Report skipped patients instead of dropping them silently.
print("Not processed patient IDs:", not_processed)

100%|██████████| 57/57 [02:22<00:00,  2.50s/it]


Urkunina

In [13]:
def extract_patches_from_slide(slide_path, level, patch_size, stride, max_patches=36):
    """
    Collect randomly ordered tissue patches from one whole-slide image.

    Candidate patch origins form a regular grid (step ``stride``) over the
    requested pyramid level. The x and y coordinate lists are shuffled
    independently, then visited column by column until ``max_patches`` patches
    pass ``is_tissue_patch`` or the grid is exhausted.

    Parameters
    ----------
    slide_path : str
        Path of the slide file to open with OpenSlide.
    level : int
        Pyramid level to read from.
    patch_size : int
        Edge length, in pixels at ``level``, of each square patch.
    stride : int
        Grid step, in pixels at ``level``, between candidate origins.
    max_patches : int, optional
        Stop as soon as this many tissue patches were collected (default 36).

    Returns
    -------
    list of numpy.ndarray
        Up to ``max_patches`` arrays of shape (patch_size, patch_size, 3)
        holding the first three channels of each accepted region.
    """
    patches = []
    if max_patches <= 0:
        return patches

    # The context manager closes the slide handle even if reading fails.
    with openslide.OpenSlide(slide_path) as slide:
        level_width, level_height = slide.level_dimensions[level]
        downsample = slide.level_downsamples[level]

        # "+ 1" keeps the last origin at which a full patch still fits.
        xs = np.arange(0, level_width - patch_size + 1, stride)
        ys = np.arange(0, level_height - patch_size + 1, stride)
        random.shuffle(xs)
        random.shuffle(ys)

        for x in xs:
            for y in ys:
                # read_region expects the location in level-0 coordinates,
                # hence the scaling by the level's downsample factor.
                location = (int(x * downsample), int(y * downsample))
                patch = slide.read_region(location, level, (patch_size, patch_size))
                # Drop the alpha channel of the RGBA region.
                patch_array = np.array(patch)[:, :, :3]
                if is_tissue_patch(patch_array):
                    patches.append(patch_array)
                    if len(patches) >= max_patches:
                        return patches
    return patches

def create_mosaic(patches, grid_size=6, tile_size=192):
    """
    Assemble square patches into a ``grid_size`` x ``grid_size`` mosaic image.

    Tiles are filled row by row, taking patches from the END of ``patches``
    backwards (the top-left tile is ``patches[-1]``). The input list is left
    unmodified.

    Parameters
    ----------
    patches : list of numpy.ndarray
        At least ``grid_size ** 2`` arrays assignable to a
        (tile_size, tile_size, 3) slot.
    grid_size : int, optional
        Number of tiles per mosaic side (default 6).
    tile_size : int, optional
        Edge length of one tile in pixels (default 192).

    Returns
    -------
    numpy.ndarray
        uint8 array of shape (grid_size * tile_size, grid_size * tile_size, 3).

    Raises
    ------
    IndexError
        If fewer than ``grid_size ** 2`` patches are supplied.
    """
    n_tiles = grid_size * grid_size
    if len(patches) < n_tiles:
        raise IndexError(
            f"create_mosaic needs at least {n_tiles} patches, got {len(patches)}"
        )

    side = grid_size * tile_size
    mosaic = np.zeros((side, side, 3), dtype=np.uint8)
    for k in range(n_tiles):
        row, col = divmod(k, grid_size)
        top, left = row * tile_size, col * tile_size
        # Index from the end instead of pop() so the caller's list survives.
        mosaic[top:top + tile_size, left:left + tile_size] = patches[-(k + 1)]
    return mosaic

# Build one mosaic per Urkunina slide.
out_dir = "/home/smedin7/data/KDM/mosaicsv2"
image_folder = "/home/smedin7/data/KDM/urkunina_hun_slides/slides_urkunina"
tile_size = 192

# Image.save does not create missing directories.
os.makedirs(out_dir, exist_ok=True)

# Identifiers of slides that did not yield enough tissue patches for a mosaic.
not_processed = []

wsis = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if os.path.isfile(os.path.join(image_folder, f)) and not f.startswith('.DS_Store')]

for wsi in tqdm(wsis):
    # NOTE(review): the identifier is the first four characters of the file
    # name; two slides sharing that prefix would write to the same PNG and the
    # later one would overwrite the earlier - confirm prefixes are unique here.
    patient_id = os.path.basename(wsi)[:4]
    # Level 1, half-tile stride (50% overlap between neighbouring candidates).
    patches = extract_patches_from_slide(wsi, 1, tile_size, tile_size//2)
    if len(patches) >= 36:
        mosaic_img = Image.fromarray(create_mosaic(patches))
        mosaic_img.save(os.path.join(out_dir, f"{patient_id}.png"))
    else:
        # Append the patient_id to the not_processed list
        not_processed.append(patient_id)

# Print or save the list of not processed patient IDs
print("Not processed patient IDs:", not_processed)

  0%|          | 0/71 [00:00<?, ?it/s]

100%|██████████| 71/71 [03:21<00:00,  2.84s/it]

Not processed patient IDs: []



