In [1]:
'''This cell uses Python's multiprocessing library to split whole-slide images into tiles in parallel, and CuPy (a NumPy-like library) to enable GPU acceleration for image processing tasks.'''
import cupy as cp
import multiprocessing as mp
from PIL import Image, ImageFilter
import tiffslide
import os
import glob
import numpy as np

def makeDir(mydir):
    """Create directory *mydir* if it does not already exist.

    Missing parent directories are created as well. The call is safe when
    several worker processes try to create the same directory at once,
    because an already existing directory is not treated as an error.

    Args:
        mydir: Path of the directory to create.

    Raises:
        FileExistsError: If *mydir* exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of exists() + mkdir().
    os.makedirs(mydir, exist_ok=True)

def process_file(file):
    """Cut one whole-slide image into tiles, keeping only tiles that hit the mask.

    A binary mask is built from the lowest-resolution pyramid level: the
    level is converted to grayscale, pixels darker than the threshold (200)
    are set to 255 and all others to 0, and the result is smoothed with a
    29-pixel median filter. The level-0 image is then walked on a grid of
    ``tileSize`` x ``tileSize`` tiles (partial tiles at the right and bottom
    edges are skipped). A tile is saved as a PNG under
    ``{outdirbase}/{filename}`` when the matching window of the mask contains
    at least one 255 pixel.

    Relies on the module-level names ``tileSize``, ``outdirbase`` and
    ``makeDir``.

    Args:
        file: Path to the ``.svs`` slide to process.

    Returns:
        None. Any exception raised while processing is caught and printed so
        that one bad slide does not abort the rest of the batch.
    """
    # Base name without the trailing ".svs"; used for logs and the output dir.
    filename = os.path.basename(file)
    if filename.endswith('.svs'):
        filename = filename[:-len('.svs')]

    slide = None
    try:
        slide = tiffslide.open_slide(file)
        nx, ny = slide.level_dimensions[0]
        lowest_level = slide.level_count - 1
        ds = int(slide.level_downsamples[lowest_level])
        print(f'Processing file name: {filename} nx: {nx}    ny: {ny}       downsample: {ds}')

        threshold = 200
        region = slide.read_region((0, 0), lowest_level, slide.level_dimensions[lowest_level])
        binary = region.convert('L').point(lambda p: 255 if p < threshold else 0)
        binary = binary.filter(ImageFilter.MedianFilter(size=29))
        binarynb = cp.array(binary)

        outdirSlide = f"{outdirbase}/{filename}"
        makeDir(outdirSlide)

        # Size of one tile measured in mask pixels. Clamp to 1 so a downsample
        # larger than the tile size still inspects a non-empty window instead
        # of silently rejecting every tile.
        window = max(1, tileSize // ds)
        n_cols = nx // tileSize
        n_rows = ny // tileSize

        # c counts the tiles saved since the last progress message.
        c = 0
        for ix in range(n_cols):
            if ix % 10 == 0:
                print(f"ix: {ix}/{n_cols}, saved {c} images!")
                c = 0
            x = tileSize * ix
            xxx = int(x / ds)
            for iy in range(n_rows):
                y = tileSize * iy
                yyy = int(y / ds)
                mask_window = binarynb[yyy:yyy + window, xxx:xxx + window]
                # cp.any returns a 0-d device array; bool() brings it to host.
                if not bool(cp.any(mask_window == 255)):
                    continue
                c += 1
                tile = slide.read_region((x, y), 0, (tileSize, tileSize))
                tile.save(f"{outdirSlide}/{str(nx)}_{str(ny)}_{str(x).zfill(5)}x_{str(y).zfill(5)}y.png")
    except Exception as e:
        # Deliberate best-effort: report the failure and let the pool continue.
        print(f'Exception while processing file name: {filename} - {str(e)}')
    finally:
        # Release the slide's file handle even when processing failed.
        if slide is not None:
            slide.close()

# Module-level settings. tileSize and outdirbase are read by process_file, so
# they must stay defined at import time for the worker processes.
tileSize = 512  # edge length, in level-0 pixels, of each square tile
outdirbase = "/blue/pinaki.sarder/manojkumargalla/PostProcess/data/model2/batch1/3/tiles_3/"
datadir = "/blue/pinaki.sarder/manojkumargalla/PostProcess/data/model2/batch1/3/wsis_3/"

# Only launch the pool when run as the main program, so that importing this
# module (as worker processes may do) does not start another pool.
if __name__ == "__main__":
    makeDir(outdirbase)
    files = glob.glob(f"{datadir}/*.svs")

    if files:
        # One slide per worker at a time; never start more workers than slides.
        n_workers = min(mp.cpu_count(), len(files))
        with mp.Pool(processes=n_workers) as pool:
            pool.map(process_file, files)
    else:
        print(f"No .svs files found in {datadir}")


Processing file name: S-2006-004953_PAS_1of2 nx: 85698    ny: 34170       downsample: 32
Processing file name: S-2006-005047_PAS_1of2 nx: 71747    ny: 37788       downsample: 32
Processing file name: S-2010-004277_PAS_2of2 nx: 81712    ny: 32844       downsample: 32
Processing file name: S-2006-002084_PAS_1of3 nx: 65768    ny: 38773       downsample: 32
Processing file name: S-2006-005094_PAS_1of2 nx: 83705    ny: 54810       downsample: 32
Processing file name: S-2006-002045_PAS_1of2 nx: 121572    ny: 43288       downsample: 32
Processing file name: S-2010-004184_PAS_1of2 nx: 111607    ny: 44434       downsample: 32
Processing file name: S-2001-005357_PAS_1of2 nx: 71747    ny: 52156       downsample: 32
Processing file name: S-2006-005000_PAS_1of2 nx: 71747    ny: 49018       downsample: 32
Processing file name: S-1905-017738_PAS_1of2 nx: 153460    ny: 57357       downsample: 64
Processing file name: S-2006-002044_PAS_2of2 nx: 151467    ny: 65777       downsample: 64
Processing file n