In [None]:
# import timm
from PIL import Image, ImageFilter
# from torchvision import transforms
# import torch
import tiffslide
import os
import glob
import numpy as np
 
def makeDir(mydir):
    """Create the directory ``mydir`` if it does not already exist.

    Missing parent directories are created as well, and an existing
    directory is left untouched.

    Args:
        mydir: Path of the directory to create.

    Raises:
        FileExistsError: If ``mydir`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
    # check-then-create race and it also handles nested paths.
    os.makedirs(mydir, exist_ok=True)
 
# --- Configuration ---------------------------------------------------------
# Edge length, in level-0 pixels, of each square tile that gets extracted.
tileSize = 512

# Input: directory that holds the .svs whole-slide images.
datadir = "/blue/pinaki.sarder/manojkumargalla/PostProcess/data/WSIs/"

# Output: root directory; each slide gets its own sub-directory of tiles.
outdirbase = "/blue/pinaki.sarder/manojkumargalla/PostProcess/data/WSI_tiles"
makeDir(outdirbase)

# Collect every slide to process.
files = glob.glob(datadir + "/*.svs")
 
 
# Tile every slide: build a coarse tissue mask from the lowest-resolution
# pyramid level, then save each full-resolution tile whose top-left corner
# falls on a tissue pixel of that mask.
for slide_path in files:
    filename = os.path.splitext(os.path.basename(slide_path))[0]
    slide = None
    try:
        slide = tiffslide.open_slide(slide_path)

        nx, ny = slide.level_dimensions[0]

        lowest_level = slide.level_count - 1
        ds = int(slide.level_downsamples[lowest_level])

        print(f'Processing file name: {filename} nx: {nx}    ny: {ny}       downsample: {ds}')

        # Extract binary mask: pixels darker than `threshold` become 255,
        # everything else 0; the median filter removes small speckles.
        threshold = 200
        region = slide.read_region((0, 0), lowest_level, slide.level_dimensions[lowest_level])
        binary = region.convert('L').point(lambda p: 255 if p < threshold else 0)
        binary = binary.filter(ImageFilter.MedianFilter(size=29))
        binarynb = np.array(binary)
        mask_rows, mask_cols = binarynb.shape

        outdirSlide = f"{outdirbase}/{filename}"
        makeDir(outdirSlide)

        # Only whole tiles are extracted; partial tiles at the right/bottom
        # edges are skipped.
        n_tiles_x = nx // tileSize
        n_tiles_y = ny // tileSize

        for ix in range(n_tiles_x):
            if ix % 10 == 0:
                print(f"ix: {ix}/{n_tiles_x}")
            x = tileSize * ix
            # Map the level-0 coordinate onto the mask; the clamp guards
            # against rounding pushing the index one past the mask edge.
            xxx = min(int(x / ds), mask_cols - 1)
            for iy in range(n_tiles_y):
                y = tileSize * iy
                yyy = min(int(y / ds), mask_rows - 1)
                if binarynb[yyy, xxx] == 255:
                    tile = slide.read_region((x, y), 0, (tileSize, tileSize))
                    tile.save(f"{outdirSlide}/{str(x).zfill(5)}x_{str(y).zfill(5)}y.png")
    except Exception as e:
        # Keep going with the next slide, but report what actually failed.
        # `Exception` (not a bare except) lets Ctrl-C / kernel interrupt through.
        print(f'Exception while processing file name: {filename} - {str(e)}')
    finally:
        # Release the file handle even when tiling failed half-way.
        if slide is not None:
            slide.close()

In [2]:
'''This cell uses Python's multiprocessing library to split the slides into tiles in parallel, and CuPy, a NumPy-like library, to enable GPU acceleration for image-processing tasks.'''
import cupy as cp
import multiprocessing as mp
from PIL import Image, ImageFilter
import tiffslide
import os
import glob
import numpy as np

def makeDir(mydir):
    """Create the directory ``mydir`` if it does not already exist.

    Missing parent directories are created as well, and an existing
    directory is left untouched.

    Args:
        mydir: Path of the directory to create.

    Raises:
        FileExistsError: If ``mydir`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
    # check-then-create race and it also handles nested paths.
    os.makedirs(mydir, exist_ok=True)

def process_file(file):
    """Split one whole-slide image into PNG tiles that contain tissue.

    A binary tissue mask is built from the lowest-resolution pyramid level
    (grayscale threshold followed by a median filter). Every full
    ``tileSize`` x ``tileSize`` level-0 tile whose top-left corner maps onto
    a mask pixel equal to 255 is written to ``{outdirbase}/{filename}/``.

    Relies on the module-level ``tileSize`` and ``outdirbase``.

    Args:
        file: Path to the slide file to process.

    Returns:
        None. Any exception raised while processing is caught and printed
        so that one bad slide does not abort the whole pool.
    """
    filename = os.path.splitext(os.path.basename(file))[0]
    slide = None
    try:
        slide = tiffslide.open_slide(file)
        nx, ny = slide.level_dimensions[0]
        lowest_level = slide.level_count - 1
        ds = int(slide.level_downsamples[lowest_level])

        print(f'Processing file name: {filename} nx: {nx}    ny: {ny}       downsample: {ds}')
        threshold = 200
        region = slide.read_region((0, 0), lowest_level, slide.level_dimensions[lowest_level])
        binary = region.convert('L').point(lambda p: 255 if p < threshold else 0)
        binary = binary.filter(ImageFilter.MedianFilter(size=29))
        binarynb = cp.array(binary)
        mask_rows, mask_cols = binarynb.shape

        # Only whole tiles are extracted; partial edge tiles are skipped.
        n_tiles_x = nx // tileSize
        n_tiles_y = ny // tileSize

        # Look up the mask for ALL tile corners in one GPU gather and copy
        # the result to the host once. Reading `binarynb[y, x]` per tile
        # would cost a device round-trip for every single tile.
        # The clamp keeps indices in range: CuPy integer-array indexing does
        # not raise on out-of-bounds indices.
        mask_x = cp.minimum((cp.arange(n_tiles_x) * tileSize) // ds, mask_cols - 1)
        mask_y = cp.minimum((cp.arange(n_tiles_y) * tileSize) // ds, mask_rows - 1)
        is_tissue = cp.asnumpy(binarynb[mask_y[:, None], mask_x[None, :]] == 255)

        outdirSlide = f"{outdirbase}/{filename}"
        makeDir(outdirSlide)

        for ix in range(n_tiles_x):
            if ix % 10 == 0:
                print(f"ix: {ix}/{n_tiles_x}")
            x = tileSize * ix
            for iy in range(n_tiles_y):
                if is_tissue[iy, ix]:
                    y = tileSize * iy
                    tile = slide.read_region((x, y), 0, (tileSize, tileSize))
                    tile.save(f"{outdirSlide}/{str(nx)}_{str(ny)}_{str(x).zfill(5)}x_{str(y).zfill(5)}y.png")
    except Exception as e:
        print(f'Exception while processing file name: {filename} - {str(e)}')
    finally:
        # Release the file handle even when tiling failed half-way.
        if slide is not None:
            slide.close()

# Edge length, in level-0 pixels, of each square tile that gets extracted.
tileSize = 512
# Root output directory; each slide gets its own sub-directory of tiles.
outdirbase = "/blue/pinaki.sarder/manojkumargalla/PostProcess/data/WSI_tiles"
makeDir(outdirbase)
# Directory that holds the .svs whole-slide images.
datadir = "/blue/pinaki.sarder/manojkumargalla/PostProcess/data/WSIs/"
files = glob.glob(f"{datadir}/*.svs")

# Size the pool from the CPUs this process may actually use (on a shared
# node the affinity mask can be much smaller than the machine's core count)
# and never start more workers than there are slides.
if hasattr(os, "sched_getaffinity"):
    available_cpus = len(os.sched_getaffinity(0))
else:
    available_cpus = mp.cpu_count()
n_workers = max(1, min(available_cpus, len(files)))

# Create a pool of workers and process files in parallel (one slide per task)
with mp.Pool(processes=n_workers) as pool:
    pool.map(process_file, files)


Processing file name: S-2106-003588_PAS_1of2 nx: 95663    ny: 62252       downsample: 32Processing file name: S-2001-005357_PAS_1of2 nx: 71747    ny: 52156       downsample: 32Processing file name: S-1904-007293_PAS_1of2 nx: 71747    ny: 34815       downsample: 32Processing file name: S-1910-000089_PAS_2of2 nx: 115593    ny: 40475       downsample: 32Processing file name: S-1905-017738_PAS_1of2 nx: 153460    ny: 57357       downsample: 64


Processing file name: S-1905-018731_PAS_2of2 nx: 109614    ny: 30560       downsample: 32Processing file name: S-2103-004857_PAS_2of2 nx: 75733    ny: 51663       downsample: 32Processing file name: 18-162_PAS_4of6 nx: 37866    ny: 38911       downsample: 16
Processing file name: S-1908-010066_PAS_1of2 nx: 39859    ny: 60380       downsample: 32




Processing file name: S-1909-007149_PAS_1of2 nx: 129544    ny: 61132       downsample: 32
ix: 0/77ix: 0/140ix: 0/214
ix: 0/299


ix: 0/140
ix: 10/77ix: 10/214

ix: 10/299ix: 0/147

ix: 0/225
ix: 20/299
i