# Tiling Whole Slide Images
This section is the second step of our dataset processing. Here we tile our SVS files into smaller TIFF files so that they become more manageable and readable.

#### Importing Libraries

In [None]:
import os
openslide_path = r"C:\Users\aaron\openslide-win64-20171122\openslide-win64-20171122\bin"
os.environ['PATH'] = openslide_path + ";" + os.environ['PATH']
from openslide import open_slide
import openslide
from PIL import Image
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
from openslide.deepzoom import DeepZoomGenerator #For tiling and zoom
from NormHnE import norm_HnE
import tifffile as tiff

#### Initial Testing
This step runs an initial test on a single slide to check that the WSI is tiled properly. It is also essential for Section 3: this slide will be used to create the simple tile filter that is used there.

In [None]:
# Location of the single whole-slide image used for the initial test.
slide_path = "D:/Downloads/Grades Folder/SVS-20221218T164716Z-001/SVS/TCGA-2Z-A9JO-01A-01-TS1.6DC87048-FE1E-4795-959F-5AF85DECB6CE.svs"
# Open the slide with OpenSlide; the handle is reused by the cells below.
slide = open_slide(slide_path)

In [None]:
# Wrap the opened slide in a Deep Zoom generator that serves 1000x1000-pixel
# tiles with no overlap between neighbouring tiles.
tiles = DeepZoomGenerator(
    slide,
    tile_size=1000,
    overlap=0,
    limit_bounds=False,
)

In [None]:
# Display how many Deep Zoom levels the generator exposes for this slide.
# Left as a bare expression so the notebook prints the value as cell output.
tiles.level_count

In [None]:
# Number of tile columns and rows available at Deep Zoom level 10.
# NOTE(review): level 10 is hard-coded; whether it is the highest level of
# this slide depends on tiles.level_count -- confirm before relying on it.
(cols, rows) = tiles.level_tiles[10]

In [None]:
# Tile the test slide at Deep Zoom level 10 and save the normalised and
# hematoxylin images of every tile that passes the tissue filter.
# Relies on `tiles`, `cols` and `rows` defined in the previous cells.
test_out_dir = "D:/Downloads/Grades Folder/SVS-20221218T164716Z-001/SVS/A9JO/"
# Writing a tile into a folder that does not exist fails, so make sure the
# output folder is there before the loop starts.
os.makedirs(test_out_dir, exist_ok=True)

for row in range(rows):  # every tile row of level 10
    for col in range(cols):  # every tile column of level 10
        tile_name = str(col) + "_" + str(row)  # "<column>_<row>"
        temp_tile = tiles.get_tile(10, (col, row))  # tile at (col, row) of level 10
        temp_tile_RGB = temp_tile.convert('RGB')  # some images may be RGBA
        temp_tile_np = np.array(temp_tile_RGB)  # PIL image -> NumPy array

        # Keep only tiles that are dark enough (mean < 200) and varied enough
        # (std > 50); per the original author these thresholds come from the
        # blank_partial_good comparison.
        if temp_tile_np.mean() < 200 and temp_tile_np.std() > 50:
            print("Processing tile number:", tile_name)
            # norm_HnE returns the normalised, hematoxylin and eosin images;
            # the eosin image (E_img) is not saved here.
            norm_img, H_img, E_img = norm_HnE(temp_tile_np, Io=240, alpha=1, beta=0.15)
            # imwrite is the current tifffile name for the deprecated imsave.
            tiff.imwrite(test_out_dir + tile_name + "_norm.tif", norm_img)  # normalised tile
            tiff.imwrite(test_out_dir + tile_name + "_H.tif", H_img)  # hematoxylin tile

#### Actual Tiling of SVS files
This step is similar to the initial testing above, except that it has an additional outer for loop that iterates over all the SVS files found in the level0 folder, which contains the SVS files that will be used. Note: the function used to get the normalized and hematoxylin images is in a separate .py file.

In [None]:
# Tile every whole-slide image found under src_dir and write the normalised
# and hematoxylin images of the tissue-bearing tiles to
# ot_dir/<file name>/Norm/ and ot_dir/<file name>/H/ respectively.
src_dir = "E:/level0/"  # Whole Slide Images directory
ot_dir = "D:/level1/"  # Destination of tiled files
# Deep Zoom level that gets tiled. NOTE(review): hard-coded; a slide with
# fewer levels raises IndexError on the level_tiles lookup below.
tile_level = 12

for (dirpath, dirnames, filenames) in os.walk(src_dir):  # visit every folder below src_dir
    print(filenames)
    for fname in filenames:
        norml_img = ot_dir + fname + "/Norm/"  # output folder for the normalised tiles
        Hl_img = ot_dir + fname + "/H/"  # output folder for the hematoxylin tiles
        # Build the path from the folder os.walk actually found the file in,
        # instead of assuming the folder is named after the file.
        slide = open_slide(os.path.join(dirpath, fname))
        try:
            # The writer does not create missing folders, so create them here.
            os.makedirs(norml_img, exist_ok=True)
            os.makedirs(Hl_img, exist_ok=True)

            # 1000x1000-pixel tiles with no overlap between neighbours.
            tiles = DeepZoomGenerator(slide, tile_size=1000, overlap=0, limit_bounds=False)
            cols, rows = tiles.level_tiles[tile_level]  # tile grid of the chosen level

            for row in range(rows):
                for col in range(cols):
                    tile_name = str(col) + "_" + str(row)  # "<column>_<row>"
                    temp_tile = tiles.get_tile(tile_level, (col, row))
                    temp_tile_RGB = temp_tile.convert('RGB')  # some images may be RGBA
                    temp_tile_np = np.array(temp_tile_RGB)  # PIL image -> NumPy array

                    # Same tissue filter as in the initial test: thresholds
                    # come from the blank_partial_good comparison per the
                    # original author.
                    if temp_tile_np.mean() < 200 and temp_tile_np.std() > 50:
                        print("Processing tile number:", tile_name,fname)
                        # The eosin image (E_img) is returned but not saved.
                        norm_img, H_img, E_img = norm_HnE(temp_tile_np, Io=240, alpha=1, beta=0.15)
                        # imwrite is the current tifffile name for the deprecated imsave.
                        tiff.imwrite(norml_img + tile_name + "_norm.tif", norm_img)  # normalised tile
                        tiff.imwrite(Hl_img + tile_name + "_H.tif", H_img)  # hematoxylin tile
        finally:
            # Release the slide handle before moving on to the next file.
            slide.close()
                    
# Note: for the paper we used the hematoxylin tiles instead of the normalized tiles; just delete the save call for the tile type you do not need.

### Cellprofiler Preprocessing
CellProfiler will only extract the features of 20 images per patient to reduce the computation time, because processing the whole dataset would take a very long time.

In [None]:
import fnmatch
import os
import random
import shutil

# Copy 20 randomly chosen hematoxylin tiles of every tiled slide from
# D:/level1/<slide>/H into D:/Level2/<slide>/ for CellProfiler.
tiles_root = 'D:/level1/'  # Tiles directory (one sub-folder per slide)
destDirectory = 'D:/Level2/'  # Destination directory
samples_per_slide = 20  # number of tiles copied per slide folder

for slide_name in os.listdir(tiles_root):  # every entry of the tiles directory
    dirpath = tiles_root + slide_name + '/H'  # folder with the hematoxylin tiles
    if not os.path.isdir(dirpath):
        # Stray files or slides without an H folder would make listdir fail.
        print('Skipping (no H folder):', dirpath)
        continue

    # Only names containing a dot are counted; the sample is drawn from this
    # same list so the count and the selection cannot disagree.
    candidates = fnmatch.filter(os.listdir(dirpath), '*.*')
    count = len(candidates)
    print('File Count:', count)

    if count >= samples_per_slide:
        # Without an existing destination folder shutil.copy would write each
        # tile to a FILE named after the slide, overwriting the previous one.
        dest_dir = os.path.join(destDirectory, slide_name)
        os.makedirs(dest_dir, exist_ok=True)
        for fname in random.sample(candidates, samples_per_slide):
            srcpath = os.path.join(dirpath, fname)  # full path of the selected tile
            shutil.copy(srcpath, dest_dir)  # keeps the original file name
    else:  # fewer than 20 tiles available: nothing is copied for this slide
        print("noway")