## scikit-image: cleaning, segmentation, etc.
[API Reference: scikit-image](https://scikit-image.org/docs/dev/api/api.html) <br>
****
[Home: scikit-image aka skimage](https://scikit-image.org/) <br>
[Documentation: scikit-image](https://scikit-image.org/docs/stable/) <br>
[scikit-image on GitHub](https://github.com/scikit-image/scikit-image) <br>
[User Guide: scikit-image](https://scikit-image.org/docs/dev/user_guide) <br>
[Founder-tutor](https://bids.berkeley.edu/resources/videos/scikit-image-image-analysis-python-intermediate) <br>
****
## OpenSlide:
[OpenSlide Wiki](https://github.com/openslide/openslide/wiki) <br>
[OpenSlide api](https://openslide.org/api/python/) <br>
[openslide github](https://github.com/openslide/openslide-python) <br>
[openslide dot org](https://openslide.org/) <br>
[openslide C docs](https://openslide.org/api/openslide_8h.html) <br>

In [1]:
import os
import time
import glob
import math
import os

import numpy as np
import openslide
from skimage.filters import threshold_otsu

# Directory that receives the extracted patch images.
output_dir = '../../DigiPath_MLTK_data/out_to_test'
# exist_ok=True keeps this cell safely re-runnable and avoids the
# check-then-create gap of a separate os.path.isdir() test.
os.makedirs(output_dir, exist_ok=True)

OUTPUT_PATH = output_dir
# Directory holding the input slide files.
data_dir = '../../DigiPath_MLTK_data/Aperio'
files_list = ['CMU-1-Small-Region.svs', 'CMU-1.svs']
# The first (small) slide is the default input for the rest of the notebook.
SVS = os.path.join(data_dir, files_list[0])
print('Working with file:', SVS, '\nOutput path:', OUTPUT_PATH)

Working with file: ../../DigiPath_MLTK_data/Aperio/CMU-1-Small-Region.svs 
Output path: ../../DigiPath_MLTK_data/out_to_test


In [3]:
out_dir = '../../DigiPath_MLTK_data/out_to_test'
file_types = ['.jpg']

# Full paths of the regular files directly inside out_dir whose extension is
# listed in file_types; sub-directories and other extensions are skipped.
files_list = [
    os.path.join(out_dir, entry)
    for entry in os.listdir(out_dir)
    if os.path.isfile(os.path.join(out_dir, entry))
    and os.path.splitext(entry)[1] in file_types
]

print(len(files_list))

1000


In [12]:
"""
need to know what size the pre-trained models used
& graphic file types preferred by tensorflow (float, float32, int, etc)
"""
t0 = time.time()
# open the file
test_file_name = os.path.join(data_dir, 'CMU-1.svs')
print('Opening Image file: \n\t{}\n'.format(test_file_name))
# The slide is left open on purpose: os_obj is inspected again further down.
os_obj = openslide.OpenSlide(test_file_name)
# OpenSlide reports sizes as (width, height), so unpack in that order.
pixels_width, pixels_height = os_obj.dimensions
print('Image is {} x {}\n{} pixels'.format(pixels_width, pixels_height, pixels_width * pixels_height))

# read the meta data: size of the last (most downsampled) pyramid level,
# again as (width, height)
pixels_width_ds, pixels_height_ds = os_obj.level_dimensions[-1]
print(pixels_width_ds, pixels_height_ds)

# find the thumbnail size ratio (need pre-trained size)
print(pixels_width / pixels_width_ds, pixels_height / pixels_height_ds)
print('obj.level_downsamples', os_obj.level_downsamples)
# TODO: make the mask / grid

# TODO: make the directory

# TODO: get the ndarrays of full size grid, write as type

# TODO: (save a list of files)

print('\ndesign phase', time.time() - t0)

Opening Image file: 
	../../DigiPath_MLTK_data/Aperio/CMU-1.svs

Image is 46000 x 32914
1514044000 pixels
2875 2057
16.0 16.000972289742343
obj.level_downsamples (1.0, 4.000121536217793, 16.00048614487117)

design phase 0.016112089157104492


In [13]:
# Print the built-in help text for the open slide object (os_obj).
help(os_obj)

Help on OpenSlide in module openslide object:

class OpenSlide(AbstractSlide)
 |  OpenSlide(filename)
 |  
 |  An open whole-slide image.
 |  
 |  close() is called automatically when the object is deleted.
 |  The object may be used as a context manager, in which case it will be
 |  closed upon exiting the context.
 |  
 |  If an operation fails, OpenSlideError is raised.  Note that OpenSlide
 |  has latching error semantics: once OpenSlideError is raised, all future
 |  operations on the OpenSlide object, other than close(), will fail.
 |  
 |  Method resolution order:
 |      OpenSlide
 |      AbstractSlide
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, filename)
 |      Open a whole-slide image.
 |  
 |  __repr__(self)
 |      Return repr(self).
 |  
 |  close(self)
 |      Close the OpenSlide object.
 |  
 |  get_best_level_for_downsample(self, downsample)
 |      Return the best level for displaying the given downsample.
 |  
 |  read_region(self, 

```python
# Cap on the number of patches written in one process_svs run: extraction
# returns as soon as the num_patches counter reaches this value.
MAXPATCHESTOWRITE = 1000
import glob
import math
import os
import numpy as np
import openslide
from skimage.filters import threshold_otsu

def get_thumbnail(img, x_level0, y_level0, patch_size, output_path):
    """Read one square level-0 patch and save it as a JPEG unless it is too bright.

    The region at ``(x_level0, y_level0)`` with edge length ``patch_size`` is
    read from ``img`` at level 0 and converted to RGB. It is written into
    ``output_path`` only when the mean of its greyscale version is below 230;
    every saved patch also increments the module-level ``num_patches`` counter.

    The file name is ``<slide>_<x>_<y>_0_<patch_size>.jpg`` where ``<slide>``
    is the slide's ``aperio.Filename`` property with spaces replaced by
    underscores. Nothing is returned.
    """
    global num_patches

    region = img.read_region((x_level0, y_level0), 0, (patch_size, patch_size))
    rgb_patch = region.convert('RGB')

    # The name is built before the brightness test so that a missing
    # 'aperio.Filename' property fails for every patch, bright or not.
    slide_name = img.properties['aperio.Filename'].replace(' ', '_')
    name_parts = [slide_name, str(x_level0), str(y_level0), '0', str(patch_size)]
    out_name = '_'.join(name_parts) + '.jpg'

    mean_grey = np.average(np.array(rgb_patch.convert('L')))
    if not mean_grey < 230:
        # Too bright: skip without saving or counting.
        return

    num_patches += 1
    rgb_patch.save(os.path.join(output_path, out_name))

def process_svs(SVS,
                normalization_factor=1000,
                patch_size=512,
                buffer=10,
                output_path=OUTPUT_PATH):
    """Cut the foreground regions of a slide into level-0 patches.

    A thumbnail of the slide is thresholded with Otsu's method; thumbnail
    pixels darker than the threshold form the foreground mask (presumably
    tissue on a bright background). The mask is walked in windows that each
    correspond to one ``patch_size`` patch, and for every window containing at
    least one foreground pixel ``get_thumbnail`` is called to read and save
    the matching level-0 patch.

    Args:
        SVS: Path of the slide file passed to ``openslide.OpenSlide``.
        normalization_factor: Value used as both the width and the height
            bound of the requested thumbnail.
        patch_size: Edge length, in level-0 pixels, of each square patch.
        buffer: Number of mask pixels skipped along every edge of the mask.
        output_path: Directory handed to ``get_thumbnail`` for the patch files.

    Side effects:
        Resets the module-level ``num_patches`` counter to 0, prints progress,
        and returns early once the counter reaches ``MAXPATCHESTOWRITE``.
    """
    global num_patches
    num_patches = 0

    # The context manager closes the slide on every exit path, including the
    # early return taken when the patch limit is reached.
    with openslide.OpenSlide(SVS) as img:
        # OpenSlide reports the level-0 size as (width, height) == (x, y).
        x_max, y_max = img.dimensions
        thumbnail = img.get_thumbnail((normalization_factor, normalization_factor))

        grey_thumbnail = np.array(thumbnail.convert("L"))
        thresh = threshold_otsu(grey_thumbnail)
        # The image array is laid out (row, column) == (y, x). Transpose it so
        # the mask is indexed [x, y], the same order as img.dimensions and the
        # read_region location; otherwise patches are read from the wrong place.
        mask = (grey_thumbnail < thresh).T
        mask_x, mask_y = mask.shape

        # how many pixels in the raw image per pixel in mask
        x_num_orgPix_per_thumbPix = math.ceil(x_max / mask_x)
        y_num_orgPix_per_thumbPix = math.ceil(y_max / mask_y)

        # Find out how many pixels in image mask to count as a patch in original
        num_x_mask_pixels_per_rawPatch = math.ceil(patch_size / x_num_orgPix_per_thumbPix)
        num_y_mask_pixels_per_rawPatch = math.ceil(patch_size / y_num_orgPix_per_thumbPix)

        # Advance one whole window at a time so consecutive patches do not
        # overlap. max(1, ...) keeps range() valid for a degenerate patch_size,
        # in which case every window is empty and nothing is written.
        x_step = max(1, num_x_mask_pixels_per_rawPatch)
        y_step = max(1, num_y_mask_pixels_per_rawPatch)

        # Iterate through the mask to identify windows with positive pixels
        for x in range(buffer, mask_x - buffer, x_step):
            x_mask_window = x + num_x_mask_pixels_per_rawPatch
            for y in range(buffer, mask_y - buffer, y_step):
                y_mask_window = y + num_y_mask_pixels_per_rawPatch
                # end='\r' rewrites a single status line instead of scrolling.
                print('X: {}\tY:{} of {} with total of {} so far'.format(x, y,
                                                                         mask.shape,
                                                                         num_patches), end='\r',
                      flush=True)

                if mask[x:x_mask_window, y:y_mask_window].any():
                    # convert mask coordinates to level0 coordinates
                    x_level0 = x * x_num_orgPix_per_thumbPix
                    y_level0 = y * y_num_orgPix_per_thumbPix
                    get_thumbnail(img, x_level0, y_level0, patch_size, output_path)

                    # Limit number of files
                    if num_patches >= MAXPATCHESTOWRITE:
                        print('\n\n\t{} num_patched out\n\n'.format(num_patches))
                        return

    print('Printed {} from {}'.format(num_patches, SVS))

# Module-level counter of saved patches: get_thumbnail increments it and
# process_svs resets it to 0 at the start of every run.
num_patches = 0
# The string literal below is an inert expression that keeps a disabled
# batch driver (looping process_svs over a glob of slide files) for reference.
"""
# if __name__ == '__main__':
#     for SVS in glob.glob('/data/biliary/svs/negative/*svs'):
#         process_svs(SVS)
"""
# SVS is not assigned in this cell; it must already be defined by an earlier
# cell before this one is run.
print('processing: {}'.format(SVS))
process_svs(SVS)
```