In [None]:
%matplotlib inline

# Roundtrip run module with a list of WSI image files
Requires the data directory *DigiPath_MLTK_data/(image data)* at the same level as the DigiPath_MLTK clone directory. <br>
(See the Jupyter notebook *1_download_openslide_data.ipynb* in this directory.) <br>
    
```python
# <o><o> set the images directory in the cell below, just under this red comment:
"""                                    Set the name of the image files directory here  """
```

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tempfile import TemporaryDirectory
from PIL import ImageDraw

import IPython.display as ip_display
import os
import sys
import time
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import skimage
from skimage.filters import threshold_otsu

import openslide

import PIL
from PIL.Image import Image

#       module import - put ../src/python on the import path, then import the conversion helpers
sys.path.insert(0, '../src/python')
from openslide_2_tfrecord import *

file_ext_list = ['.svs', '.tif', '.tiff', '.tfrecords']
def get_file_size_dict(data_dir, reverse_dict=False):
    """ Map each image / TFRecord file found in a directory to its size in bytes.

    Only regular files directly inside data_dir are considered (no recursion),
    and only those whose extension is an exact, case-sensitive match for an
    entry of file_ext_list.

    Args:
        data_dir:       directory to scan
        reverse_dict:   accepted, but not used by this function

    Returns:
        file_size_dict: {os.path.join(data_dir, file_name): size_in_bytes}
                        The dict is not sorted; rank its items by value to get
                        a size-ordered list.
    """
    candidate_paths = (os.path.join(data_dir, name) for name in os.listdir(data_dir))
    return {
        path: os.path.getsize(path)
        for path in candidate_paths
        if os.path.isfile(path) and os.path.splitext(path)[1] in file_ext_list
    }

"""                                    Set the name of the image files directory here  """
data_dir = '../../DigiPath_MLTK_data/Aperio'

# {full path: size in bytes} for every supported file in data_dir
file_size_dict = get_file_size_dict(data_dir)

# Rank the paths by size directly.  Going through an inverted {size: path}
# dict loses files: two files with the same byte count share one key, so one
# path ends up listed twice and the other never appears.  The path is the
# tie-breaker, which keeps the order reproducible between runs.
files_list = sorted(file_size_dict, key=lambda path: (file_size_dict[path], path))

# Kept so that code reading these names still works.  rev_file_size_dict holds
# only one path per distinct size - do not use it to build file lists.
ordered_keys = [file_size_dict[path] for path in files_list]
rev_file_size_dict = {v:k for k, v in file_size_dict.items()}

#                                       display the list of files:
print('\nordered by size:\n')
for count, f in enumerate(files_list):
    _, f_only = os.path.split(f)
    print('%3i %30s: %i'%(count, f_only, file_size_dict[f]))

## Next cell converts all listed images into TFRecord files

In [None]:
# Conversion settings are identical for every file, so they are set once here
# instead of on every pass through the loop.
output_dir = '../../DigiPath_MLTK_data/module_test'
patch_size = [224, 224]

"""                        patch_keep_threshold (0, 1)    Larger is more patches  <o><o>  """
patch_keep_threshold = 0.9

# Warn (once) when the output directory is missing; the conversion is still attempted.
if not os.path.isdir(output_dir):
    print(output_dir, '\n\tnot found')

# One [wsi file name, tfrecord file name] pair per converted image.
imf_tfr_list = []
for fn, svs_file_name in enumerate(files_list):
    # files_list entries already include data_dir, so they are used as-is.
    # Joining data_dir onto them again only resolves when the '..' parts of
    # data_dir happen to cancel out, and fails for e.g. 'data/Aperio'.
    print(fn, '\n', svs_file_name)

    start_call_time = time.time()
    report_dict = svs_file_to_patches_tfrecord(svs_file_name, output_dir, patch_size, patch_keep_threshold)

    tfrecord_file_name = report_dict['tfrecord_file_name']

    imf_tfr_list.append([svs_file_name, tfrecord_file_name])

    number_of_patches = report_dict['number_of_patches']
    # Not used in this cell; kept as notebook-level names holding the values
    # reported for the most recently converted file.
    mask_dict = report_dict['mask_dict']
    temp_dir = report_dict['temp_dir']

    total_run_time = time.time() - start_call_time
    print('%s\n%i patches, %0.3fs run time\n\n'%(tfrecord_file_name, number_of_patches, total_run_time))

## Display the patch locations of each TFRecord file on a thumbnail image of its WSI file

### Note that all boxes represent 224x224 patches

In [None]:
# For every [wsi file, tfrecord file] pair recorded by the conversion cell:
# build the marked thumbnail, report its size and build time, then show it.
for k, (svs_file_name, tfrecord_filename) in enumerate(imf_tfr_list):
    wsi_filename = svs_file_name
    print('\n', tfrecord_filename, '\n', svs_file_name)

    start_call_time = time.time()
    o_thum = get_tfrecord_marked_thumbnail(
        tfrecord_filename,
        wsi_filename,
        border_color='turquoise',
    )
    thumbnail_seconds = time.time() - start_call_time

    print(o_thum.size, 'thumbnail image in %0.3fs'%(thumbnail_seconds))
    display(o_thum)