# Whole Slide Image sampler module:

****
## Testing with:
```python
# tensorflow==2.0.0rc0
tensorflow==2.0.0
```

In [None]:
import IPython.display as ip_display
import os
import time

import sys
sys.path.insert(0, '../src/python')
from openslide_2_tfrecord import svs_file_to_patches_tfrecord, get_iterable_tfrecord


# Extensions (lower case, with leading dot) treated as whole-slide image files.
file_ext_list = ['.svs', '.tif', '.tiff']


def get_file_size_dict(data_dir, reverse_dict=False):
    """Map the image files found directly inside data_dir to their sizes.

    Usage:
    file_size_dict = get_file_size_dict(data_dir)

    Only regular files whose extension is listed in file_ext_list are
    included; the extension comparison ignores case, and sub-directories
    are not descended into.

    Args:
        data_dir:       directory to scan.
        reverse_dict:   when True return {size_in_bytes: full_path} instead of
                        {full_path: size_in_bytes}. Files that share a size
                        collapse to a single entry in the reversed mapping.

    Returns:
        dict keyed by os.path.join(data_dir, file_name) with os.path.getsize
        values (or the inverse mapping when reverse_dict is True).
    """
    allowed_extensions = {ext.lower() for ext in file_ext_list}

    file_size_dict = {}
    for f in os.listdir(data_dir):
        ff = os.path.join(data_dir, f)
        if not os.path.isfile(ff):
            continue
        _, f_ext = os.path.splitext(ff)
        if f_ext.lower() in allowed_extensions:
            file_size_dict[ff] = os.path.getsize(ff)

    if reverse_dict:
        return {size: path for path, size in file_size_dict.items()}
    return file_size_dict


## View Available Data

In [None]:
data_dir = '../../DigiPath_MLTK_data/Aperio'

# {full_path: size_in_bytes} for every image file found in data_dir
file_size_dict = get_file_size_dict(data_dir)

# Order the paths themselves by size. Going through an inverted {size: path}
# dict would drop files that happen to share a size and list another twice.
files_list = sorted(file_size_dict, key=file_size_dict.get)

print('\nordered by size:\n')
for count, f in enumerate(files_list):
    _, f_only = os.path.split(f)
    print('%3i %30s: %i'%(count, f_only, file_size_dict[f]))

## Run Test Cell

In [None]:
#   select a file by its index in the size-ordered listing printed above;
#   files_list entries already include data_dir, so they are used as-is
#   (joining with data_dir again would duplicate the directory prefix)
svs_file_name = files_list[0]
print('svs_file_name:', svs_file_name)

output_dir = '../../DigiPath_MLTK_data/module_test'
if not os.path.isdir(output_dir):
    # warn only -- the conversion call below is still attempted
    print(output_dir, '\n\tnot found')
patch_height = 224
patch_width = 224
patch_size = [patch_height, patch_width]
patch_keep_threshold = 0.5

#   time only the conversion call itself
start_call_time = time.time()
svs_file_conversion_dict = svs_file_to_patches_tfrecord(svs_file_name, 
                                                        output_dir, 
                                                        patch_size, 
                                                        patch_keep_threshold)
total_run_time = time.time() - start_call_time

#   unpack the entries of the returned dict that the following cells use
tfrecord_file_name = svs_file_conversion_dict['tfrecord_file_name']
number_of_patches = svs_file_conversion_dict['number_of_patches']
mask_dict = svs_file_conversion_dict['mask_dict']
temp_dir = svs_file_conversion_dict['temp_dir']
# ip_display.display(mask_dict['thumb_mask'])

print('\n\ntfrecord_file_name\n\t%s\n%i patches in %0.3f seconds run time\n'%(tfrecord_file_name, 
                                                                              number_of_patches, 
                                                                              total_run_time))

### Largest run: file size 390,750,635 bytes
```text
tfrecord_file_name
	../../DigiPath_MLTK_data/module_test/CMU-2.tfrecords
5561 patches in 73.864 seconds run time
```
## View the tfrecord files:

In [None]:
#                      Get an iterator over the tfrecord iterable
dk2 = iter(get_iterable_tfrecord(tfrecord_file_name))

In [None]:
#                      Re-run this cell to iterate through images
try:
    # built-in next() follows the Python 3 iterator protocol; a .next()
    # method is not guaranteed to exist on the iterator
    dakine = next(dk2)
except StopIteration:
    # the iterator created in the previous cell has been exhausted
    print('dakine is empty')
else:
    print(dakine['label'], dakine['image_name'])
    image_raw = dakine['image_raw'].numpy()
    ip_display.display(ip_display.Image(data=image_raw))

In [None]:
#                      Get the iterable of the tf_record -- no explicit __iter__()
#                      call is needed here: the for loop obtains the iterator itself
iterable_tfrecord = get_iterable_tfrecord(tfrecord_file_name)

#                      upper bound on the number of records printed and displayed
n_to_show = 1000
first_records = iterable_tfrecord.take(n_to_show)
for record in first_records:
    label, image_name = record['label'], record['image_name']
    print(label, image_name)
    ip_display.display(ip_display.Image(data=record['image_raw'].numpy()))

In [None]:
# Report the size attribute of the 'thumb_mask' entry, then render the entry itself
thumb_mask = mask_dict['thumb_mask']
print('mask_image.size', thumb_mask.size)
ip_display.display(thumb_mask)

In [None]:
# Keep a handle on the 'thumb_mask' entry of mask_dict
# (presumably an image object -- confirm against svs_file_to_patches_tfrecord).
mask_image = mask_dict['thumb_mask']
# Bare expression: in a notebook its value is shown as the cell output.
mask_image.size

In [None]:
# Print the built-in help text (signature and docstring) of the conversion function.
help(svs_file_to_patches_tfrecord)

## main function modules switch function concept:
[main](https://github.com/KnowEnG/Samples_Clustering_Pipeline/blob/master/src/samples_clustering.py) <br>
[toolbox main-helper](https://github.com/KnowEnG/KnowEnG_Pipelines_Library/blob/master/knpackage/toolbox.py) <br>
[module toolbox](https://github.com/KnowEnG/Samples_Clustering_Pipeline/blob/master/src/samples_clustering_toolbox.py) <br>