# Prototype - *get_patch_location_array()* function
    * Common to all 4 patch extraction use cases.
    * Decouples selection image mask & (mask size) from individual use case flows.
    * Insertion point for new or modified location-selection methods.
    * Allow inspection / select-method parameter modification before making output.
    * Allows paralellization of the slowest part - include | reject patch location. <br>
****
****
## Review Existing Selection Algorithm usages
```python
#                   common to both
grey_thumbnail = np.array(thumbnail.convert("L"))
thresh = threshold_otsu(grey_thumbnail)
mask = np.array(grey_thumbnail) < thresh

#                   new in opneslide_2_tfrecord.py
thresh = 80
np_img = np.array(one_thumb.convert('RGB'))
np_img = rgb2lab(np_img)
np_img = np_img[:,:,0]
mask_im = np.array(np_img) < thresh
```
```python
"""
                    87: wsi-sampler/sampler.py
"""
if np.sum(mask[x:x_mask_window, y:y_mask_window]) > 0:
    # include this location ...
```
```python
"""
                    504: DigiPath_MLTK/src/python/openslide_2_tfrecord.py
"""
mask_value = np.float(np.sum(thumb_arr==0)) / np.float(np.prod(thumb_arr.shape))
if mask_value <= drop_threshold:
    # include this location ... 
```
****
****
### Proto function to sleep on
#### Args:
    * wsi_image_name
    * patch_height
    * patch_width
    * patch_selection_method
    * patch_selection_parameter_1, 2, 3,...
        * variables that begin with "patch_selection_parameter_"
        * allow new methods with specific patch select parameters 
    * max_cpus - limit to allow other sections to run <br>
#### Returns:
    * patch selected array
    * rejected patches array <br>
****
### Sleep on it  <:- )
```python
def get_patch_location_array(run_parameters):
    """ temporary directories hold scattered thread outputs """    
    select_list = []
    reject_list = []
    # common for parallel: temp_dir_reject_locations, temp_dir_select_locations
    temp_dir_reject_locations = TemporaryDirectory()
    temp_dir_select_locations = TemporaryDirectory()
    def threshold_otsu_selector():
        # get the mask
        # get the rows & cols array
        # parallelize on selection-worker
        
    def threshold_rgb2lab_selector():
        # get the mask
        # get the rows & cols array
        # parallelize on selection-worker
        
    SELECT_LOCATION_METHOD = {'threshold_otsu', threshold_otsu_selector, 
                              'threshold_rgb2lab': threshold_rgb2lab_selector}
    
    patch_select_method = run_parameters['patch_select_method']
    
    if patch_select_method in SELECT_LOCATION_METHOD:
        rc = SELECT_LOCATION_METHOD[patch_select_method](run_parameters)
    
    # collect function: merge all temp files into a list
    if rc == 0:
        # collect the mini-files into the select and reject location lists
        select_list = list_from_temp_dir(temp_dir_select_locations)
        reject_list = list_from_temp_dir(temp_dir_reject_locations)
        
    # cleanup
    temp_dir_reject_locations.cleanup()
    temp_dir_select_locations.cleanup()
    
    return select_list, reject_list
```

In [58]:
import os
data_dir = '../../DigiPath_MLTK_data/Aperio/'
os.listdir(data_dir)

['JP2K-33003-2.svs',
 'JP2K-33003-1.svs',
 'CMU-1-JP2K-33005.svs',
 'CMU-1.svs',
 'CMU-3.svs',
 'CMU-1-Small-Region.svs',
 'CMU-2.svs']

In [65]:
import sys
import openslide

sys.path.insert(0, '../src/python')
from digipath_toolkit import *


def get_patch_location_array(run_parameters):
    """ Usage: patch_location_array = get_patch_location_array(run_parameters)
    Args:
        run_parameters:     keys:
                            image_file_name,
                            thumbnail_divisor,
                            patch_select_method,
                            patch_height,
                            patch_width
    Returns:
        patch_location_array
        
    """
    patch_location_array = []
    
    image_file_name = run_parameters['image_file_name']
    thumbnail_divisor = run_parameters['thumbnail_divisor']
    patch_select_method = run_parameters['patch_select_method']
    patch_height = run_parameters['patch_height']
    patch_width = run_parameters['patch_width']
    
    #                     OpenSlide open                      #
    os_im_obj = openslide.OpenSlide(image_file_name)
    
    pixels_height = os_im_obj.dimensions[1]
    rows_fence_array = get_fence_array(patch_length=patch_height, overall_length=pixels_height)
    
    pixels_width = os_im_obj.dimensions[0]
    cols_fence_array = get_fence_array(patch_length=patch_width, overall_length=pixels_width)
        
    small_im = os_im_obj.get_thumbnail((pixels_height // thumbnail_divisor, 
                                        pixels_width // thumbnail_divisor))
    os_im_obj.close()
    #                     OpenSlide close                     #
    
    mask_im = get_sample_selection_mask(small_im, patch_select_method)
        
    it_rows = zip(rows_fence_array[:,0] // thumbnail_divisor, 
                  rows_fence_array[:,1] // thumbnail_divisor, 
                  rows_fence_array[:,0])
    
    lft_cols = cols_fence_array[:,0] // thumbnail_divisor
    rgt_cols = cols_fence_array[:,1] // thumbnail_divisor
    cols_array = cols_fence_array[:,0]
    
    for tmb_row_top, tmb_row_bot, row_n in it_rows:
        it_cols = zip(lft_cols, rgt_cols, cols_array)
        for tmb_col_lft, tmb_col_rgt, col_n in it_cols:
            if (mask_im[tmb_row_top:tmb_row_bot, tmb_col_lft:tmb_col_rgt]).sum() > 0:
                patch_location_array.append((row_n, col_n))
                
    return patch_location_array

data_dir = '../../DigiPath_MLTK_data/Aperio/'

run_parameters = {}
run_parameters['image_file_name'] = os.path.join(data_dir, 'CMU-1-Small-Region.svs')
run_parameters['thumbnail_divisor'] = 40
run_parameters['patch_select_method'] = 'threshold_rgb2lab' # 'threshold_otsu'
run_parameters['patch_height'] = 224
run_parameters['patch_width'] = 224

patch_location_array = get_patch_location_array(run_parameters)
print('%i images found\n'%(len(patch_location_array)))
for d in patch_location_array:
    print(d)


42 images found

(28, 550)
(28, 774)
(28, 998)
(252, 550)
(252, 774)
(252, 998)
(476, 550)
(476, 774)
(476, 998)
(476, 1446)
(700, 102)
(700, 326)
(700, 550)
(700, 774)
(700, 998)
(700, 1222)
(924, 550)
(924, 774)
(924, 998)
(924, 1222)
(1148, 550)
(1148, 774)
(1148, 998)
(1148, 1222)
(1372, 550)
(1372, 774)
(1372, 998)
(1372, 1222)
(1596, 550)
(1596, 774)
(1596, 998)
(1596, 1222)
(1820, 326)
(1820, 550)
(1820, 774)
(1820, 998)
(1820, 1222)
(2044, 326)
(2044, 550)
(2044, 774)
(2044, 998)
(2044, 1222)
